The collections module provides specialized container types beyond the built-ins. Here's when and how to use them.
Counter
from collections import Counter
# Count how often each element appears
words = "apple banana apple cherry banana apple".split()
counts = Counter(words)
print(counts)  # Counter({'apple': 3, 'banana': 2, 'cherry': 1})

# The n most frequent elements, highest count first
print(counts.most_common(2))  # [('apple', 3), ('banana', 2)]

# Look up a single count; absent keys report 0 instead of raising KeyError
print(counts["apple"])  # 3
print(counts["missing"])  # 0 (not KeyError)

# Counters support arithmetic
a = Counter(a=2, b=1)
b = Counter(b=1, c=1)
print(a + b)  # Counter({'a': 2, 'b': 2, 'c': 1})
print(a - b)  # Counter({'a': 2})
# Update counts
counts.update(["apple", "date"])
defaultdict
from collections import defaultdict
# A list factory groups values under their key
groups = defaultdict(list)
for key, value in [("a", 1), ("b", 2), ("a", 3)]:
    groups[key].append(value)
print(dict(groups))  # {'a': [1, 3], 'b': [2]}

# An int factory starts every missing count at 0
counts = defaultdict(int)
for char in "hello":
    counts[char] += 1
print(dict(counts))  # {'h': 1, 'e': 1, 'l': 2, 'o': 1}

# A set factory collects the distinct pages per user
seen = defaultdict(set)
for user, page in [("alice", "/home"), ("alice", "/about"), ("bob", "/home")]:
    seen[user].add(page)


# Nested defaultdict: every missing key creates another level
def tree():
    """Return a defaultdict whose missing keys are themselves trees."""
    return defaultdict(tree)


taxonomy = tree()
taxonomy["animal"]["mammal"]["dog"] = "woof"
deque (Double-Ended Queue)
from collections import deque
# Build a deque from any iterable
d = deque((1, 2, 3))

# Appending at either end is O(1)
d.append(4)  # [1, 2, 3, 4]
d.appendleft(0)  # [0, 1, 2, 3, 4]

# Popping from either end is O(1) as well
d.pop()  # 4
d.popleft()  # 0

# Rotate: positive steps shift right, negative steps shift left
d = deque(range(1, 6))
d.rotate(2)  # [4, 5, 1, 2, 3]
d.rotate(-2)  # [1, 2, 3, 4, 5]

# Fixed-size (sliding window): with maxlen set, the oldest items drop off
recent = deque(maxlen=3)
for i in range(5):
    recent.append(i)
print(list(recent)) # [2, 3, 4]
namedtuple
from collections import namedtuple
# Define a lightweight record type
Point = namedtuple("Point", "x y")
p = Point(x=1, y=2)
print(p.x, p.y)  # 1 2
print(p[0])  # 1 (also indexable)

# With defaults (Python 3.7+): defaults fill the rightmost fields
Point = namedtuple("Point", "x y z", defaults=(0,))
p = Point(1, 2)  # z defaults to 0

# Convert to dict
print(p._asdict())  # {'x': 1, 'y': 2, 'z': 0}

# Create from dict
data = dict(x=1, y=2, z=3)
p = Point(**data)
# Replace values (returns new tuple)
p2 = p._replace(x=10)
OrderedDict
from collections import OrderedDict
# Remembers insertion order (dicts do too since 3.7)
od = OrderedDict(a=1, b=2, c=3)

# Move a key to either end
od.move_to_end("a")  # a goes to end
od.move_to_end("c", last=False)  # c goes to beginning

# Pop from either end
od.popitem(last=True)  # Removes last
od.popitem(last=False)  # Removes first

# Equality considers order
OrderedDict(a=1, b=2) == OrderedDict(b=2, a=1)
# False (unlike regular dicts)
ChainMap
from collections import ChainMap
# Layer multiple dicts; lookups try the first mapping, then the next
defaults = dict(color="red", size="medium")
user_prefs = dict(color="blue")
config = ChainMap(user_prefs, defaults)
print(config["color"])  # blue (from user_prefs)
print(config["size"])  # medium (from defaults)

# Writes go to first dict
config["theme"] = "dark"
print(user_prefs)  # {'color': 'blue', 'theme': 'dark'}

# Add new layer
admin = dict(color="green")
config = config.new_child(admin)
print(config["color"]) # green
UserDict, UserList, UserString
from collections import UserDict
# Easier to subclass than dict
class ValidatedDict(UserDict):
    """Dictionary that rejects any key that is not a ``str``."""

    def __setitem__(self, key, value):
        """Store ``value`` under ``key``; raise ``TypeError`` for non-string keys."""
        if isinstance(key, str):
            super().__setitem__(key, value)
        else:
            raise TypeError("Keys must be strings")


d = ValidatedDict()
d["name"] = "Alice"  # OK
# d[123] = "value" # TypeError
Practical Patterns
from collections import Counter, OrderedDict, defaultdict, deque
# Word frequency
def word_frequency(text):
    """Return up to ten ``(word, count)`` pairs for *text*, most frequent first.

    The text is lower-cased and split on whitespace before counting.
    """
    tally = Counter()
    tally.update(text.lower().split())
    return tally.most_common(10)
# Group by attribute
def group_by(items, key):
    """Group *items* into a dict of lists keyed by ``key(item)``.

    Groups appear in the order their key is first seen, and items keep
    their original order within each group.
    """
    buckets = {}
    for element in items:
        buckets.setdefault(key(element), []).append(element)
    return buckets
# LRU cache (simplified)
class LRUCache:
    """Simplified least-recently-used cache backed by an ``OrderedDict``.

    The oldest entry sits at the front of ``self.cache`` and the most
    recently used entry at the back.
    """

    def __init__(self, capacity):
        """Create an empty cache that holds at most *capacity* entries."""
        self.capacity = capacity
        self.cache = OrderedDict()

    def get(self, key):
        """Return the value for *key* and mark it most recently used.

        Returns ``None`` when *key* is not cached.
        """
        entries = self.cache
        if key not in entries:
            return None
        entries.move_to_end(key)
        return entries[key]

    def put(self, key, value):
        """Insert or update *key*, evicting the oldest entry when over capacity."""
        entries = self.cache
        if key in entries:
            # Refresh recency before overwriting the stored value.
            entries.move_to_end(key)
        entries[key] = value
        if len(entries) > self.capacity:
            # The front of the OrderedDict is the least recently used entry.
            entries.popitem(last=False)
# Sliding window average
def moving_average(values, window_size):
    """Yield the mean of each full window of *window_size* consecutive values.

    Nothing is yielded until *window_size* values have been seen, so an
    input shorter than the window produces no output.

    Args:
        values: Iterable of numbers.
        window_size: Number of consecutive values per window; must be >= 1.

    Yields:
        The average of the most recent *window_size* values.

    Raises:
        ValueError: If *window_size* is less than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A zero-length window used to fail with ZeroDivisionError on the first
    # value; reject it up front with the same exception type deque already
    # raises for a negative maxlen.
    if window_size < 1:
        raise ValueError("window_size must be at least 1")
    window = deque(maxlen=window_size)
    for value in values:
        window.append(value)
        # Only emit once the window is full; maxlen drops the oldest value.
        if len(window) == window_size:
            yield sum(window) / window_size
# Invert a dict
def invert_dict(d):
    """Return a dict mapping each value of *d* to the list of keys that held it."""
    inverted = {}
    for original_key, original_value in d.items():
        inverted.setdefault(original_value, []).append(original_key)
    return inverted


# Counter Recipes
from collections import Counter
# Subtract counts (in place)
inventory = Counter({"apples": 5, "oranges": 3})
sold = Counter({"apples": 2, "oranges": 1})
inventory.subtract(sold)
print(inventory)  # Counter({'apples': 3, 'oranges': 2})

# Total count
print(sum(inventory.values()))  # 5

# Elements (returns iterator)
print(list(inventory.elements()))  # ['apples', 'apples', 'apples', 'oranges', 'oranges']

# Intersection (minimum counts)
a = Counter("aab")
b = Counter("abb")
print(a & b)  # Counter({'a': 1, 'b': 1})
# Union (maximum counts)
print(a | b) # Counter({'a': 2, 'b': 2})
deque Recipes
from collections import deque
# BFS with deque
def bfs(graph, start):
    """Return the set of nodes reachable from *start*, explored breadth-first.

    *graph* maps each node to an iterable of its neighbours; nodes missing
    from the mapping are treated as having none.
    """
    seen = set()
    frontier = deque((start,))
    while frontier:
        current = frontier.popleft()
        if current in seen:
            continue
        seen.add(current)
        frontier.extend(graph.get(current, []))
    return seen
# Tail of file (last N lines)
def tail(filename, n=10):
    """Return a deque holding the last *n* lines of the file at *filename*.

    Lines keep their trailing newline characters.
    """
    with open(filename) as handle:
        last_lines = deque(maxlen=n)
        # A bounded deque drops the oldest line as each new one arrives.
        last_lines.extend(handle)
    return last_lines
# Round-robin scheduler
def round_robin(tasks):
    """Run *tasks* in rotation, one slice at a time, until all report done.

    Each task must provide ``is_done()`` and ``run_slice()``. A finished task
    is dropped; an unfinished one runs a slice and rejoins the back of the
    queue.
    """
    pending = deque(tasks)
    while pending:
        current = pending.popleft()
        if current.is_done():
            continue
        current.run_slice()
        pending.append(current)


# When to Use What
| Type | Use When |
|---|---|
| Counter | Counting occurrences |
| defaultdict | Grouping, aggregating |
| deque | Queue/stack, sliding window |
| namedtuple | Simple data classes (immutable) |
| OrderedDict | Need to reorder, pop from ends |
| ChainMap | Layered configs, scope chains |
collections provides the right tool for common patterns. Use them instead of reinventing with plain dicts and lists.
React to this post: