Dataclasses eliminate boilerplate for data-holding classes. Here's how to use them beyond the basics.
Basic Refresher
from dataclasses import dataclass
@dataclass
class Point:
    """A 2D point; the decorator generates __init__, __repr__ and __eq__."""

    x: float
    y: float
p = Point(1.0, 2.0)
print(p)  # Point(x=1.0, y=2.0)
You get __init__, __repr__, and __eq__ for free.
Field Options
from dataclasses import dataclass, field
from typing import List
@dataclass
class Config:
    """Configuration record showing the most common ``field()`` options."""

    name: str
    debug: bool = False  # Plain immutable default
    tags: List[str] = field(default_factory=list)  # Fresh list per instance
    _cache: dict = field(default_factory=dict, repr=False)  # Left out of repr
    id: int = field(init=False)  # Not an __init__ parameter; set below

    def __post_init__(self):
        """Derive ``id`` from ``name`` once the generated __init__ has run."""
        # hash() of a str is salted per interpreter run, so this value is only
        # stable within a single process.
        self.id = hash(self.name)


# Field parameters
| Parameter | Effect |
|---|---|
| default | Default value |
| default_factory | Zero-argument callable that builds the default (use for mutable defaults) |
| repr | Include in __repr__ |
| compare | Include in comparisons |
| hash | Include in __hash__ |
| init | Include in __init__ |
| kw_only | Keyword-only argument (3.10+) |
__post_init__
Runs after __init__. Use for validation, derived fields, or transformations.
from dataclasses import dataclass, field
@dataclass
class Rectangle:
    """Rectangle whose ``area`` is derived from its sides at construction."""

    width: float
    height: float
    area: float = field(init=False)  # Filled in by __post_init__, not passed in

    def __post_init__(self):
        """Validate both sides, then populate the derived ``area`` field.

        Raises:
            ValueError: If ``width`` or ``height`` is zero or negative.
        """
        has_bad_side = self.width <= 0 or self.height <= 0
        if has_bad_side:
            raise ValueError("Dimensions must be positive")
        self.area = self.width * self.height
r = Rectangle(3, 4)
print(r.area)  # 12
InitVar (init-only variables)
Pass data to __post_init__ without storing it.
from dataclasses import dataclass, field, InitVar
@dataclass
class User:
    """User record that keeps only a salted hash of the password.

    ``password`` is an ``InitVar``: it is accepted by ``__init__`` and handed
    to ``__post_init__``, but it never becomes an attribute of the instance.
    ``password_hash`` has the form ``"<salt hex>$<scrypt digest hex>"``.
    """

    name: str
    password: InitVar[str]  # Init-only: passed to __post_init__, not stored
    password_hash: str = field(init=False)

    def __post_init__(self, password: str):
        """Hash ``password`` with a fresh random salt and store the result.

        A single unsalted SHA-256 is fast to brute-force and gives identical
        output for identical passwords, so a memory-hard KDF (scrypt) with a
        per-user salt is used instead.
        """
        import hashlib
        import secrets

        salt = secrets.token_bytes(16)
        digest = hashlib.scrypt(
            password.encode("utf-8"), salt=salt, n=2**14, r=8, p=1
        )
        # The salt is stored next to the digest so the hash can be re-derived
        # when verifying a login attempt.
        self.password_hash = f"{salt.hex()}${digest.hex()}"
u = User("alice", "secret123")
print(u.password_hash) # hash value
# u.password  # AttributeError - not stored
Immutability with frozen
from dataclasses import dataclass
@dataclass(frozen=True)
class Point:
    """Immutable 2D point; assigning to a field after creation raises."""

    x: float
    y: float
p = Point(1, 2)
# p.x = 3  # FrozenInstanceError
Frozen dataclasses are hashable by default (can be dict keys, set members), provided every field value is itself hashable.
Slots (Python 3.10+)
Save memory and get faster attribute access.
from dataclasses import dataclass
@dataclass(slots=True)
class Point:
x: float
y: float
# Uses __slots__ instead of __dict__
# ~20-30% memory savings for many instances
Keyword-Only Fields (Python 3.10+)
from dataclasses import dataclass, field
@dataclass
class Config:
name: str
# Everything after kw_only=True is keyword-only
debug: bool = field(default=False, kw_only=True)
verbose: bool = field(default=False, kw_only=True)
# Config("app", True, False) # TypeError
Config("app", debug=True)  # OK
Or use the KW_ONLY sentinel, which makes every field declared after it keyword-only:
from dataclasses import dataclass, KW_ONLY
@dataclass
class Config:
name: str
_: KW_ONLY
debug: bool = False
verbose: bool = FalseInheritance
from dataclasses import dataclass
@dataclass
class Animal:
    """Base record; subclasses append their own fields after these."""

    name: str
    age: int
@dataclass
class Dog(Animal):
    """Animal with a breed; inherited fields come first in __init__."""

    breed: str
d = Dog("Rex", 5, "German Shepherd")
print(d)  # Dog(name='Rex', age=5, breed='German Shepherd')
Default values and inheritance
Fields without defaults can't follow fields with defaults:
@dataclass
class Base:
    """Parent whose only field already carries a default."""

    x: int = 0
@dataclass
class Child(Base):
    y: int  # Error! TypeError at class creation: non-default 'y' follows default 'x'
# Fix: give y a default, or use kw_only
@dataclass
class Child(Base):
    """Subclass that adds a required field after an inherited default."""

    # Keyword-only fields are moved after all positional parameters in the
    # generated __init__, so a required one may follow a defaulted one.
    y: int = field(kw_only=True)  # OK


# Comparison and Ordering
from dataclasses import dataclass
@dataclass(order=True)
class Version:
    """Version number ordered field by field: major, then minor, then patch."""

    major: int
    minor: int
    patch: int
v1 = Version(1, 2, 3)
v2 = Version(1, 2, 4)
print(v1 < v2) # True
versions = [Version(2, 0, 0), Version(1, 0, 0), Version(1, 1, 0)]
print(sorted(versions))  # Sorted by (major, minor, patch)
Custom sort key
from dataclasses import dataclass, field
@dataclass(order=True)
class Task:
    """Task ordered by an explicit key built from its priority and name."""

    # Declared first so it is the leading element in ordering comparisons.
    sort_key: tuple = field(init=False, repr=False)
    priority: int
    name: str

    def __post_init__(self):
        """Build the comparison key from the constructor arguments."""
        self.sort_key = self.priority, self.name


# asdict and astuple
from dataclasses import dataclass, asdict, astuple
@dataclass
class Point:
    """2D point used to show conversion to plain dicts and tuples."""

    x: float
    y: float
p = Point(1, 2)
print(asdict(p)) # {'x': 1, 'y': 2}
print(astuple(p)) # (1, 2)
# Useful for JSON serialization
import json
json.dumps(asdict(p))
replace (copy with changes)
from dataclasses import dataclass, replace
@dataclass(frozen=True)
class Config:
    """Immutable connection settings; derive variants with ``replace()``."""

    host: str
    port: int
    debug: bool = False
prod = Config("prod.example.com", 443)
dev = replace(prod, host="localhost", port=8080, debug=True)
Factory Patterns
from dataclasses import dataclass, field
from typing import ClassVar
import uuid
@dataclass
class Entity:
    """Record that receives a random UUID string as ``id`` unless one is given."""

    id: str = field(default_factory=lambda: str(uuid.uuid4()))
    _counter: ClassVar[int] = 0  # ClassVar: a class attribute, not a field

    @classmethod
    def create(cls, **kwargs):
        """Increment the class-level counter, then build an instance from ``kwargs``."""
        cls._counter = cls._counter + 1
        return cls(**kwargs)


# Validation Pattern
from dataclasses import dataclass
@dataclass
class Email:
    """Email address that is checked and normalized on construction."""

    address: str

    def __post_init__(self):
        """Reject values without an ``@``, then lowercase and trim the address.

        Raises:
            ValueError: If the address contains no ``@`` character.
        """
        raw = self.address
        if "@" not in raw:
            raise ValueError(f"Invalid email: {raw}")
        self.address = raw.lower().strip()
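# Validation in __post_init__ also works for frozen classes; to assign a
# normalized value there, use object.__setattr__(self, "value", ...)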
@dataclass(frozen=True)
class PositiveInt:
    """Immutable wrapper that only accepts values greater than zero."""

    value: int

    def __post_init__(self):
        """Check the value after the generated __init__ has stored it.

        Raises:
            ValueError: If ``value`` is zero or negative.
        """
        candidate = self.value
        if candidate <= 0:
            raise ValueError("Must be positive")


# Combining with Properties
import math
from dataclasses import dataclass, field
@dataclass
class Circle:
    """Circle defined by its radius, with derived values exposed as properties.

    ``area`` and ``diameter`` are computed from the current ``radius`` on each
    access, so they stay correct when ``radius`` is reassigned.
    """

    radius: float

    @property
    def area(self) -> float:
        """Return the area, pi * radius squared, for the current radius."""
        return math.pi * self.radius ** 2

    @property
    def diameter(self) -> float:
        """Return twice the current radius."""
        return self.radius * 2


# When to Use Dataclasses
Good for:
- Configuration objects
- DTOs (data transfer objects)
- Value objects
- Simple domain models
Consider alternatives when:
- Need complex validation → Pydantic
- Need ORM features → SQLAlchemy models
- Need serialization flexibility → attrs
- Very performance-critical → named tuples or plain classes
Dataclasses hit the sweet spot between plain dicts and full-featured classes. Use them when you need structured data with minimal ceremony.