The hashlib module provides cryptographic hash functions. Use it to create fingerprints of data, verify file integrity, and implement password hashing (with proper key derivation).
Basic Hashing
import hashlib
# SHA-256 (recommended for most uses)
h = hashlib.sha256(b'Hello, World!')
print(h.hexdigest()) # dffd6021bb2bd5b0af676290809ec3...
# Same result
h = hashlib.sha256()
h.update(b'Hello, World!')
print(h.hexdigest())
Common Algorithms
import hashlib
data = b'test data'
# Modern, secure algorithms
print(hashlib.sha256(data).hexdigest()) # 64 chars
print(hashlib.sha384(data).hexdigest()) # 96 chars
print(hashlib.sha512(data).hexdigest()) # 128 chars
print(hashlib.sha3_256(data).hexdigest()) # 64 chars
# Legacy (avoid for security)
print(hashlib.sha1(data).hexdigest()) # 40 chars
print(hashlib.md5(data).hexdigest()) # 32 chars
File Hashing
import hashlib
def hash_file(filepath, algorithm='sha256'):
    """Return the hexadecimal digest of a file's contents.

    The file is opened in binary mode and consumed in 8192-byte pieces, so
    the whole file never has to be held in memory at once.

    Args:
        filepath: Path of the file to hash.
        algorithm: Algorithm name passed to ``hashlib.new``.

    Returns:
        The digest as a hexadecimal string.
    """
    hasher = hashlib.new(algorithm)
    with open(filepath, 'rb') as stream:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for block in iter(lambda: stream.read(8192), b''):
            hasher.update(block)
    return hasher.hexdigest()
# Usage
checksum = hash_file('document.pdf')
print(f"SHA-256: {checksum}")
Verify File Integrity
import hashlib
def verify_checksum(filepath, expected_hash, algorithm='sha256'):
    """Report whether a file's digest equals an expected hex digest.

    The comparison ignores letter case, so upper- and lower-case hex
    strings are treated as the same digest.

    Args:
        filepath: Path of the file to check.
        expected_hash: Hex digest the file is supposed to have.
        algorithm: Algorithm name forwarded to ``hash_file``.

    Returns:
        True when the computed and expected digests match, else False.
    """
    computed = hash_file(filepath, algorithm).lower()
    wanted = expected_hash.lower()
    return computed == wanted
# Verify downloaded file
if verify_checksum('download.zip', 'a1b2c3d4...'):
print("File integrity verified")
else:
print("File corrupted or tampered!")
Available Algorithms
import hashlib
# Guaranteed available
print(hashlib.algorithms_guaranteed)
# {'sha256', 'sha384', 'sha512', 'sha224', 'sha1', 'md5', ...}
# Available on this system
print(hashlib.algorithms_available)
# Includes OpenSSL algorithms
Incremental Updates
import hashlib
# Hash large data incrementally
h = hashlib.sha256()
h.update(b'first part ')
h.update(b'second part ')
h.update(b'third part')
final = h.hexdigest()
# Same as hashing all at once
combined = hashlib.sha256(b'first part second part third part').hexdigest()
assert final == combined
Binary vs Hex Digest
import hashlib
h = hashlib.sha256(b'data')
# Raw bytes (32 bytes for SHA-256)
binary = h.digest()
print(len(binary)) # 32
# Hex string (64 characters)
hex_str = h.hexdigest()
print(len(hex_str)) # 64
Copy Hash State
import hashlib
# Share common prefix
h1 = hashlib.sha256(b'common-prefix-')
# Branch into different hashes
h2 = h1.copy()
h3 = h1.copy()
h2.update(b'suffix-a')
h3.update(b'suffix-b')
print(h2.hexdigest()) # Different
print(h3.hexdigest()) # Different
Password Hashing (PBKDF2)
Never store plain hashes of passwords—use key derivation:
import hashlib
import secrets
def hash_password(password, *, iterations=100_000):
    """Derive a storable password hash with PBKDF2-HMAC-SHA256.

    A fresh random 16-byte salt is generated for every call, so hashing the
    same password twice yields different results.

    Args:
        password: The password as a ``str``; it is encoded with the default
            ``str.encode()`` encoding (UTF-8) before derivation.
        iterations: PBKDF2 iteration count. The default matches the value
            this function has always used; pass a larger value to raise the
            cost of brute-force attacks. The same count must be supplied
            when verifying, because it is not embedded in the output.

    Returns:
        ``bytes`` laid out as ``salt (16 bytes) + derived key``.
    """
    salt = secrets.token_bytes(16)
    key = hashlib.pbkdf2_hmac(
        'sha256',
        password.encode(),
        salt,
        iterations,
    )
    # The salt is not secret; it is stored in front of the key so that
    # verification can re-derive the key from the same inputs.
    return salt + key
def verify_password(password, stored, *, iterations=100_000):
    """Check a password against a stored ``salt + key`` PBKDF2 hash.

    Args:
        password: The candidate password as a ``str``.
        stored: ``bytes`` whose first 16 bytes are the salt and whose
            remainder is the PBKDF2-HMAC-SHA256 derived key.
        iterations: PBKDF2 iteration count; must equal the count that was
            used when ``stored`` was produced.

    Returns:
        True when the re-derived key equals the stored key, else False.
    """
    salt = stored[:16]
    stored_key = stored[16:]
    key = hashlib.pbkdf2_hmac(
        'sha256',
        password.encode(),
        salt,
        iterations,
    )
    # Constant-time comparison so timing does not leak how many leading
    # bytes of the key matched.
    return secrets.compare_digest(key, stored_key)


# Scrypt (Memory-Hard)
import hashlib
import secrets
def hash_password_scrypt(password, *, n=2**14, r=8, p=1):
    """Derive a storable password hash with scrypt (memory-hard).

    A fresh random 16-byte salt is generated for every call.

    Args:
        password: The password as a ``str``; encoded with the default
            ``str.encode()`` encoding (UTF-8) before derivation.
        n: CPU/memory cost factor passed to ``hashlib.scrypt``.
        r: Block size passed to ``hashlib.scrypt``.
        p: Parallelization factor passed to ``hashlib.scrypt``.

    Returns:
        ``bytes`` laid out as ``salt (16 bytes) + derived key``. The cost
        parameters are not embedded, so the same ``n``, ``r`` and ``p``
        must be supplied when verifying.
    """
    salt = secrets.token_bytes(16)
    key = hashlib.scrypt(
        password.encode(),
        salt=salt,
        n=n,
        r=r,
        p=p,
    )
    return salt + key
def verify_password_scrypt(password, stored, *, n=2**14, r=8, p=1):
    """Check a password against a stored ``salt + key`` scrypt hash.

    Args:
        password: The candidate password as a ``str``.
        stored: ``bytes`` whose first 16 bytes are the salt and whose
            remainder is the scrypt-derived key.
        n: CPU/memory cost factor; must equal the value used when hashing.
        r: Block size; must equal the value used when hashing.
        p: Parallelization factor; must equal the value used when hashing.

    Returns:
        True when the re-derived key equals the stored key, else False.
    """
    salt = stored[:16]
    stored_key = stored[16:]
    key = hashlib.scrypt(
        password.encode(),
        salt=salt,
        n=n,
        r=r,
        p=p,
    )
    # Constant-time comparison so timing does not leak how many leading
    # bytes of the key matched.
    return secrets.compare_digest(key, stored_key)


# Content-Addressed Storage
import hashlib
import os
class ContentStore:
    """Directory-backed store that files each blob under its SHA-256 hex digest."""

    def __init__(self, path):
        """Remember the backing directory and create it if it is missing."""
        self.path = path
        os.makedirs(path, exist_ok=True)

    def put(self, data):
        """Store ``data`` and return its SHA-256 hex digest as the key.

        If a file for that digest already exists, nothing is written.
        Otherwise the bytes are written to a temporary file in the same
        directory and then renamed into place, so an interrupted write can
        never leave a truncated file under a valid key (which later ``put``
        calls would skip and ``get`` would return as if it were intact).

        Args:
            data: The ``bytes`` to store.

        Returns:
            The hexadecimal SHA-256 digest of ``data``.
        """
        import tempfile  # local import keeps this block self-contained

        h = hashlib.sha256(data).hexdigest()
        filepath = os.path.join(self.path, h)
        if not os.path.exists(filepath):
            # Same directory as the target so os.replace stays on one
            # filesystem. NOTE: mkstemp creates the file with owner-only
            # permissions.
            fd, tmp_path = tempfile.mkstemp(dir=self.path, prefix='.tmp-')
            try:
                with os.fdopen(fd, 'wb') as f:
                    f.write(data)
                os.replace(tmp_path, filepath)
            except BaseException:
                # Do not leave a stray temp file behind on failure.
                try:
                    os.remove(tmp_path)
                except FileNotFoundError:
                    pass
                raise
        return h

    def get(self, key):
        """Return the bytes stored under ``key``.

        Args:
            key: A digest previously returned by ``put``.

        Returns:
            The stored ``bytes``.

        Raises:
            FileNotFoundError: If no file exists for ``key``.
        """
        filepath = os.path.join(self.path, key)
        with open(filepath, 'rb') as f:
            return f.read()
store = ContentStore('./store')
key = store.put(b'my data')
data = store.get(key)
BLAKE2 (Fast and Secure)
import hashlib
# BLAKE2b (optimized for 64-bit)
h = hashlib.blake2b(b'data')
print(h.hexdigest())
# BLAKE2s (optimized for 32-bit, smaller)
h = hashlib.blake2s(b'data')
print(h.hexdigest())
# Custom digest size
h = hashlib.blake2b(b'data', digest_size=32)
print(len(h.digest())) # 32 bytes
Keyed Hashing with BLAKE2
import hashlib
# BLAKE2 supports built-in keying (like HMAC)
key = b'secret-key'
h = hashlib.blake2b(b'message', key=key)
print(h.hexdigest())
SHA-3 (Keccak)
import hashlib
data = b'test'
# SHA-3 variants
print(hashlib.sha3_224(data).hexdigest())
print(hashlib.sha3_256(data).hexdigest())
print(hashlib.sha3_384(data).hexdigest())
print(hashlib.sha3_512(data).hexdigest())
# SHAKE (variable output)
print(hashlib.shake_128(data).hexdigest(32)) # 32 bytes
print(hashlib.shake_256(data).hexdigest(64)) # 64 bytes
Algorithm Recommendations
| Use Case | Algorithm |
|---|---|
| General integrity | SHA-256 |
| File checksums | SHA-256 |
| High security | SHA-512, SHA-3 |
| Performance critical | BLAKE2b |
| Password hashing | scrypt, PBKDF2 |
| Legacy compatibility | MD5, SHA-1 (not secure) |
Digest Sizes
import hashlib
algorithms = ['md5', 'sha1', 'sha256', 'sha384', 'sha512', 'blake2b', 'blake2s']
for algo in algorithms:
h = hashlib.new(algo)
print(f"{algo}: {h.digest_size} bytes, {h.digest_size * 2} hex chars")
Hashlib is essential for any application dealing with data integrity or security.
React to this post: