The hashlib module provides cryptographic hash functions. Use it to create fingerprints of data, verify file integrity, and implement password hashing (with proper key derivation).

Basic Hashing

import hashlib
 
# One-shot form: hand the bytes to the constructor (SHA-256 suits most uses)
message = b'Hello, World!'
h = hashlib.sha256(message)
print(h.hexdigest())  # dffd6021bb2bd5b0af676290809ec3...

# Two-step form: create an empty hash, then feed it -- yields the same digest
h = hashlib.sha256()
h.update(message)
print(h.hexdigest())

Common Algorithms

import hashlib
 
data = b'test data'

# Current-generation algorithms: safe choices for new code.
# Hex digest lengths, in order: 64, 96, 128 and 64 characters.
for modern in (hashlib.sha256, hashlib.sha384, hashlib.sha512, hashlib.sha3_256):
    print(modern(data).hexdigest())

# Broken for security purposes; keep only for interoperability.
# Hex digest lengths, in order: 40 and 32 characters.
for legacy in (hashlib.sha1, hashlib.md5):
    print(legacy(data).hexdigest())

File Hashing

import hashlib
 
def hash_file(filepath, algorithm='sha256'):
    """Return the hex digest of a file's contents.

    The file is opened in binary mode and consumed in 8192-byte pieces, so
    the whole file is never held in memory at once. ``algorithm`` is passed
    straight to ``hashlib.new``.
    """
    digest = hashlib.new(algorithm)

    with open(filepath, 'rb') as stream:
        # iter() with a sentinel keeps calling read() until it returns b'' (EOF)
        for block in iter(lambda: stream.read(8192), b''):
            digest.update(block)

    return digest.hexdigest()
 
# Usage: no algorithm argument is given, so hash_file uses its 'sha256' default
checksum = hash_file('document.pdf')
print(f"SHA-256: {checksum}")

Verify File Integrity

import hashlib
 
def verify_checksum(filepath, expected_hash, algorithm='sha256'):
    """Return True when the file's digest equals ``expected_hash``.

    Both hex strings are lower-cased before comparison, so the check is
    case-insensitive.
    """
    computed = hash_file(filepath, algorithm).lower()
    wanted = expected_hash.lower()
    return computed == wanted
 
# Example: compare a downloaded archive against its published digest
intact = verify_checksum('download.zip', 'a1b2c3d4...')
print("File integrity verified" if intact else "File corrupted or tampered!")

Available Algorithms

import hashlib
 
# Names hashlib promises to support on every platform
always = hashlib.algorithms_guaranteed
print(always)
# {'sha256', 'sha384', 'sha512', 'sha224', 'sha1', 'md5', ...}

# Names usable in this particular interpreter
here = hashlib.algorithms_available
print(here)
# Includes OpenSSL algorithms

Incremental Updates

import hashlib
 
# Feed the hash piece by piece, as you would with data too large to hold at once
h = hashlib.sha256()
for part in (b'first part ', b'second part ', b'third part'):
    h.update(part)

final = h.hexdigest()

# Hashing the concatenation in a single call gives the identical digest
combined = hashlib.sha256(b'first part second part third part').hexdigest()
assert final == combined

Binary vs Hex Digest

import hashlib
 
h = hashlib.sha256(b'data')

# digest() returns the raw bytes; SHA-256 always produces 32 of them
binary = h.digest()
# hexdigest() renders the same value as text, two hex characters per byte
hex_str = h.hexdigest()

print(len(binary))   # 32
print(len(hex_str))  # 64

Copy Hash State

import hashlib
 
# Hash the shared prefix once
h1 = hashlib.sha256(b'common-prefix-')

# copy() clones the internal state, so each branch continues independently
h2, h3 = h1.copy(), h1.copy()

for branch, suffix in ((h2, b'suffix-a'), (h3, b'suffix-b')):
    branch.update(suffix)

print(h2.hexdigest())  # Different
print(h3.hexdigest())  # Different

Password Hashing (PBKDF2)

Never store plain hashes of passwords—use key derivation:

import hashlib
import secrets
 
def hash_password(password, *, iterations=100_000):
    """Derive a storable password hash with PBKDF2-HMAC-SHA256.

    Args:
        password: The plaintext password as ``str``; encoded as UTF-8.
        iterations: PBKDF2 work factor. The default of 100,000 is kept for
            compatibility with hashes that were already stored; current
            guidance (Python ``hashlib`` docs, OWASP Password Storage Cheat
            Sheet) suggests several hundred thousand for SHA-256, so pass a
            higher value for new deployments and use the same value when
            verifying.

    Returns:
        ``bytes``: a fresh 16-byte random salt followed by the 32-byte
        derived key (48 bytes total). Store the whole value.
    """
    # A per-password random salt defeats precomputed (rainbow-table) attacks.
    salt = secrets.token_bytes(16)
    key = hashlib.pbkdf2_hmac(
        'sha256',
        password.encode(),
        salt,
        iterations,
    )
    return salt + key  # Salt is not secret; keep it alongside the key
 
def verify_password(password, stored, *, iterations=100_000):
    """Check a password against a stored PBKDF2-HMAC-SHA256 hash.

    Args:
        password: The candidate plaintext password as ``str``.
        stored: ``bytes`` laid out as a 16-byte salt followed by the
            derived key.
        iterations: PBKDF2 work factor; must equal the value used when
            ``stored`` was created, otherwise verification fails.

    Returns:
        ``True`` if the password reproduces the stored key, else ``False``.
    """
    salt = stored[:16]
    stored_key = stored[16:]
    key = hashlib.pbkdf2_hmac(
        'sha256',
        password.encode(),
        salt,
        iterations,
    )
    # Constant-time comparison avoids leaking how many leading bytes matched.
    return secrets.compare_digest(key, stored_key)

Scrypt (Memory-Hard)

import hashlib
import secrets
 
def hash_password_scrypt(password):
    """Derive a storable password hash with the memory-hard scrypt KDF.

    Returns a fresh 16-byte random salt followed by the derived key.
    """
    salt = secrets.token_bytes(16)
    cost = {
        'n': 2**14,  # CPU/memory cost
        'r': 8,      # Block size
        'p': 1,      # Parallelization
    }
    derived = hashlib.scrypt(password.encode(), salt=salt, **cost)
    return salt + derived
 
def verify_password_scrypt(password, stored):
    """Check a password against a salt-prefixed scrypt hash.

    ``stored`` is a 16-byte salt followed by the derived key. The key is
    recomputed with n=2**14, r=8, p=1 and compared in constant time.
    """
    salt, expected = stored[:16], stored[16:]
    cost = {'n': 2**14, 'r': 8, 'p': 1}
    candidate = hashlib.scrypt(password.encode(), salt=salt, **cost)
    return secrets.compare_digest(candidate, expected)

Content-Addressed Storage

import hashlib
import os
 
class ContentStore:
    """Content-addressed blob store backed by a flat directory.

    Every blob lives in a file named after the SHA-256 hex digest of its
    bytes, so identical content is stored only once and the key doubles as
    an integrity checksum.
    """

    def __init__(self, path):
        """Root the store at *path*, creating the directory if needed."""
        self.path = path
        os.makedirs(path, exist_ok=True)

    def put(self, data):
        """Store *data* (bytes) and return its SHA-256 hex digest as the key.

        The blob is written to a temporary sibling file and then renamed
        into place, so a crash or error mid-write can never leave a
        truncated file under a valid hash name (which later ``put`` calls
        would otherwise skip over as "already stored").
        """
        h = hashlib.sha256(data).hexdigest()
        filepath = os.path.join(self.path, h)

        if os.path.exists(filepath):
            return h

        # A random suffix keeps concurrent writers of the same content from
        # sharing a temp file; 'x' mode fails rather than overwriting one.
        tmp_path = os.path.join(self.path, f".{h}.{os.urandom(8).hex()}.tmp")
        try:
            with open(tmp_path, 'xb') as f:
                f.write(data)
            # os.replace publishes the finished file under its final name in
            # one step; if another writer got there first the content is
            # identical, so overwriting is harmless.
            os.replace(tmp_path, filepath)
        except BaseException:
            # Do not leave partial temp files behind on failure.
            try:
                os.remove(tmp_path)
            except FileNotFoundError:
                pass
            raise

        return h

    def get(self, key):
        """Return the bytes stored under *key*.

        Raises ``FileNotFoundError`` (from ``open``) when no such blob
        exists.

        NOTE(review): *key* is joined to the store path without validation;
        if keys can come from untrusted input, check that they are 64 hex
        characters before calling this.
        """
        filepath = os.path.join(self.path, key)
        with open(filepath, 'rb') as f:
            return f.read()
 
# Example: round-trip a blob through a store rooted at ./store
store = ContentStore('./store')
key = store.put(b'my data')  # key is the SHA-256 hex digest of the bytes
data = store.get(key)        # reads back the bytes saved under that key

BLAKE2 (Fast and Secure)

import hashlib
 
message = b'data'

# BLAKE2b: the variant tuned for 64-bit platforms
h = hashlib.blake2b(message)
print(h.hexdigest())

# BLAKE2s: the variant tuned for 32-bit platforms, with a smaller digest
h = hashlib.blake2s(message)
print(h.hexdigest())

# Output length is chosen up front through digest_size
h = hashlib.blake2b(message, digest_size=32)
print(len(h.digest()))  # 32 bytes

Keyed Hashing with BLAKE2

import hashlib
 
# Keyed mode is built into BLAKE2, giving a MAC without a separate HMAC wrapper
key = b'secret-key'
h = hashlib.blake2b(key=key)
h.update(b'message')
print(h.hexdigest())

SHA-3 (Keccak)

import hashlib
 
data = b'test'

# Fixed-length SHA-3 family, smallest digest to largest
for sha3 in (hashlib.sha3_224, hashlib.sha3_256, hashlib.sha3_384, hashlib.sha3_512):
    print(sha3(data).hexdigest())

# SHAKE has variable-length output: the caller chooses the length in bytes
for shake, n_bytes in ((hashlib.shake_128, 32), (hashlib.shake_256, 64)):
    print(shake(data).hexdigest(n_bytes))  # 32 bytes, then 64 bytes

Algorithm Recommendations

Use Case | Algorithm
--- | ---
General integrity | SHA-256
File checksums | SHA-256
High security | SHA-512, SHA-3
Performance critical | BLAKE2b
Password hashing | scrypt, PBKDF2
Legacy compatibility | MD5, SHA-1 (not secure)

Digest Sizes

import hashlib
 
algorithms = [
    'md5', 'sha1',
    'sha256', 'sha384', 'sha512',
    'blake2b', 'blake2s',
]

# Build an empty hash object per name just to read its digest_size attribute
for algo, h in zip(algorithms, map(hashlib.new, algorithms)):
    print(f"{algo}: {h.digest_size} bytes, {h.digest_size * 2} hex chars")

Hashlib is essential for any application dealing with data integrity or security.

React to this post: