The statistics module provides functions for basic statistical calculations. No external dependencies needed—just import and calculate.
Central Tendency
Mean (Average)
import statistics
data = [1, 2, 3, 4, 5]
print(statistics.mean(data)) # 3
Median (Middle Value)
import statistics
data = [1, 3, 5, 7, 9]
print(statistics.median(data)) # 5
# Even number of values: average of middle two
data = [1, 2, 3, 4]
print(statistics.median(data)) # 2.5
Mode (Most Common)
import statistics
data = [1, 2, 2, 3, 3, 3, 4]
print(statistics.mode(data)) # 3
# Works with strings too
colors = ['red', 'blue', 'red', 'green', 'red']
print(statistics.mode(colors)) # 'red'
Multimode (All Modes)
import statistics
data = [1, 1, 2, 2, 3]
print(statistics.multimode(data)) # [1, 2]
Spread
Variance
import statistics
data = [2, 4, 4, 4, 5, 5, 7, 9]
# Sample variance (n-1 denominator)
print(statistics.variance(data)) # 4.571...
# Population variance (n denominator)
print(statistics.pvariance(data)) # 4.0
Standard Deviation
import statistics
data = [2, 4, 4, 4, 5, 5, 7, 9]
# Sample standard deviation
print(statistics.stdev(data)) # 2.138...
# Population standard deviation
print(statistics.pstdev(data)) # 2.0
When to Use Sample vs Population
- Sample (`variance`, `stdev`): When data is a sample from a larger population
- Population (`pvariance`, `pstdev`): When data represents the entire population
Most real-world scenarios use sample functions.
Quantiles
import statistics
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
# Quartiles
q1 = statistics.quantiles(data, n=4)[0]
q2 = statistics.quantiles(data, n=4)[1] # median
q3 = statistics.quantiles(data, n=4)[2]
print(f"Q1: {q1}, Q2: {q2}, Q3: {q3}")
# Q1: 2.75, Q2: 5.5, Q3: 8.25
# Percentiles
percentiles = statistics.quantiles(data, n=100)
print(f"90th percentile: {percentiles[89]}")
Median Variants
import statistics
data = [1, 3, 5, 7]
# Standard median (average of middle two)
print(statistics.median(data)) # 4.0
# Low median (lower of middle two)
print(statistics.median_low(data)) # 3
# High median (higher of middle two)
print(statistics.median_high(data)) # 5
Harmonic and Geometric Means
import statistics
data = [1, 2, 4, 8]
# Harmonic mean (good for rates)
print(statistics.harmonic_mean(data)) # 2.133...
# Geometric mean (good for growth rates)
print(statistics.geometric_mean(data)) # 2.828...
Correlation
import statistics
x = [1, 2, 3, 4, 5]
y = [2, 4, 5, 4, 5]
# Pearson correlation coefficient
r = statistics.correlation(x, y)
print(f"Correlation: {r:.3f}") # 0.775
# Linear regression
slope, intercept = statistics.linear_regression(x, y)
print(f"y = {slope:.2f}x + {intercept:.2f}")
Covariance
import statistics
x = [1, 2, 3, 4, 5]
y = [2, 4, 5, 4, 5]
print(statistics.covariance(x, y)) # 1.5
Practical Examples
Grade Analysis
import statistics
grades = [85, 90, 78, 92, 88, 76, 95, 89]
print(f"Average: {statistics.mean(grades):.1f}")
print(f"Median: {statistics.median(grades)}")
print(f"Std Dev: {statistics.stdev(grades):.1f}")
# Grade distribution
quartiles = statistics.quantiles(grades, n=4)
print(f"25%: {quartiles[0]}, 50%: {quartiles[1]}, 75%: {quartiles[2]}")
Response Time Analysis
import statistics
response_times = [120, 145, 132, 118, 155, 128, 142, 135]
mean = statistics.mean(response_times)
median = statistics.median(response_times)
stdev = statistics.stdev(response_times)
print(f"Mean: {mean:.0f}ms")
print(f"Median: {median}ms")
print(f"Std Dev: {stdev:.0f}ms")
# Identify outliers (beyond 2 std devs)
outliers = [t for t in response_times if abs(t - mean) > 2 * stdev]
print(f"Outliers: {outliers}")
A/B Test Analysis
import statistics
control = [2.1, 2.5, 2.3, 2.8, 2.2]
treatment = [2.8, 3.1, 2.9, 3.2, 3.0]
control_mean = statistics.mean(control)
treatment_mean = statistics.mean(treatment)
improvement = (treatment_mean - control_mean) / control_mean * 100
print(f"Control mean: {control_mean:.2f}")
print(f"Treatment mean: {treatment_mean:.2f}")
print(f"Improvement: {improvement:.1f}%")
NormalDist
Work with normal distributions:
from statistics import NormalDist
# Create distribution
dist = NormalDist(mu=100, sigma=15) # IQ distribution
# Probability density
print(dist.pdf(100)) # Peak at mean
# Cumulative probability
print(dist.cdf(115)) # ~84% below 115
# Inverse CDF (quantile)
print(dist.inv_cdf(0.5)) # 100 (median)
# Sample generation
samples = dist.samples(10)
statistics vs numpy
Use statistics when:
- Simple calculations on small datasets
- No external dependencies allowed
- Standard library only environment
Use numpy when:
- Large datasets (much faster)
- Advanced statistical operations
- Already using numpy/pandas ecosystem
# statistics: readable, standard library
import statistics
mean = statistics.mean(data)
# numpy: faster for large arrays
import numpy as np
mean = np.mean(data)
Empty Data Handling
Statistics functions raise StatisticsError on empty data:
import statistics
try:
    statistics.mean([])
except statistics.StatisticsError:
    print("No data!")
The statistics module covers 90% of basic statistical needs with zero dependencies. For more advanced analysis, graduate to numpy/scipy, but for quick calculations, it's perfect.
React to this post: