The statistics module provides functions for basic statistical calculations. No external dependencies needed—just import and calculate.

Central Tendency

Mean (Average)

import statistics
 
data = [1, 2, 3, 4, 5]
print(statistics.mean(data))  # 3

Median (Middle Value)

import statistics
 
data = [1, 3, 5, 7, 9]
print(statistics.median(data))  # 5
 
# Even number of values: average of middle two
data = [1, 2, 3, 4]
print(statistics.median(data))  # 2.5

Mode (Most Common)

import statistics
 
data = [1, 2, 2, 3, 3, 3, 4]
print(statistics.mode(data))  # 3
 
# Works with strings too
colors = ['red', 'blue', 'red', 'green', 'red']
print(statistics.mode(colors))  # 'red'

Multimode (All Modes)

import statistics
 
data = [1, 1, 2, 2, 3]
print(statistics.multimode(data))  # [1, 2]

Spread

Variance

import statistics
 
data = [2, 4, 4, 4, 5, 5, 7, 9]
 
# Sample variance (n-1 denominator)
print(statistics.variance(data))  # 4.571...
 
# Population variance (n denominator)
print(statistics.pvariance(data))  # 4

Standard Deviation

import statistics
 
data = [2, 4, 4, 4, 5, 5, 7, 9]
 
# Sample standard deviation
print(statistics.stdev(data))  # 2.138...
 
# Population standard deviation
print(statistics.pstdev(data))  # 2.0

When to Use Sample vs Population

  • Sample (variance, stdev): When data is a sample from a larger population
  • Population (pvariance, pstdev): When data represents the entire population

Most real-world scenarios use sample functions.

Quantiles

import statistics
 
data = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
 
# Quartiles
q1 = statistics.quantiles(data, n=4)[0]
q2 = statistics.quantiles(data, n=4)[1]  # median
q3 = statistics.quantiles(data, n=4)[2]
 
print(f"Q1: {q1}, Q2: {q2}, Q3: {q3}")
# Q1: 2.75, Q2: 5.5, Q3: 8.25
 
# Percentiles
percentiles = statistics.quantiles(data, n=100)
print(f"90th percentile: {percentiles[89]}")

Median Variants

import statistics
 
data = [1, 3, 5, 7]
 
# Standard median (average of middle two)
print(statistics.median(data))  # 4.0
 
# Low median (lower of middle two)
print(statistics.median_low(data))  # 3
 
# High median (higher of middle two)
print(statistics.median_high(data))  # 5

Harmonic and Geometric Means

import statistics
 
data = [1, 2, 4, 8]
 
# Harmonic mean (good for rates)
print(statistics.harmonic_mean(data))  # 2.133...
 
# Geometric mean (good for growth rates)
print(statistics.geometric_mean(data))  # 2.828...

Correlation

import statistics
 
x = [1, 2, 3, 4, 5]
y = [2, 4, 5, 4, 5]
 
# Pearson correlation coefficient
r = statistics.correlation(x, y)
print(f"Correlation: {r:.3f}")  # 0.775
 
# Linear regression
slope, intercept = statistics.linear_regression(x, y)
print(f"y = {slope:.2f}x + {intercept:.2f}")

Covariance

import statistics
 
x = [1, 2, 3, 4, 5]
y = [2, 4, 5, 4, 5]
 
print(statistics.covariance(x, y))  # 1.5

Practical Examples

Grade Analysis

import statistics
 
grades = [85, 90, 78, 92, 88, 76, 95, 89]
 
print(f"Average: {statistics.mean(grades):.1f}")
print(f"Median: {statistics.median(grades)}")
print(f"Std Dev: {statistics.stdev(grades):.1f}")
 
# Grade distribution
quartiles = statistics.quantiles(grades, n=4)
print(f"25%: {quartiles[0]}, 50%: {quartiles[1]}, 75%: {quartiles[2]}")

Response Time Analysis

import statistics
 
response_times = [120, 145, 132, 118, 155, 128, 142, 135]
 
mean = statistics.mean(response_times)
median = statistics.median(response_times)
stdev = statistics.stdev(response_times)
 
print(f"Mean: {mean:.0f}ms")
print(f"Median: {median}ms")
print(f"Std Dev: {stdev:.0f}ms")
 
# Identify outliers (beyond 2 std devs)
outliers = [t for t in response_times if abs(t - mean) > 2 * stdev]
print(f"Outliers: {outliers}")

A/B Test Analysis

import statistics
 
control = [2.1, 2.5, 2.3, 2.8, 2.2]
treatment = [2.8, 3.1, 2.9, 3.2, 3.0]
 
control_mean = statistics.mean(control)
treatment_mean = statistics.mean(treatment)
improvement = (treatment_mean - control_mean) / control_mean * 100
 
print(f"Control mean: {control_mean:.2f}")
print(f"Treatment mean: {treatment_mean:.2f}")
print(f"Improvement: {improvement:.1f}%")

NormalDist

Work with normal distributions:

from statistics import NormalDist
 
# Create distribution
dist = NormalDist(mu=100, sigma=15)  # IQ distribution
 
# Probability density
print(dist.pdf(100))  # Peak at mean
 
# Cumulative probability
print(dist.cdf(115))  # ~84% below 115
 
# Inverse CDF (quantile)
print(dist.inv_cdf(0.5))  # 100 (median)
 
# Sample generation
samples = dist.samples(10)

statistics vs numpy

Use statistics when:

  • Simple calculations on small datasets
  • No external dependencies allowed
  • Standard-library-only environment

Use numpy when:

  • Large datasets (much faster)
  • Advanced statistical operations
  • Already using numpy/pandas ecosystem

The same mean computed both ways:
# statistics: readable, standard library
import statistics
mean = statistics.mean(data)
 
# numpy: faster for large arrays
import numpy as np
mean = np.mean(data)

Empty Data Handling

Most statistics functions (mean, median, mode, and so on) raise StatisticsError on empty data; multimode is an exception and returns an empty list instead:

import statistics
 
try:
    statistics.mean([])
except statistics.StatisticsError:
    print("No data!")

The statistics module covers 90% of basic statistical needs with zero dependencies. For more advanced analysis, graduate to numpy/scipy, but for quick calculations, it's perfect.

React to this post: