The urllib.parse module handles URL parsing and manipulation. It's essential for working with web URLs—splitting them apart, building them up, and encoding them properly.
Parse a URL
Break a URL into components:
from urllib.parse import urlparse

# A URL exercising every component urlparse can extract.
url = 'https://user:pass@example.com:8080/path/to/page?query=1&sort=name#section'
result = urlparse(url)

# scheme/netloc/path/query/fragment are fields of the ParseResult tuple;
# hostname, port, and username are properties derived from netloc.
for field in ('scheme', 'netloc', 'hostname', 'port',
              'path', 'query', 'fragment', 'username'):
    print(getattr(result, field))
print(result.password) # pass

Build a URL
Assemble components into a URL:
from urllib.parse import urlunparse

# urlunparse takes a 6-tuple: (scheme, netloc, path, params, query, fragment).
scheme, netloc, path = 'https', 'example.com', '/search'
params, query, fragment = '', 'q=python', 'results'

url = urlunparse((scheme, netloc, path, params, query, fragment))
print(url) # https://example.com/search?q=python#results

Parse Query String
Extract query parameters:
from urllib.parse import parse_qs, parse_qsl

query = 'name=Alice&age=30&hobby=coding&hobby=gaming'

# parse_qs: dict where every value is a LIST (repeated keys accumulate).
params = parse_qs(query)
print(params)  # {'name': ['Alice'], 'age': ['30'], 'hobby': ['coding', 'gaming']}

# parse_qsl: flat list of (key, value) tuples, order preserved.
pairs = parse_qsl(query)
print(pairs)
# [('name', 'Alice'), ('age', '30'), ('hobby', 'coding'), ('hobby', 'gaming')]

Build Query String
Create query string from dict:
from urllib.parse import urlencode

# Dict input: one value per key; spaces become '+'.
params = dict(search='python tutorial', page=1, sort='date')
query = urlencode(params)
print(query)  # search=python+tutorial&page=1&sort=date

# Sequence-of-pairs input allows the same key to repeat.
params = [('tag', t) for t in ('python', 'web', 'api')]
query = urlencode(params)
print(query) # tag=python&tag=web&tag=api

URL Encoding
Encode special characters:
from urllib.parse import quote, quote_plus

text = 'hello world & more'

# quote: percent-encodes, spaces -> %20 (path segments).
print(quote(text))       # hello%20world%20%26%20more
# quote_plus: spaces -> '+' (query-string values).
print(quote_plus(text))  # hello+world+%26+more

# '/' is in quote's default safe set, so paths pass through.
print(quote('/path/to/file'))
print(quote('/path/to/file', safe='')) # %2Fpath%2Fto%2Ffile

URL Decoding
from urllib.parse import unquote, unquote_plus

# unquote reverses percent-encoding (leaves '+' alone).
encoded = 'hello%20world%20%26%20more'
print(unquote(encoded))  # hello world & more

# unquote_plus additionally turns '+' back into spaces.
plus_encoded = 'hello+world+%26+more'
print(unquote_plus(plus_encoded)) # hello world & more

Join URLs
Combine base URL with relative path:
from urllib.parse import urljoin

base = 'https://example.com/docs/guide/'

# Each reference demonstrates a different joining rule.
references = (
    'chapter1.html',       # relative: appended under the base directory
    '../api/',             # '..' climbs one directory level
    '/about',              # absolute path: restarts from the site root
    'https://other.com',   # full URL: replaces the base entirely
)
for ref in references:
    print(urljoin(base, ref))
# https://other.com

Modify Query Parameters
Add or update query params:
from urllib.parse import urlparse, parse_qs, urlencode, urlunparse

def add_query_params(url, params):
    """Return *url* with *params* merged into its query string.

    params maps keys to a string or a list of strings. Keys already
    present in the URL are overwritten; all other URL components
    (scheme, netloc, path, params, fragment) are preserved.
    """
    parsed = urlparse(url)
    merged = parse_qs(parsed.query)
    merged.update(params)
    # With doseq=True, urlencode emits one key=value pair per list
    # element and accepts plain strings too, so no pre-conversion of
    # single-item lists is needed.
    query = urlencode(merged, doseq=True)
    # ParseResult is a namedtuple; _replace swaps in the new query
    # while keeping every other component intact.
    return urlunparse(parsed._replace(query=query))

url = 'https://example.com/search?q=python'
new_url = add_query_params(url, {'page': ['2'], 'sort': ['date']})
print(new_url)
# https://example.com/search?q=python&page=2&sort=date

Split and Unsplit
More control over URL components:
from urllib.parse import urlsplit, urlunsplit

url = 'https://example.com/path?query=1#fragment'

# urlsplit is urlparse minus the rarely-used 'params' component:
# a 5-tuple of (scheme, netloc, path, query, fragment).
result = urlsplit(url)
print(result)
new_url = urlunsplit(result)

Validate URLs
Basic URL validation:
from urllib.parse import urlparse

def is_valid_url(url):
    """Return True if *url* parses with both a scheme and a netloc.

    urlparse raises ValueError for some malformed inputs (e.g. an
    unclosed IPv6 bracket); catch only that, not a bare except which
    would also swallow KeyboardInterrupt/SystemExit.
    """
    try:
        result = urlparse(url)
    except ValueError:
        return False
    return bool(result.scheme and result.netloc)

print(is_valid_url('https://example.com'))  # True
print(is_valid_url('not-a-url'))            # False
print(is_valid_url('//example.com')) # False (no scheme)

Extract Domain
from urllib.parse import urlparse

def get_domain(url):
    """Return the host of *url* — no port, no credentials, lowercased."""
    return urlparse(url).hostname

print(get_domain('https://www.example.com/page'))  # www.example.com
print(get_domain('https://sub.example.com:8080/')) # sub.example.com

Safe URL Building
Build URLs without injection risks:
from urllib.parse import urlencode, quote

def build_search_url(base, query):
    """Build a search URL with *query* safely percent-encoded.

    Chooses '&' when *base* already carries a query string and '?'
    otherwise, so a base like '/search?lang=en' doesn't end up with
    a second '?'.
    """
    sep = '&' if '?' in base else '?'
    return f'{base}{sep}{urlencode({"q": query})}'

# Hostile user input comes out harmlessly percent-encoded.
user_input = '"><script>alert(1)</script>'
url = build_search_url('https://example.com/search', user_input)
print(url)
# https://example.com/search?q=%22%3E%3Cscript%3Ealert%281%29%3C%2Fscript%3E

Handle IDN (International Domain Names)
from urllib.parse import urlparse

# Non-ASCII (internationalized) hostnames parse without any special handling.
url = 'https://münchen.example/path'
parsed = urlparse(url)
print(parsed.hostname) # münchen.example

Common Patterns
from urllib.parse import urlparse, urljoin, urlencode

# API endpoint builder
class APIClient:
    """Minimal helper that builds endpoint URLs relative to a base URL."""

    def __init__(self, base_url):
        # The trailing '/' matters to urljoin: 'v1/' keeps 'v1' in joined paths.
        self.base_url = base_url

    def build_url(self, path, params=None):
        """Join *path* onto the base URL, appending *params* as a query string."""
        url = urljoin(self.base_url, path)
        if params:
            # Use '&' when the joined URL already has a query string,
            # so we never emit a second '?'.
            sep = '&' if '?' in url else '?'
            url = f'{url}{sep}{urlencode(params)}'
        return url

api = APIClient('https://api.example.com/v1/')
print(api.build_url('users', {'limit': 10}))
# https://api.example.com/v1/users?limit=10

When to Use urllib.parse
Use urllib.parse when:
- Parsing URLs from user input
- Building URLs programmatically
- Encoding query parameters
- Manipulating URL components
For HTTP requests, use requests or httpx—but use urllib.parse for URL manipulation before making requests.
React to this post: