The urllib.parse module handles URL parsing and manipulation. It's essential for working with web URLs—splitting them apart, building them up, and encoding them properly.

Parse a URL

Break a URL into components:

from urllib.parse import urlparse

url = 'https://user:pass@example.com:8080/path/to/page?query=1&sort=name#section'
result = urlparse(url)

# Each piece of the URL is an attribute on the ParseResult:
# scheme=https, netloc=user:pass@example.com:8080, hostname=example.com,
# port=8080, path=/path/to/page, query=query=1&sort=name,
# fragment=section, username=user, password=pass
for field in ('scheme', 'netloc', 'hostname', 'port', 'path',
              'query', 'fragment', 'username', 'password'):
    print(getattr(result, field))

Build a URL

Assemble components into a URL:

from urllib.parse import urlunparse

# urlunparse wants all six slots, in order:
# (scheme, netloc, path, params, query, fragment) — pass '' for unused ones.
components = ('https', 'example.com', '/search', '', 'q=python', 'results')
url = urlunparse(components)
print(url)  # https://example.com/search?q=python#results

Parse Query String

Extract query parameters:

from urllib.parse import parse_qs, parse_qsl

query = 'name=Alice&age=30&hobby=coding&hobby=gaming'

# parse_qs groups repeated keys: every value is a LIST, even singletons.
params = parse_qs(query)
print(params)  # {'name': ['Alice'], 'age': ['30'], 'hobby': ['coding', 'gaming']}

# parse_qsl keeps the original ordering as flat (key, value) pairs.
pairs = parse_qsl(query)
print(pairs)  # [('name', 'Alice'), ('age', '30'), ('hobby', 'coding'), ('hobby', 'gaming')]

Build Query String

Create query string from dict:

from urllib.parse import urlencode

# A dict encodes each key once; spaces become '+' in the result.
params = {'search': 'python tutorial', 'page': 1, 'sort': 'date'}
query = urlencode(params)
print(query)  # search=python+tutorial&page=1&sort=date

# A sequence of pairs is how you repeat the same key.
params = [('tag', 'python'), ('tag', 'web'), ('tag', 'api')]
query = urlencode(params)
print(query)  # tag=python&tag=web&tag=api

URL Encoding

Encode special characters:

from urllib.parse import quote, quote_plus

text = 'hello world & more'

# quote: spaces -> %20 (use for path segments).
# quote_plus: spaces -> + (use for query-string values).
print(quote(text))       # hello%20world%20%26%20more
print(quote_plus(text))  # hello+world+%26+more

# By default '/' is considered safe; pass safe='' to encode it too.
print(quote('/path/to/file'))           # /path/to/file
print(quote('/path/to/file', safe=''))  # %2Fpath%2Fto%2Ffile

URL Decoding

from urllib.parse import unquote, unquote_plus

# unquote reverses %XX escapes; unquote_plus additionally maps '+' to space.
encoded = 'hello%20world%20%26%20more'
plus_encoded = 'hello+world+%26+more'
print(unquote(encoded))            # hello world & more
print(unquote_plus(plus_encoded))  # hello world & more

Join URLs

Combine base URL with relative path:

from urllib.parse import urljoin

base = 'https://example.com/docs/guide/'

# Resolution follows browser rules:
#   relative path  -> resolved against the base directory
#   '../'          -> climbs one level
#   leading '/'    -> restarts from the site root
#   full URL       -> replaces the base entirely
for target in ('chapter1.html', '../api/', '/about', 'https://other.com'):
    print(urljoin(base, target))
# https://example.com/docs/guide/chapter1.html
# https://example.com/docs/api/
# https://example.com/about
# https://other.com

Modify Query Parameters

Add or update query params:

from urllib.parse import urlparse, parse_qs, urlencode, urlunparse

def add_query_params(url, params):
    """Return *url* with *params* merged into its query string.

    Args:
        url: The URL to modify.
        params: Mapping of key -> value (a string or a list of strings).
            Keys already present in the URL are replaced.

    Returns:
        The rebuilt URL string; all other components are preserved.

    Note: parse_qs silently drops blank values ('?q=' is lost on the
    round trip); pass keep_blank_values=True if they must survive.
    """
    parsed = urlparse(url)
    merged = parse_qs(parsed.query)
    merged.update(params)
    # doseq=True expands list values into repeated keys and treats plain
    # strings as single values, so no single-item unwrapping is needed.
    query = urlencode(merged, doseq=True)
    # ParseResult is a named tuple, so _replace rebuilds it cleanly.
    return urlunparse(parsed._replace(query=query))

url = 'https://example.com/search?q=python'
new_url = add_query_params(url, {'page': ['2'], 'sort': ['date']})
print(new_url)
# https://example.com/search?q=python&page=2&sort=date

Split and Unsplit

urlsplit works like urlparse but skips the rarely used path-parameters field, returning a 5-tuple instead of a 6-tuple — usually the better choice:

from urllib.parse import urlsplit, urlunsplit

url = 'https://example.com/path?query=1#fragment'

# urlsplit returns a 5-field SplitResult (no separate 'params' field).
result = urlsplit(url)
print(result)
# SplitResult(scheme='https', netloc='example.com',
#             path='/path', query='query=1', fragment='fragment')

# urlunsplit is the exact inverse — the round trip reproduces the URL.
new_url = urlunsplit(result)

Validate URLs

Basic URL validation:

from urllib.parse import urlparse

def is_valid_url(url):
    """Return True if *url* parses with both a scheme and a network location.

    This is a structural check only — it does not verify that the host
    exists or that the scheme is one we can handle.
    """
    try:
        result = urlparse(url)
    except ValueError:
        # urlparse raises ValueError for malformed input, e.g. an
        # unclosed IPv6 bracket ('http://[::1') or an invalid port.
        # A bare except here would also swallow KeyboardInterrupt etc.
        return False
    return bool(result.scheme and result.netloc)

print(is_valid_url('https://example.com'))  # True
print(is_valid_url('not-a-url'))  # False
print(is_valid_url('//example.com'))  # False (no scheme)

Extract Domain

from urllib.parse import urlparse

def get_domain(url):
    """Return the host part of *url* — no port, credentials, or path."""
    return urlparse(url).hostname

print(get_domain('https://www.example.com/page'))   # www.example.com
print(get_domain('https://sub.example.com:8080/'))  # sub.example.com

Safe URL Building

Build URLs without injection risks:

from urllib.parse import urlencode, quote

def build_search_url(base, query):
    """Return base?q=<query> with the query value percent-encoded."""
    return f"{base}?{urlencode({'q': query})}"

# Hostile input is neutralized by the encoding — no markup survives.
user_input = '"><script>alert(1)</script>'
url = build_search_url('https://example.com/search', user_input)
print(url)
# https://example.com/search?q=%22%3E%3Cscript%3Ealert%281%29%3C%2Fscript%3E

Handle IDN (International Domain Names)

from urllib.parse import urlparse

# urlparse splits a Unicode domain as-is; it does NOT IDNA-encode it.
# (Use 'münchen.example'.encode('idna') when the wire form is needed.)
url = 'https://münchen.example/path'
print(urlparse(url).hostname)  # münchen.example

Common Patterns

from urllib.parse import urlparse, urljoin, urlencode

class APIClient:
    """Minimal helper that builds endpoint URLs under a single base URL."""

    def __init__(self, base_url):
        # Keep the trailing slash on base_url so urljoin appends rather
        # than replaces the final path segment.
        self.base_url = base_url

    def build_url(self, path, params=None):
        """Join *path* onto the base, appending *params* as a query string."""
        endpoint = urljoin(self.base_url, path)
        if params:
            endpoint = f'{endpoint}?{urlencode(params)}'
        return endpoint

api = APIClient('https://api.example.com/v1/')
print(api.build_url('users', {'limit': 10}))
# https://api.example.com/v1/users?limit=10

When to Use urllib.parse

Use urllib.parse when:

  • Parsing URLs from user input
  • Building URLs programmatically
  • Encoding query parameters
  • Manipulating URL components

For HTTP requests, use requests or httpx—but use urllib.parse for URL manipulation before making requests.

React to this post: