145 lines
4 KiB
Python
145 lines
4 KiB
Python
######################################################################
# Copyright (c) Adrien Luxey-Bitri, Boris Baldassari
#
# This program and the accompanying materials are made
# available under the terms of the Eclipse Public License 2.0
# which is available at https://www.eclipse.org/legal/epl-2.0/
#
# SPDX-License-Identifier: EPL-2.0
######################################################################
|
|
|
|
"""
|
|
A package for learning network programming in Python.
|
|
|
|
This module (file) manages the parsing of HTTP requests.
|
|
"""
|
|
|
|
|
|
def parse_request(buf: bytes) -> dict[str, dict]:
    """Parses a full HTTP request bytes buffer into a dict.

    The parsed request dict contains two keys:
    - head: dict[str, str]
        Information on the HTTP request header (i.e. the first request line);
        output of `parse_request_head`.
    - params: dict[str, str]
        The HTTP parameters (i.e. the lines following the first one, up to
        the first empty line); output of `parse_request_params`.

    Anything located after the first empty line (the request body) is
    ignored.

    An example of return:
    ```
    {
        'head': { 'verb': 'GET', 'resource': '/index.html'},
        'params': {
            'Host': 'localhost:8000',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-GB,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate, br',
            [SNIP]
        }
    }
    ```

    Parameters
    ----------
    - buf: bytes
        The HTTP request buffer.

    Returns
    -------
    dict[str, dict]
        The parsed content of the HTTP request.

    Raises
    ------
    ValueError
        The request is empty (or only made of whitespace), cannot be decoded
        as UTF-8, or is not valid HTTP.
    """
    if buf == b'':
        raise ValueError("Received empty request")

    # UnicodeDecodeError is a subclass of ValueError, so an undecodable
    # buffer already honours the documented contract.
    lines = buf.decode('utf-8').strip().splitlines()
    if not lines:
        # Whitespace-only buffer: without this guard, `lines[0]` would raise
        # an IndexError instead of the documented ValueError.
        raise ValueError("Received empty request")

    head_line, *param_lines = lines

    # An empty line separates the parameters from the request body; the body
    # is not made of key/value pairs and must not be parsed as parameters.
    if '' in param_lines:
        param_lines = param_lines[:param_lines.index('')]

    req_params = dict()
    if param_lines:
        req_params = parse_request_params(param_lines)

    return dict(
        head=parse_request_head(head_line),
        params=req_params
    )
|
|
|
|
def parse_request_head(line: str) -> dict[str, str]:
    """Parses a HTTP request header string (its first line) into a dict.

    The line must be made of exactly three non-empty fields separated by a
    single space (verb, resource and protocol version); only the first two
    are returned.

    The parsed request dict contains two keys:
    - verb: str
        The _uppercase_ verb of the request, i.e. the first word of the line;
        for example: "GET".
    - resource: str
        The requested resource, i.e. the second "word" of the line;
        for example: "/index.html".

    Parameters
    ----------
    - line: str
        The HTTP request header (the first line of a full HTTP request).

    Returns
    -------
    dict[str, str]
        The parsed content of the HTTP request header.

    Raises
    ------
    ValueError
        The request header is not valid HTTP: it does not contain exactly
        three space-separated fields, or one of them is empty.
    """
    fields = line.split(' ')

    # Consecutive spaces produce empty fields: "GET  HTTP/1.1" splits into
    # three items but has no resource, so the count alone is not enough.
    if len(fields) != 3 or not all(fields):
        raise ValueError(f"Request header is invalid: {line}")

    verb, resource, _version = fields
    return dict(
        verb=verb.upper(),
        resource=resource
    )
|
|
|
|
def parse_request_params(lines: list[str]) -> dict[str, str]:
    """Parses HTTP request parameters (a list of lines) into a dict.

    The parsed request dict contains one key/value pair per line, with the
    dict key being the part of the line located before the first colon (the
    parameter key), and the dict value being everything after that colon
    (the parameter value). The value may itself contain colons, as in
    "Host: localhost:8000".

    The function strips leading and trailing spaces: " Host: a.org " becomes
    `{"Host": "a.org"}`. Whitespace between the colon and the value is
    optional and is stripped too.

    If the same key appears on several lines, the last one wins.

    Parameters
    ----------
    - lines: list[str]
        HTTP parameters (one list item per line)

    Returns
    -------
    dict[str, str]
        Dictionary of the parameters

    Raises
    ------
    ValueError
        The provided lines are not valid HTTP: a line has no colon, an empty
        key or an empty value.
    """
    params = dict()
    for line in lines:
        # Split on the first colon only: the value is allowed to contain
        # ": " itself, which a plain split(': ') would wrongly reject.
        key, sep, value = line.strip().partition(':')
        value = value.strip()

        if not sep or not key or not value:
            raise ValueError(f"Request line is not a valid key/value pair: {line}")

        params[key] = value

    return params
|
|
|
|
|