ue_pe_web/scripts/03_server/src_students_10/myserver/http_request.py
2024-03-21 21:43:49 +01:00

145 lines
4 KiB
Python

######################################################################
# Copyright (c) Adrien Luxey-Bitri, Boris Baldassari
#
# This program and the accompanying materials are made
# available under the terms of the Eclipse Public License 2.0
# which is available at https://www.eclipse.org/legal/epl-2.0/
#
# SPDX-License-Identifier: EPL-2.0
######################################################################
"""
A package for learning network programming in Python.
This module (file) manages the parsing of HTTP requests.
"""
def parse_request(buf: bytes) -> dict[str, dict]:
    """Parse a full HTTP request bytes buffer into a dict.

    The parsed request dict contains two keys:

    - head: dict[str, str]
        Information on the HTTP request header (i.e. the first request
        line); output of `parse_request_head`.
    - params: dict[str, str]
        The HTTP parameters (i.e. the lines following the first one, up to
        the first empty line, which ends the header section);
        output of `parse_request_params`.

    Anything located after the first empty line (the request body) is
    ignored by this function.

    An example of return:
    ```
    {
        'head': {'verb': 'GET', 'resource': '/index.html'},
        'params': {
            'Host': 'localhost:8000',
            'Accept-Language': 'en-GB,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate, br',
            [SNIP]
        }
    }
    ```

    Parameters
    ----------
    - buf: bytes
        The HTTP request buffer.

    Returns
    -------
    dict[str, dict]
        The parsed content of the HTTP request.

    Raises
    ------
    ValueError
        The request is empty or blank, is not valid UTF-8, or is not
        valid HTTP.
    """
    if buf == b'':
        raise ValueError("Received empty request")

    # A failed decode raises UnicodeDecodeError, which is a ValueError
    # subclass, so the documented contract holds for undecodable input too.
    lines = buf.decode('utf-8').strip().splitlines()

    # A buffer made only of whitespace/newlines yields no line at all;
    # report it as an invalid request instead of failing on lines[0].
    if not lines:
        raise ValueError("Received empty request")

    # The header section stops at the first empty line; what follows is
    # the request body and must not be parsed as parameters.
    try:
        header_end = lines.index('', 1)
    except ValueError:
        header_end = len(lines)
    param_lines = lines[1:header_end]

    req_head = parse_request_head(lines[0])
    req_params = parse_request_params(param_lines) if param_lines else {}

    return {'head': req_head, 'params': req_params}
def parse_request_head(line: str) -> dict[str, str]:
    """Parse an HTTP request header string (its first line) into a dict.

    The line must be made of exactly three non-empty fields separated by
    single spaces, e.g. "GET /index.html HTTP/1.1". The third field is
    required to be present but its content is not checked nor returned.

    The parsed request dict contains two keys:

    - verb: str
        The _uppercase_ verb of the request, i.e. the first word of the
        line; for example: "GET".
    - resource: str
        The requested resource, i.e. the second "word" of the line;
        for example: "/index.html".

    Parameters
    ----------
    - line: str
        The HTTP request header (the first line of a full HTTP request).

    Returns
    -------
    dict[str, str]
        The parsed content of the HTTP request header.

    Raises
    ------
    ValueError
        The request header is not valid HTTP: it does not have exactly
        three fields, or one of them is empty.
    """
    fields = line.split(' ')

    # Splitting on a single space turns doubled, leading or trailing spaces
    # into empty fields; those must be rejected, otherwise a line such as
    # "GET  HTTP/1.1" would be accepted with an empty resource.
    if len(fields) != 3 or not all(fields):
        raise ValueError(f"Request header is invalid: {line}")

    verb, resource, _version = fields
    return {'verb': verb.upper(), 'resource': resource}
def parse_request_params(lines: list[str]) -> dict[str, str]:
    """Parse HTTP request parameters (a list of lines) into a dict.

    The parsed request dict contains one key/value pair per line, with the
    dict key being the left part of the line (the parameter key), and the
    dict value being the right part of the line (the parameter value).
    Key and value are separated by the first ": " found in the line, so a
    value may itself contain ": ".

    The function strips leading and trailing spaces of each line:
    " Host: a.org " becomes `{"Host": "a.org"}`.

    If the same key appears on several lines, the last value is kept.

    Parameters
    ----------
    - lines: list[str]
        HTTP parameters (one list item per line)

    Returns
    -------
    dict[str, str]
        Dictionary of the parameters

    Raises
    ------
    ValueError
        The provided lines are not valid HTTP: a line has no ": "
        separator, or has an empty key or value.
    """
    params = {}
    for line in lines:
        # Split on the first ': ' only: everything after it belongs to the
        # value, even when the value contains further ': ' sequences.
        key, sep, value = line.strip().partition(': ')
        if not sep or not key or not value:
            raise ValueError(f"Request line is not a valid key/value pair: {line}")
        params[key] = value
    return params