######################################################################
# Copyright (c) Adrien Luxey-Bitri, Boris Baldassari
#
# This program and the accompanying materials are made
# available under the terms of the Eclipse Public License 2.0
# which is available at https://www.eclipse.org/legal/epl-2.0/
#
# SPDX-License-Identifier: EPL-2.0
######################################################################

"""
A package for learning network programming in Python.

This module (file) manages the parsing of HTTP requests.
"""


def parse_request(buf: bytes) -> dict[str, dict]:
    """Parses a full HTTP request bytes buffer into a dict.

    The parsed request dict contains two keys:
    - head: dict[str, str]
        Information on the HTTP request header (i.e. the first request
        line); output of `parse_request_head`.
    - params: dict[str, str]
        The HTTP parameters (i.e. the lines following the first one, up
        to the first empty line); output of `parse_request_params`.

    Anything located after the first empty line (the request body, if
    any) is ignored by this function.

    An example of return:
    ```
    {
        'head': {'verb': 'GET', 'resource': '/index.html'},
        'params': {
            'Host': 'localhost:8000',
            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0',
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8',
            'Accept-Language': 'en-GB,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate, br',
            [SNIP]
        }
    }
    ```

    Parameters
    ----------
    - buf: bytes
        The HTTP request buffer.

    Returns
    -------
    dict[str, dict]
        The parsed content of the HTTP request.

    Raises
    ------
    ValueError
        The request is not valid HTTP (this includes empty or
        whitespace-only buffers, and buffers that are not valid UTF-8,
        since `UnicodeDecodeError` is a subclass of `ValueError`).
    """
    if not buf:
        raise ValueError("Received empty request")

    lines = buf.decode('utf-8').strip().splitlines()
    # A buffer made only of whitespace/newlines yields no line at all:
    # report it as an invalid request instead of failing on `lines[0]`.
    if not lines:
        raise ValueError("Received empty request")

    # The first empty line separates the headers from the body; only the
    # lines before it are request parameters.
    if '' in lines:
        lines = lines[:lines.index('')]

    return dict(
        head=parse_request_head(lines[0]),
        params=parse_request_params(lines[1:]),
    )


def parse_request_head(line: str) -> dict[str, str]:
    """Parses a HTTP request header string (its first line) into a dict.

    The parsed request dict contains two keys:
    - verb: str
        The _uppercase_ verb of the request, i.e. the first word of the
        line; for example: "GET".
    - resource: str
        The requested resource, i.e. the second "word" of the line;
        for example: "/index.html".

    Parameters
    ----------
    - line: str
        The HTTP request header (the first line of a full HTTP request).

    Returns
    -------
    dict[str, str]
        The parsed content of the HTTP request header.

    Raises
    ------
    ValueError
        The request header is not valid HTTP.
    """
    # A valid request line is exactly "<verb> <resource> <version>",
    # separated by single spaces.
    fields = line.split(' ')
    if len(fields) != 3:
        raise ValueError(f"Request header is invalid: {line}")

    verb, resource, _version = fields
    return dict(verb=verb.upper(), resource=resource)


def parse_request_params(lines: list[str]) -> dict[str, str]:
    """Parses HTTP request parameters (a list of lines) into a dict.

    The parsed request dict contains one key/value pair per line, with
    the dict key being the left part of the line (the parameter key),
    and the dict value being the right part of the line (the parameter
    value). Key and value are separated by the first ": " of the line,
    so a value may itself contain ": ".

    The function strips leading and trailing spaces of each line:
    " Host: a.org " becomes `{"Host": "a.org"}`.

    Parameters
    ----------
    - lines: list[str]
        HTTP parameters (one list item per line)

    Returns
    -------
    dict[str, str]
        Dictionary of the parameters

    Raises
    ------
    ValueError
        The provided lines are not valid HTTP: a line has no ": "
        separator, an empty key, or an empty value.
    """
    params = {}
    for line in lines:
        # Split on the first separator only: the value is free text and
        # may legitimately contain ": " itself.
        key, separator, value = line.strip().partition(': ')
        if not separator or not key or not value:
            raise ValueError(
                f"Request line is not a valid key/value pair: {line}")
        params[key] = value
    return params