src.myserver.http_request

A package for learning network programming in Python.

This module (file) manages the parsing of HTTP requests.

  1######################################################################
  2# Copyright (c) Adrien Luxey-Bitri, Boris Baldassari
  3#
  4# This program and the accompanying materials are made
  5# available under the terms of the Eclipse Public License 2.0
  6# which is available at https://www.eclipse.org/legal/epl-2.0/
  7#
  8# SPDX-License-Identifier: EPL-2.0
  9######################################################################
 10
 11"""
 12A package for learning network programming in Python.
 13
 14This module (file) manages the parsing of HTTP requests.
 15"""
 16
 17# TODO: Handle the letter case of parameter names (case-insensitive keys)?
 18
 19def parse_request(buf: bytes) -> dict[str, dict]:
 20    """Parses a full HTTP request bytes buffer into a dict. 
 21
 22    The parsed request dict contains two keys:
 23    - head: dict[str, str]
 24        Information on the HTTP request header (i.e. the first request line);
 25        output of `parse_request_head`.
 26    - params: dict[str, str]
 27        List of the HTTP parameters (i.e. the following lines); 
 28        output of `parse_request_params`.
 29        
 30    An example of return:
 31    ```
 32    {
 33        'head': { 'verb': 'GET', 'resource': '//index.html'}, 
 34        'params': {
 35            'Host': 'localhost:8000', 
 36            'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0', 
 37            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', 
 38            'Accept-Language': 'en-GB,en;q=0.5', 
 39            'Accept-Encoding': 'gzip, deflate, br',
 40            [SNIP]
 41        }
 42    }
 43    ```
 44
 45    Parameters
 46    ----------
 47    - buf: bytes 
 48        The HTTP request buffer.
 49
 50    Returns
 51    -------
 52    dict[str, dict]
 53        The parsed content of the HTTP request.
 54
 55    Raises
 56    ------
 57    ValueError
 58        The request is not valid HTTP.
 59    """
 60    if buf == b'':
 61        raise ValueError("Received empty request")
 62    lines = buf.decode('utf-8').strip().splitlines()
 63
 64    req_head = parse_request_head(lines[0])
 65    req_params = dict()
 66    if len(lines) > 1:
 67        req_params = parse_request_params(lines[1:])
 68
 69    return dict(
 70        head=req_head,
 71        params=req_params
 72    )
 73
 74def parse_request_head(line: str) -> dict[str, str]:
 75    """Parses a HTTP request header string (its first line) into a dict.
 76
 77    The parsed request dict contains two keys:
 78    - verb: str
 79        The _uppercase_ verb of the request, i.e. the first word of the line;
 80        for example: "GET".
 81    - resource: str
 82        The requested resource, i.e. the second "word" of the line;
 83        for example: "/index.html".
 84        
 85    Parameters
 86    ----------
 87    - line: str
 88        The HTTP request header (the first line of a full HTTP request).
 89
 90    Returns
 91    -------
 92    dict[str, str]
 93        The parsed content of the HTTP request header.
 94            
 95    Raises
 96    ------
 97    ValueError
 98        The request header is not valid HTTP.
 99    """
100    fields = line.split(' ')
101    if len(fields) != 3:
102        raise ValueError(f"Request header is invalid: {line}")
103
104    return dict(
105        verb=fields[0].upper(),
106        resource=fields[1]
107    )
108
def parse_request_params(lines: list[str]) -> dict[str, str]:
    """Parses HTTP request parameters (a list of lines) into a dict.

    The parsed request dict contains one key/value pair per line, with the
    dict key being the left part of the line (the parameter key), and the
    dict value being the right part of the line (the parameter value).

    Each line is split on its first ": " only, so a value may itself
    contain ": ": "X-Note: a: b" becomes `{"X-Note": "a: b"}`.

    The function strips leading and trailing spaces: " Host: a.org  " becomes
    `{"Host": "a.org"}`.

    If the same key appears on several lines, the last line wins.

    Parameters
    ----------
    - lines: list[str]
        HTTP parameters (one list item per line)

    Returns
    -------
    dict[str, str]
        Dictionary of the parameters

    Raises
    ------
    ValueError
        The provided lines are not valid HTTP (missing ": " separator,
        empty key or empty value).
    """
    params = {}
    for line in lines:
        # partition() cuts at the first separator only, so the value keeps
        # any later ": " it contains.
        key, sep, value = line.strip().partition(': ')

        if not sep or not key or not value:
            raise ValueError(f"Request line is not a valid key/value pair: {line}")

        params[key] = value
    return params
def parse_request(buf: bytes) -> dict[str, dict]:
def parse_request(buf: bytes) -> dict[str, dict]:
    """Parses a full HTTP request bytes buffer into a dict.

    The parsed request dict contains two keys:
    - head: dict[str, str]
        Information on the HTTP request header (i.e. the first request line);
        output of `parse_request_head`.
    - params: dict[str, str]
        Dictionary of the HTTP parameters (i.e. the lines following the
        first one, up to the first empty line);
        output of `parse_request_params`.

    Anything located after the first empty line (the request body) is
    ignored by this function.

    An example of return:
    ```
    {
        'head': { 'verb': 'GET', 'resource': '/index.html'},
        'params': {
            'Host': 'localhost:8000',
            'Accept-Language': 'en-GB,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate, br',
            [SNIP]
        }
    }
    ```

    Parameters
    ----------
    - buf: bytes
        The HTTP request buffer.

    Returns
    -------
    dict[str, dict]
        The parsed content of the HTTP request.

    Raises
    ------
    ValueError
        The request is not valid HTTP (this includes an empty or
        whitespace-only buffer, and a buffer that is not valid UTF-8, since
        `UnicodeDecodeError` is a subclass of `ValueError`).
    """
    if buf == b'':
        raise ValueError("Received empty request")

    lines = buf.decode('utf-8').strip().splitlines()
    # A buffer made only of whitespace/newlines yields no line at all;
    # report it as an invalid request rather than failing on lines[0].
    if not lines:
        raise ValueError("Received empty request")

    req_head = parse_request_head(lines[0])

    # The parameters stop at the first empty line: what follows is the
    # request body, which must not be parsed as key/value pairs.
    param_lines = []
    for line in lines[1:]:
        if not line:
            break
        param_lines.append(line)

    req_params = parse_request_params(param_lines) if param_lines else {}

    return {
        'head': req_head,
        'params': req_params,
    }

Parses a full HTTP request bytes buffer into a dict.

The parsed request dict contains two keys:

  • head: dict[str, str] Information on the HTTP request header (i.e. the first request line); output of parse_request_head.
  • params: dict[str, str] Dictionary of the HTTP parameters (i.e. the lines following the first request line); output of parse_request_params.

An example of return:

{
    'head': { 'verb': 'GET', 'resource': '//index.html'}, 
    'params': {
        'Host': 'localhost:8000', 
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/115.0', 
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8', 
        'Accept-Language': 'en-GB,en;q=0.5', 
        'Accept-Encoding': 'gzip, deflate, br',
        [SNIP]
    }
}

Parameters

  • buf: bytes The HTTP request buffer.

Returns

dict[str, dict] The parsed content of the HTTP request.

Raises

ValueError The request is not valid HTTP.

def parse_request_head(line: str) -> dict[str, str]:
 75def parse_request_head(line: str) -> dict[str, str]:
 76    """Parses a HTTP request header string (its first line) into a dict.
 77
 78    The parsed request dict contains two keys:
 79    - verb: str
 80        The _uppercase_ verb of the request, i.e. the first word of the line;
 81        for example: "GET".
 82    - resource: str
 83        The requested resource, i.e. the second "word" of the line;
 84        for example: "/index.html".
 85        
 86    Parameters
 87    ----------
 88    - line: str
 89        The HTTP request header (the first line of a full HTTP request).
 90
 91    Returns
 92    -------
 93    dict[str, str]
 94        The parsed content of the HTTP request header.
 95            
 96    Raises
 97    ------
 98    ValueError
 99        The request header is not valid HTTP.
100    """
101    fields = line.split(' ')
102    if len(fields) != 3:
103        raise ValueError(f"Request header is invalid: {line}")
104
105    return dict(
106        verb=fields[0].upper(),
107        resource=fields[1]
108    )

Parses an HTTP request header string (its first line) into a dict.

The parsed request dict contains two keys:

  • verb: str The _uppercase_ verb of the request, i.e. the first word of the line; for example: "GET".
  • resource: str The requested resource, i.e. the second "word" of the line; for example: "/index.html".

Parameters

  • line: str The HTTP request header (the first line of a full HTTP request).

Returns

dict[str, str] The parsed content of the HTTP request header.

Raises

ValueError The request header is not valid HTTP.

def parse_request_params(lines: list[str]) -> dict[str, str]:
def parse_request_params(lines: list[str]) -> dict[str, str]:
    """Parses HTTP request parameters (a list of lines) into a dict.

    The parsed request dict contains one key/value pair per line, with the
    dict key being the left part of the line (the parameter key), and the
    dict value being the right part of the line (the parameter value).

    Each line is split on its first ": " only, so a value may itself
    contain ": ": "X-Note: a: b" becomes `{"X-Note": "a: b"}`.

    The function strips leading and trailing spaces: " Host: a.org  " becomes
    `{"Host": "a.org"}`.

    If the same key appears on several lines, the last line wins.

    Parameters
    ----------
    - lines: list[str]
        HTTP parameters (one list item per line)

    Returns
    -------
    dict[str, str]
        Dictionary of the parameters

    Raises
    ------
    ValueError
        The provided lines are not valid HTTP (missing ": " separator,
        empty key or empty value).
    """
    params = {}
    for line in lines:
        # partition() cuts at the first separator only, so the value keeps
        # any later ": " it contains.
        key, sep, value = line.strip().partition(': ')

        if not sep or not key or not value:
            raise ValueError(f"Request line is not a valid key/value pair: {line}")

        params[key] = value
    return params

Parses HTTP request parameters (a list of lines) into a dict.

The parsed request dict contains one key/value pair per line, with the dict key being the left part of the line (the parameter key), and the dict value being the right part of the line (the parameter value).

The function strips leading and trailing spaces: " Host: a.org " becomes {"Host": "a.org"}.

Parameters

  • lines: list[str] HTTP parameters (one list item per line)

Returns

dict[str, str] Dictionary of the parameters

Raises

ValueError The provided lines are not valid HTTP.