Update steps 9 & 10.

This commit is contained in:
Boris Baldassari 2024-03-21 21:43:49 +01:00
parent 9bcfc7d783
commit 43035e610a
10 changed files with 798 additions and 21 deletions

View File

@ -0,0 +1,17 @@
######################################################################
# Copyright (c) Adrien Luxey-Bitri, Boris Baldassari
#
# This program and the accompanying materials are made
# available under the terms of the Eclipse Public License 2.0
# which is available at https://www.eclipse.org/legal/epl-2.0/
#
# SPDX-License-Identifier: EPL-2.0
######################################################################
"""
A package for learning network programming in Python.
This module (file) defines the directory as a Python module.
"""

View File

@ -0,0 +1,23 @@
######################################################################
# Copyright (c) Adrien Luxey-Bitri, Boris Baldassari
#
# This program and the accompanying materials are made
# available under the terms of the Eclipse Public License 2.0
# which is available at https://www.eclipse.org/legal/epl-2.0/
#
# SPDX-License-Identifier: EPL-2.0
######################################################################
"""
A package for learning network programming in Python.
This module (file) is the entrypoint of the package.
"""
# More information about __main__.py:
# https://docs.python.org/3/library/__main__.html
from myserver.cli import main
main()

View File

@ -0,0 +1,53 @@
######################################################################
# Copyright (c) Adrien Luxey-Bitri, Boris Baldassari
#
# This program and the accompanying materials are made
# available under the terms of the Eclipse Public License 2.0
# which is available at https://www.eclipse.org/legal/epl-2.0/
#
# SPDX-License-Identifier: EPL-2.0
######################################################################
"""
A package for learning network programming in Python.
This module (file) manages the command line interface to control the server.
"""
import argparse
import sys
from myserver.server import serve
def parse_args(argv: list[str]) -> argparse.Namespace:
    """
    Parses arguments from command line.

    Parameters
    ----------
    - argv: list[str]
        The list of arguments to parse (without the program name).

    Returns
    -------
    argparse.Namespace
        The parsed options:
        - port: int, the TCP port to listen to (8080 when not provided);
        - root: str, the root directory of the server (mandatory).
    """
    parser = argparse.ArgumentParser(
        prog="myserver",
        description="My HTTP web server")
    # The port is optional: flagging it as `required` would make its
    # default value unreachable.
    parser.add_argument('-p', '--port',
                        help='TCP port number to listen to',
                        default=8080, type=int)
    parser.add_argument('-r', '--root',
                        help='Root directory of the server',
                        type=str, required=True)
    return parser.parse_args(argv)
def main():
    """Entry point: parses the command line options and starts the server."""
    options = parse_args(sys.argv[1:])
    port, root = int(options.port), options.root
    serve(port, root)

View File

@ -0,0 +1,23 @@
######################################################################
# Copyright (c) Adrien Luxey-Bitri, Boris Baldassari
#
# This program and the accompanying materials are made
# available under the terms of the Eclipse Public License 2.0
# which is available at https://www.eclipse.org/legal/epl-2.0/
#
# SPDX-License-Identifier: EPL-2.0
######################################################################
"""
A package for learning network programming in Python.
This module (file) manages date-related utilities.
"""
from time import gmtime, strftime
# strftime() layout equivalent to the string returned by now_rfc2616().
_RFC2616_DATE_FORMAT = '%a, %d %b %Y %H:%M:%S GMT'


def now_rfc2616():
    """Returns the current date and time, formatted for HTTP headers.

    The day and month names are always the English abbreviations required
    by HTTP: strftime()'s `%a` and `%b` follow the process locale, so the
    standard library formatter is used instead.

    Returns
    -------
    str
        The current GMT date, e.g. "Thu, 07 Mar 2024 08:29:45 GMT".
    """
    # Local import: keeps this function self-contained.
    from email.utils import formatdate

    return formatdate(usegmt=True)

View File

@ -0,0 +1,89 @@
######################################################################
# Copyright (c) Adrien Luxey-Bitri, Boris Baldassari
#
# This program and the accompanying materials are made
# available under the terms of the Eclipse Public License 2.0
# which is available at https://www.eclipse.org/legal/epl-2.0/
#
# SPDX-License-Identifier: EPL-2.0
######################################################################
"""
A package for learning network programming in Python.
This module (file) manages operations relative to the file system.
"""
import os.path as path
def resolve_location(res: str, root: str) -> tuple[str, str]:
    """Returns the path of a resource relative to the root and its extension.

    Returns ("", "") if the concatenated path does not exist.
    "index.html" is appended to directory paths.

    Parameters
    ----------
    res: str
        The queried resource path.
    root: str
        The root directory where to look into for res.

    Returns
    -------
    str
        The full disk path of the resource if it exists, or "".
    str
        The extension of the resource (without the leading dot) if it
        exists, or "".
    """
    res_path = resolve_path(res, root)
    if res_path == "":
        return "", ""
    # splitext() keeps the leading dot (".html"): drop it.
    return res_path, path.splitext(res_path)[1][1:]


def resolve_path(res: str, root: str) -> str:
    """Returns the full disk path of a resource relative to the root.

    Beware that resources in a request start with a leading '/'.
    If the request points to a directory, then "index.html" is appended to
    the path.
    Returns "" if the concatenated path does not exist, or if it points
    outside of the root directory (e.g. "/../secret").

    Parameters
    ----------
    res: str
        The queried resource path.
    root: str
        The root directory where to look into for res.

    Returns
    -------
    str
        The full disk path of the resource if it exists, or "".
    """
    # A NUL byte can never be part of a valid file name.
    if "\0" in res:
        return ""
    # Leading slashes are stripped, otherwise join() would discard the root.
    candidate = path.normpath(path.join(root, res.lstrip("/")))
    if path.isdir(candidate):
        candidate = path.join(candidate, "index.html")
    # The resource comes from the network: refuse anything that resolves
    # outside of the served directory (".." segments, symbolic links).
    root_real = path.realpath(root)
    try:
        inside_root = (
            path.commonpath([root_real, path.realpath(candidate)]) == root_real
        )
    except ValueError:
        # Raised when the paths cannot be compared (e.g. different drives).
        inside_root = False
    if not inside_root or not path.isfile(candidate):
        return ""
    return candidate
def get_resource(res_path: str) -> tuple[bytes, int]:
    """Returns the content of the resource at res_path and an HTTP code.

    The HTTP status code is 200 when the file could be read. An error code
    with an empty content is returned when the file cannot be read.

    Parameters
    ----------
    - res_path: str
        Disk path of the requested resource.

    Returns
    -------
    bytes
        The resource content if it could be read (code == 200), b"" otherwise.
    int
        A HTTP status code: 200 on success, 404 if the file is missing,
        403 if reading it is not permitted, 500 on any other I/O error.
    """
    try:
        # Binary mode: resources are sent as-is, whatever their type.
        with open(res_path, "rb") as resource:
            return resource.read(), 200
    except (FileNotFoundError, IsADirectoryError, NotADirectoryError):
        return b"", 404
    except PermissionError:
        return b"", 403
    except OSError:
        return b"", 500

View File

@ -0,0 +1,149 @@
######################################################################
# Copyright (c) Adrien Luxey-Bitri, Boris Baldassari
#
# This program and the accompanying materials are made
# available under the terms of the Eclipse Public License 2.0
# which is available at https://www.eclipse.org/legal/epl-2.0/
#
# SPDX-License-Identifier: EPL-2.0
######################################################################
"""
A package for learning network programming in Python.
This module (file) provides information relative to the HTTP specification.
"""
def get_http_code(code: int):
    """Returns a dict describing an HTTP status code.

    Codes other than 200, 403, 404 and 501 are described as a 500 error.
    See also : https://developer.mozilla.org/en-US/docs/Web/HTTP/Status

    Parameters
    ----------
    - code: int
        An HTTP code.

    Returns
    -------
    dict
        Information about the HTTP code, containing fields:
        - header: str
            The code string to put in an HTTP reply header.
        - html: str
            The HTML to reply as HTTP content.
    """
    # Status code -> (header string, HTML page).
    known_codes = {
        200: ("200 OK", ""),
        403: ("403 Forbidden", """<html>
<body>
<h1>Erreur 403 : Interdit</h1>
<p>Une porte fermée se tient devant vous ; et vous n'avez pas la clé.</p>
</body>
</html>
"""),
        404: ("404 Not Found", """<html>
<body>
<h1>Erreur 404</h1>
<p>Vous avez traversé les limites du Web. que vous soyez, ce n'est sur aucune carte.</p>
</body>
</html>
"""),
        501: ("501 Not implemented", """<html>
<body>
<h1>Erreur 501 : Non implémenté</h1>
<p>Ce que vous demandez est acceptable, mais on ne fait pas ça chez nous.</p>
</body>
</html>
"""),
    }
    # Anything unknown is reported as an internal server error.
    fallback = ("500 Internal Server Error", """<html>
<body>
<h1>Erreur 500 : InTERNal SRveR ER0ooOR</h1>
<p>Erreur serveur inconnue.</p>
</body>
</html>
""")
    header, html = known_codes.get(code, fallback)
    return {"header": header, "html": html}
# From: https://source.chromium.org/chromium/chromium/src/+/main:net/base/mime_util.cc;l=147
# The Chromium authors, 2012, BSD Licence
# Maps a lowercase file extension (without the leading dot) to its MIME type.
file_extension_to_content_type = {
    "webm": "video/webm",
    "mp3": "audio/mpeg",
    "wasm": "application/wasm",
    "crx": "application/x-chrome-extension",
    "xhtml": "application/xhtml+xml",
    "xht": "application/xhtml+xml",
    "xhtm": "application/xhtml+xml",
    "flac": "audio/flac",
    "ogg": "audio/ogg",
    "oga": "audio/ogg",
    "opus": "audio/ogg",
    "wav": "audio/wav",
    "m4a": "audio/x-m4a",
    "avif": "image/avif",
    "gif": "image/gif",
    "jpeg": "image/jpeg",
    "jpg": "image/jpeg",
    "png": "image/png",
    "apng": "image/apng",
    "svg": "image/svg+xml",
    "svgz": "image/svg+xml",
    "webp": "image/webp",
    "mht": "multipart/related",
    "mhtml": "multipart/related",
    "css": "text/css",
    "html": "text/html",
    "htm": "text/html",
    "shtml": "text/html",
    "shtm": "text/html",
    "js": "text/javascript",
    "mjs": "text/javascript",
    "xml": "text/xml",
    "mp4": "video/mp4",
    "m4v": "video/mp4",
    "ogv": "video/ogg",
    "ogm": "video/ogg",
    "csv": "text/csv",
    "ico": "image/vnd.microsoft.icon"
}


def get_http_content_type(extension: str):
    """Returns the HTTP Content-Type corresponding to a file extension.

    The lookup ignores the case of the extension and an optional leading
    dot: "html", "HTML" and ".html" all give "text/html".
    Returns "application/octet-stream" when the extension is unknown or empty.

    Parameters
    ----------
    - extension: str
        A file extension.

    Returns
    -------
    str
        An HTTP Content-Type
    """
    default_type = "application/octet-stream"
    if not extension:
        return default_type
    # The table keys are lowercase and have no leading dot.
    normalized = extension.lower().lstrip(".")
    return file_extension_to_content_type.get(normalized, default_type)

View File

@ -0,0 +1,144 @@
######################################################################
# Copyright (c) Adrien Luxey-Bitri, Boris Baldassari
#
# This program and the accompanying materials are made
# available under the terms of the Eclipse Public License 2.0
# which is available at https://www.eclipse.org/legal/epl-2.0/
#
# SPDX-License-Identifier: EPL-2.0
######################################################################
"""
A package for learning network programming in Python.
This module (file) manages the parsing of HTTP requests.
"""
def parse_request(buf: bytes) -> dict[str, dict]:
    """Parses a full HTTP request bytes buffer into a dict.

    Only the header section of the request is parsed: everything after the
    first blank line (the request body, if any) is ignored.

    The parsed request dict contains two keys:
    - head: dict[str, str]
        Information on the HTTP request header (i.e. the first request line);
        output of `parse_request_head`.
    - params: dict[str, str]
        List of the HTTP parameters (i.e. the following lines);
        output of `parse_request_params`.

    An example of return:
    ```
    {
        'head': { 'verb': 'GET', 'resource': '/index.html'},
        'params': {
            'Host': 'localhost:8000',
            'Accept-Language': 'en-GB,en;q=0.5',
            'Accept-Encoding': 'gzip, deflate, br',
            [SNIP]
        }
    }
    ```

    Parameters
    ----------
    - buf: bytes
        The HTTP request buffer.

    Returns
    -------
    dict[str, dict]
        The parsed content of the HTTP request.

    Raises
    ------
    ValueError
        The request is empty, is not valid UTF-8 or is not valid HTTP.
    """
    raw_lines = buf.strip().splitlines()
    # Covers both an empty buffer and a buffer made of whitespace only.
    if not raw_lines:
        raise ValueError("Received empty request")
    lines = []
    for raw_line in raw_lines:
        # A blank line ends the header section; what follows is the body.
        if not raw_line.strip():
            break
        # UnicodeDecodeError is a subclass of ValueError.
        lines.append(raw_line.decode('utf-8'))
    req_head = parse_request_head(lines[0])
    req_params = parse_request_params(lines[1:]) if len(lines) > 1 else {}
    return dict(
        head=req_head,
        params=req_params
    )
def parse_request_head(line: str) -> dict[str, str]:
    """Parses a HTTP request header string (its first line) into a dict.

    The line must be made of exactly three fields separated by single
    spaces. The returned dict contains two keys:
    - verb: str
        The _uppercase_ verb of the request, i.e. the first field;
        for example: "GET".
    - resource: str
        The requested resource, i.e. the second field;
        for example: "/index.html".

    Parameters
    ----------
    - line: str
        The HTTP request header (the first line of a full HTTP request).

    Returns
    -------
    dict[str, str]
        The parsed content of the HTTP request header.

    Raises
    ------
    ValueError
        The request header is not valid HTTP.
    """
    parts = line.split(' ')
    if len(parts) == 3:
        # The third field is not part of the result.
        verb, resource, _ = parts
        return {"verb": verb.upper(), "resource": resource}
    raise ValueError(f"Request header is invalid: {line}")
def parse_request_params(lines: list[str]) -> dict[str, str]:
    """Parses HTTP request parameters (a list of lines) into a dict.

    The parsed request dict contains one key/value pair per line, with the
    dict key being the part of the line before the first colon (the
    parameter key), and the dict value being the rest of the line (the
    parameter value), which may itself contain colons.
    The function strips leading and trailing spaces: " Host: a.org " becomes
    `{"Host": "a.org"}`.

    Parameters
    ----------
    - lines: list[str]
        HTTP parameters (one list item per line)

    Returns
    -------
    dict[str, str]
        Dictionary of the parameters

    Raises
    ------
    ValueError
        The provided lines are not valid HTTP (no colon, empty key or
        empty value).
    """
    params = {}
    for line in lines:
        # Only the first colon separates the key from the value: values
        # such as "localhost:8000" or "http://a.org/?q=a: b" contain colons.
        key, _, value = line.strip().partition(':')
        key, value = key.strip(), value.strip()
        if not key or not value:
            raise ValueError(f"Request line is not a valid key/value pair: {line}")
        params[key] = value
    return params

View File

@ -0,0 +1,97 @@
######################################################################
# Copyright (c) Adrien Luxey-Bitri, Boris Baldassari
#
# This program and the accompanying materials are made
# available under the terms of the Eclipse Public License 2.0
# which is available at https://www.eclipse.org/legal/epl-2.0/
#
# SPDX-License-Identifier: EPL-2.0
######################################################################
"""
A package for learning network programming in Python.
This module (file) manages the HTTP logging messages.
"""
from myserver.date import now_rfc2616
def log(msg: str):
    """
    Logs a message to stdout, with a timestamp.
    Output is: `timestamp - message`.

    Parameters
    ----------
    - msg : str
        The message string to print.
    """
    print(f"{now_rfc2616()} - {msg}")


def log_address(addr: tuple[str, int], msg: str):
    """
    Logs a message to stdout, with a timestamp and an address (host:port).
    Output is: `timestamp - host:port - message`.

    Parameters
    ----------
    - addr: tuple[str, int]
        The address to print, as a tuple (host, port)
    - msg: str
        The message string to print.
    """
    # Indexing instead of unpacking: only the first two items are needed.
    log(f"{addr[0]}:{addr[1]} - {msg}")
def log_request(addr: tuple[str, int], req: dict[str, dict]):
    """
    Logs a request message to stdout, with a timestamp and an address (host:port).
    If the User-Agent header is passed, its value is appended at the end.
    Output is: `timestamp - host:port - verb resource`.
    Output with User-Agent is: `timestamp - host:port - verb resource - user_agent`.

    Parameters
    ----------
    - addr: tuple[str, int]
        The address to print, as a tuple (host, port)
    - req: dict[str, dict]
        The request to print, with its 'head' (verb, resource) and
        'params' (HTTP headers) entries.
    """
    head = req['head']
    msg = f"{head['verb']} {head['resource']}"
    # HTTP header names are case-insensitive.
    user_agent = next(
        (value for key, value in req.get('params', {}).items()
         if key.lower() == 'user-agent'),
        None)
    if user_agent:
        msg = f"{msg} - {user_agent}"
    log_address(addr, msg)
def log_reply(addr: tuple[str, int], req: dict[str, dict], code: int):
    """
    Logs an HTTP reply to stdout, with timestamp, address (host:port), code.
    If the User-Agent header is passed, its value is appended at the end.
    Output is: `timestamp - host:port - HTTP-verb HTTP-resource - code`.
    Output with User-Agent is: `timestamp - host:port - HTTP-verb HTTP-resource - code - user_agent`.

    Parameters
    ----------
    - addr: tuple[str, int]
        The address to print, as a tuple (host, port)
    - req: dict[str, dict]
        The request to print, with its 'head' (verb, resource) and
        'params' (HTTP headers) entries.
    - code: int
        The replied code to print.
    """
    head = req['head']
    msg = f"{head['verb']} {head['resource']} - {code}"
    # HTTP header names are case-insensitive.
    user_agent = next(
        (value for key, value in req.get('params', {}).items()
         if key.lower() == 'user-agent'),
        None)
    if user_agent:
        msg = f"{msg} - {user_agent}"
    log_address(addr, msg)

View File

@ -0,0 +1,199 @@
######################################################################
# Copyright (c) Adrien Luxey-Bitri, Boris Baldassari
#
# This program and the accompanying materials are made
# available under the terms of the Eclipse Public License 2.0
# which is available at https://www.eclipse.org/legal/epl-2.0/
#
# SPDX-License-Identifier: EPL-2.0
######################################################################
"""
A package for learning network programming in Python.
This part manages the socket connections and multi-threading of clients.
"""
import socket
from myserver.log import log, log_reply
from myserver.http_request import parse_request
from myserver.file import resolve_location, get_resource
from myserver.date import now_rfc2616
from myserver.http import get_http_code, get_http_content_type
# Maximum number of bytes read from a client socket in one recv() call.
_BUF_SIZE = 4096
# Address the server socket is bound to ("0.0.0.0": all IPv4 interfaces).
_SERVER_ADDR = "0.0.0.0"
def serve(port: int, root: str):
    """
    Serves http request connections for clients.

    This function creates the network socket, listens, and as soon as it receives
    a request (connection), calls the :func:`~myserver.handle_client()`.
    Clients are handled one at a time. An error while handling a client is
    logged and does not stop the server; the Control-C sequence does.

    Parameters
    ----------
    - port: int
        The TCP port to listen to.
    - root: str
        The path to the local directory to serve.
    """
    # The `with` block closes the server socket however the function exits.
    with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
        # Allows reusing a socket right after it got closed (after ctrl-c)
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        # Attach the socket to the port and interface provided.
        s.bind((_SERVER_ADDR, port))
        # Start listening on the socket.
        s.listen()
        log(f"Server started at {_SERVER_ADDR}:{port}.")
        try:
            while True:
                c, addr = s.accept()
                try:
                    handle_client(c, addr, root)
                except Exception as e:
                    # One faulty request must not take the whole server down.
                    log(f"Error while handling client {addr[0]}:{addr[1]}: {e!r}")
                finally:
                    # Also runs on KeyboardInterrupt, which then propagates
                    # to the outer handler.
                    c.close()
        except KeyboardInterrupt:
            log("Received KeyboardInterrupt. Closing...")
def handle_client(c: socket.socket, addr: tuple[str, int], root:str):
    """
    Manages a single connection from a client.

    In details, we:
    * read data from the socket provided,
    * parse this data to build the request and headers,
    * call the prepare_resource() or prepare_reply() function accordingly,
    * send the reply back,
    * write the reply in the log,
    * close the connection.

    An invalid request is logged and the connection is closed without reply.
    The connection is closed even when an error occurs.

    Parameters
    ----------
    - c: socket.socket
        The socket to communicate with the client.
    - addr: tuple[str, int]
        The IP address and port of the client, as returned by the accept command.
    - root: str
        The path to the local directory to serve.
    """
    try:
        # Read the socket.
        buf = c.recv(_BUF_SIZE)
        # Parse the request to get the headers - call parse_request().
        try:
            req = parse_request(buf)
        except ValueError as e:
            log(f"Invalid request from {addr[0]}:{addr[1]}: {e}")
            return
        # Prepare our reply.
        if req['head']['verb'] == 'GET':
            reply, code = prepare_resource(root, req)
        else:
            # Not implemented: we treat only GET calls for now.
            reply, code = prepare_reply(b"", "", 501)
        # Send the reply back. send() may transmit only a part of the
        # buffer; sendall() loops until everything is sent.
        c.sendall(reply)
        # Trace the action in the logs.
        log_reply(addr, req, code)
    finally:
        # Close the connection.
        c.close()
def prepare_resource(root: str, req: dict):
    """
    Builds the reply for the resource targeted by a request.

    The resource is looked up under root; a 404 reply is built when it
    cannot be located.

    Parameters
    ----------
    - root: str
        The path to the local directory to serve.
    - req: dict[str, dict]
        The request to proceed.

    Returns
    -------
    tuple
        The output of prepare_reply() for the request:
        - data:
            The data (header + content) to reply on the socket.
        - code: int
            The status code for the reply.
    """
    location, extension = resolve_location(req['head']['resource'], root)
    # Guard clause: nothing to serve.
    if location == "":
        return prepare_reply(b"", "", 404)
    mime_type = get_http_content_type(extension)
    body, status = get_resource(location)
    return prepare_reply(body, mime_type, status)
def prepare_reply(content: bytes, content_type: str, code: int):
    """
    Generates the proper answer, including the HTTP headers and content of the
    webpage, and the status code.

    Headers will look like that (each line ends with CRLF, and a blank line
    separates the headers from the content):
    ```
    HTTP/1.0 200 OK
    Content-Type: text/html
    Date: Thu, 07 Mar 2024 08:29:45 GMT
    Content-Length: 152
    Server: RegardeMamanJeFaisUnServeurWeb/0.1
    ```
    When the code is not 200, the content and content type provided are
    replaced by the HTML page describing the status code.

    For more information about:
    * Content type, see https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/MIME_types
    * Status code, see https://developer.mozilla.org/en-US/docs/Web/HTTP/Status

    Parameters
    ----------
    - content: bytes
        The raw data for the resource.
    - content_type: str
        The content type for the resource.
    - code: int
        The status code.

    Returns
    -------
    tuple
        The reply for the request, including the data and status code.
        - data: bytes
            The data (header + content) to reply on the socket.
        - code: int
            The status code for the reply.
    """
    # Prepare status code
    http_code_dict = get_http_code(code)
    if code != 200:
        content = http_code_dict['html'].encode()
        content_type = get_http_content_type('html') + "; charset=utf-8"
    # Prepare headers, including content-type, date, content-length, server.
    header_lines = [
        f"HTTP/1.0 {http_code_dict['header']}",
        f"Content-Type: {content_type}",
        f"Date: {now_rfc2616()}",
        f"Content-Length: {len(content)}",
        "Server: RegardeMamanJeFaisUnServeurWeb/0.1",
    ]
    # HTTP lines end with CRLF; the empty line marks the end of the
    # headers, without it the content would be read as more headers.
    header = ("\r\n".join(header_lines) + "\r\n\r\n").encode()
    return header + content, code

View File

@ -88,40 +88,23 @@ def handle_client(c: socket.socket, addr: tuple[str, int], root:str):
# Read the socket.
buf = c.recv(_BUF_SIZE)
print(buf)
# Parse the request to get the headers - call parse_request().
req = parse_request(buf)
# Prepare our reply.
content_type = "text/plain"
if req['head']['verb'] != 'GET':
reply = """HTTP/1.0 501 Not Implemented
Server: RegardeMamanJeFaisUnServeurWeb/0.1
Date: Wed, 20 Mar 2024 16:36:42 GMT
Content-type: text/plain
Content-Length: 20
501 Not Implemented
"""
reply, code = prepare_reply(b"", content_type, 501)
elif req['head']['resource'] != '/':
reply = """HTTP/1.0 404 Not Found
Server: RegardeMamanJeFaisUnServeurWeb/0.1
Date: Wed, 20 Mar 2024 16:36:42 GMT
Content-type: text/plain
Content-Length: 14
404 Not Found
"""
reply, code = prepare_reply(b"", content_type, 404)
else:
content = b"You are beautiful today !"
content_type = "text/plain"
reply = prepare_reply(content, content_type, 200)
reply, code = prepare_reply(content, content_type, 200)
# If we get a GET verb from the request header, then call prepare_resource().
# Otherwise, prepare a reply with a "Non Implemented" status code.
# Send the reply back.
c.send(reply)