Added lots of code documentation to the httpd module.

This commit is contained in:
s3lph 2018-06-13 00:00:41 +02:00
parent 3caf7d2477
commit 97b8d8b054
2 changed files with 142 additions and 31 deletions

View file

@ -13,4 +13,4 @@ if __name__ == '__main__':
port = int(sys.argv[1]) port = int(sys.argv[1])
# Start the web server # Start the web server
MatematWebserver(port).start() MatematWebserver(port=port).start()

View file

@ -16,15 +16,18 @@ from datetime import datetime, timedelta
from matemat import __version__ as matemat_version from matemat import __version__ as matemat_version
# Enable IPv6 support (with implicit DualStack). # Enable IPv6 support (with implicit DualStack)
TCPServer.address_family = socket.AF_INET6 TCPServer.address_family = socket.AF_INET6
# Dictionary to hold registered pagelet paths and their handler functions. # Dictionary to hold registered pagelet paths and their handler functions
_PAGELET_PATHS: Dict[str, Callable[[str, str, Dict[str, str], Dict[str, Any], Dict[str, str], bytes], _PAGELET_PATHS: Dict[str, Callable[[str, str, Dict[str, str], Dict[str, Any], Dict[str, str], bytes],
Tuple[int, Union[bytes, str]]]] = dict() Tuple[int, Union[bytes, str]]]] = dict()
_SESSION_TIMEOUT: int = 3600
def pagelet(path: str): def pagelet(path: str):
""" """
Annotate a function to act as a pagelet (part of a website). The function will be called if a request is made to Annotate a function to act as a pagelet (part of a website). The function will be called if a request is made to
@ -47,27 +50,66 @@ def pagelet(path: str):
""" """
def http_handler(fun: Callable[[str, str, Dict[str, str], Dict[str, Any], Dict[str, str], bytes], def http_handler(fun: Callable[[str, str, Dict[str, str], Dict[str, Any], Dict[str, str], bytes],
Tuple[int, Union[bytes, str]]]): Tuple[int, Union[bytes, str]]]):
# Add the function to the dict of pagelets. # Add the function to the dict of pagelets
_PAGELET_PATHS[path] = fun _PAGELET_PATHS[path] = fun
# Don't change the function itself at all. # Don't change the function itself at all
return fun return fun
# Return the inner function (Python requires a "real" function annotation to not have any arguments except # Return the inner function (Python requires a "real" function annotation to not have any arguments except
# the function itself). # the function itself)
return http_handler return http_handler
class MatematWebserver(object): class MatematWebserver(object):
"""
The main webserver class, internally uses Python's http.server.
def __init__(self, port: int = 80, webroot: str = './webroot') -> None: The server will serve a pagelet, if one is defined for a request path, else it will attempt to serve a static
self._httpd = HTTPServer(('::', port), HttpHandler) resource from the webroot.
Usage:
# Listen on all interfaces on port 80 (dual-stack IPv6/IPv4)
server = MatematWebserver('::', 80, webroot='/var/www/matemat')
# Start the server. This call blocks while the server is running.
server.start()
"""
def __init__(self, listen: str = '::', port: int = 80, webroot: str = './webroot') -> None:
"""
Instantiate a MatematWebserver.
:param listen: The IPv4 or IPv6 address to listen on
:param port: The TCP port to listen on
:param webroot: Path to the webroot directory
"""
if len(listen) == 0:
# Empty string should be interpreted as all addresses
listen = '::'
# IPv4 address detection heuristic
if ':' not in listen and '.' in listen:
# Rewrite IPv4 address to IPv6-mapped form
listen = f'::ffff:{listen}'
# Create the http server
self._httpd = HTTPServer((listen, port), HttpHandler)
# Set up session vars dict
self._httpd.session_vars: Dict[str, Tuple[datetime, Dict[str, Any]]] = dict() self._httpd.session_vars: Dict[str, Tuple[datetime, Dict[str, Any]]] = dict()
# Resolve webroot directory
self._httpd.webroot = os.path.abspath(webroot) self._httpd.webroot = os.path.abspath(webroot)
def start(self) -> None: def start(self) -> None:
"""
Start the web server. This call blocks while the server is running.
"""
self._httpd.serve_forever() self._httpd.serve_forever()
class HttpHandler(BaseHTTPRequestHandler): class HttpHandler(BaseHTTPRequestHandler):
"""
HTTP Request handler.
This class parses HTTP requests, and calls the appropriate pagelets, or fetches a static resource from the webroot
directory.
"""
def __init__(self, request: bytes, client_address: Tuple[str, int], server: HTTPServer) -> None: def __init__(self, request: bytes, client_address: Tuple[str, int], server: HTTPServer) -> None:
super().__init__(request, client_address, server) super().__init__(request, client_address, server)
@ -76,29 +118,58 @@ class HttpHandler(BaseHTTPRequestHandler):
def server_version(self) -> str: def server_version(self) -> str:
return f'matemat/{matemat_version}' return f'matemat/{matemat_version}'
def start_session(self) -> Tuple[str, datetime]: def _start_session(self) -> Tuple[str, datetime]:
"""
Start a new session, or resume the session identified by the session cookie sent in the HTTP request.
:return: A tuple consisting of the session ID (a UUID string), and the session timeout date.
"""
# Reference date for session timeout
now = datetime.utcnow() now = datetime.utcnow()
# Parse cookies sent by the client
cookiestring = '\n'.join(self.headers.get_all('Cookie', failobj=[])) cookiestring = '\n'.join(self.headers.get_all('Cookie', failobj=[]))
cookie = SimpleCookie() cookie = SimpleCookie()
cookie.load(cookiestring) cookie.load(cookiestring)
# Read the client's session ID, if any
session_id = str(cookie['matemat_session_id'].value) if 'matemat_session_id' in cookie else None session_id = str(cookie['matemat_session_id'].value) if 'matemat_session_id' in cookie else None
# If there is no active session, create a new session ID
if session_id is None or session_id not in self.server.session_vars: if session_id is None or session_id not in self.server.session_vars:
session_id = str(uuid4()) session_id = str(uuid4())
# Check for session timeout
if session_id in self.server.session_vars and self.server.session_vars[session_id][0] < now: if session_id in self.server.session_vars and self.server.session_vars[session_id][0] < now:
self.end_session(session_id) self._end_session(session_id)
raise TimeoutError('Session timed out.') raise TimeoutError('Session timed out.')
elif session_id not in self.server.session_vars: # Update or initialize the session timeout
self.server.session_vars[session_id] = (now + timedelta(seconds=10)), dict() if session_id not in self.server.session_vars:
self.server.session_vars[session_id] = (now + timedelta(seconds=_SESSION_TIMEOUT)), dict()
else:
self.server.session_vars[session_id] =\
(now + timedelta(seconds=_SESSION_TIMEOUT), self.server.session_vars[session_id][1])
# Return the session ID and timeout
return session_id, self.server.session_vars[session_id][0] return session_id, self.server.session_vars[session_id][0]
def end_session(self, session_id: str) -> None: def _end_session(self, session_id: str) -> None:
"""
Destroy a session identified by the session ID.
:param session_id: ID of the session to destroy.
"""
if session_id in self.server.session_vars: if session_id in self.server.session_vars:
del self.server.session_vars[session_id] del self.server.session_vars[session_id]
def _handle(self, method: str, path: str, args: Dict[str, str]) -> None: def _handle(self, method: str, path: str, args: Dict[str, str]) -> None:
"""
Handle a HTTP request by either dispatching it to the appropriate pagelet or by serving a static resource.
:param method: The HTTP request method (GET, POST).
:param path: The request path without GET arguments.
:param args: Arguments sent with the request. This includes GET and POST arguments, where the POST arguments
take precedence.
"""
# Start or resume a session; report an error on session timeout
try: try:
session_id, timeout = self.start_session() session_id, timeout = self._start_session()
except TimeoutError: except TimeoutError:
self.send_error(599, 'Session Timed Out', 'Session Timed Out.') self.send_error(599, 'Session Timed Out', 'Session Timed Out.')
self.send_header('Set-Cookie', 'matemat_session_id=; expires=Thu, 01 Jan 1970 00:00:00 GMT') self.send_header('Set-Cookie', 'matemat_session_id=; expires=Thu, 01 Jan 1970 00:00:00 GMT')
@ -106,42 +177,63 @@ class HttpHandler(BaseHTTPRequestHandler):
return return
self.session_id: str = session_id self.session_id: str = session_id
# Call a pagelet function, if one is registered for the requested path
if path in _PAGELET_PATHS: if path in _PAGELET_PATHS:
# Prepare some headers. Those can still be overwritten by the pagelet
headers: Dict[str, str] = { headers: Dict[str, str] = {
'Content-Type': 'text/html', 'Content-Type': 'text/html',
'Cache-Control': 'no-cache' 'Cache-Control': 'no-cache'
} }
# Call the pagelet function
hsc, data = _PAGELET_PATHS[path](method, path, args, self.session_vars, headers) hsc, data = _PAGELET_PATHS[path](method, path, args, self.session_vars, headers)
# The pagelet may return None as data as a shorthand for an empty response
if data is None: if data is None:
data = bytes() data = bytes()
# If the pagelet returns a Python str, convert it to a UTF-8 encoded bytes object
if isinstance(data, str): if isinstance(data, str):
data = data.encode('utf-8') data = data.encode('utf-8')
# Send the HTTP status code
self.send_response(hsc) self.send_response(hsc)
# Format the session cookie timeout string and send the session cookie header
expires = timeout.strftime("%a, %d %b %Y %H:%M:%S GMT") expires = timeout.strftime("%a, %d %b %Y %H:%M:%S GMT")
self.send_header('Set-Cookie', self.send_header('Set-Cookie',
f'matemat_session_id={session_id}; expires={expires}') f'matemat_session_id={session_id}; expires={expires}')
# Compute the body length and add the appropriate header
headers['Content-Length'] = str(len(data)) headers['Content-Length'] = str(len(data))
# Send all headers set by the pagelet
for name, value in headers.items(): for name, value in headers.items():
self.send_header(name, value) self.send_header(name, value)
# End the header section and write the body
self.end_headers() self.end_headers()
self.wfile.write(data) self.wfile.write(data)
else: else:
# No pagelet function for this path, try a static serve instead
# Only HTTP GET is allowed, else reply with a 'Method Not Allowed' header
if method != 'GET': if method != 'GET':
self.send_error(405) self.send_error(405)
self.end_headers() self.end_headers()
return return
# Create the absolute resource path, resolving '..'
filepath: str = os.path.abspath(os.path.join(self.server.webroot, path[1:])) filepath: str = os.path.abspath(os.path.join(self.server.webroot, path[1:]))
# Make sure the file is actually inside the webroot directory and that it exists
if os.path.commonpath([filepath, self.server.webroot]) == self.server.webroot and os.path.exists(filepath): if os.path.commonpath([filepath, self.server.webroot]) == self.server.webroot and os.path.exists(filepath):
# Open and read the file
with open(filepath, 'rb') as f: with open(filepath, 'rb') as f:
data = f.read() data = f.read()
# File read successfully, send 'OK' header
self.send_response(200) self.send_response(200)
# TODO: Guess the MIME type. Unfortunately this call solely relies on the file extension, not ideal?
mimetype, _ = mimetypes.guess_type(filepath) mimetype, _ = mimetypes.guess_type(filepath)
if mimetype is not None: # Fall back to octet-stream type, if unknown
if mimetype is None:
mimetype = 'application/octet-stream'
# Send content type header
self.send_header('Content-Type', mimetype) self.send_header('Content-Type', mimetype)
self.end_headers() self.end_headers()
if method == 'GET': # Send the requested resource as response body
self.wfile.write(data) self.wfile.write(data)
else: else:
# File does not exist or path points outside the webroot directory
self.send_response(404) self.send_response(404)
self.end_headers() self.end_headers()
@ -150,77 +242,96 @@ class HttpHandler(BaseHTTPRequestHandler):
""" """
Given a HTTP request path, and optionally a HTTP POST body in application/x-www-form-urlencoded form, parse the Given a HTTP request path, and optionally a HTTP POST body in application/x-www-form-urlencoded form, parse the
arguments and return them as a dictionary. arguments and return them as a dictionary.
If a key is used both in GET and in POST, the POST value takes precedence, and the GET value is discarded. If a key is used both in GET and in POST, the POST value takes precedence, and the GET value is discarded.
:param request: The request string to parse. :param request: The request string to parse.
:param postbody: The POST body to parse, defaults to None. :param postbody: The POST body to parse, defaults to None.
:return: A tuple consisting of the base path and a dictionary with the parsed key/value pairs. :return: A tuple consisting of the base path and a dictionary with the parsed key/value pairs.
""" """
# Parse the request "URL" (i.e. only the path). # Parse the request "URL" (i.e. only the path)
tokens = urllib.parse.urlparse(request) tokens = urllib.parse.urlparse(request)
# Parse the GET arguments. # Parse the GET arguments
args = urllib.parse.parse_qs(tokens.query) args = urllib.parse.parse_qs(tokens.query)
if postbody is not None: if postbody is not None:
# Parse the POST body. # Parse the POST body
postargs = urllib.parse.parse_qs(postbody) postargs = urllib.parse.parse_qs(postbody)
# Write all POST values into the dict, overriding potential duplicates from GET. # Write all POST values into the dict, overriding potential duplicates from GET
for k, v in postargs.items(): for k, v in postargs.items():
args[k] = v args[k] = v
# urllib.parse.parse_qs turns ALL arguments into arrays. This turns arrays of length 1 into scalar values. # urllib.parse.parse_qs turns ALL arguments into arrays. This turns arrays of length 1 into scalar values
for k, v in args.items(): for k, v in args.items():
if len(v) == 1: if len(v) == 1:
args[k] = v[0] args[k] = v[0]
# Return the path and the parsed arguments. # Return the path and the parsed arguments
return tokens.path, args return tokens.path, args
# noinspection PyPep8Naming # noinspection PyPep8Naming
def do_GET(self) -> None: def do_GET(self) -> None:
"""
Called by BaseHTTPRequestHandler for GET requests.
"""
try: try:
# Parse the request and hand it to the handle function
path, args = self._parse_args(self.path) path, args = self._parse_args(self.path)
self._handle('GET', path, args) self._handle('GET', path, args)
# Special handling for some errors
except PermissionError as e: except PermissionError as e:
self.send_error(403, 'Forbidden') self.send_error(403, 'Forbidden')
self.end_headers() self.end_headers()
print(type(e)) print(e)
traceback.print_tb(e.__traceback__) traceback.print_tb(e.__traceback__)
except ValueError as e: except ValueError as e:
self.send_header(400, 'Bad Request') self.send_header(400, 'Bad Request')
self.end_headers() self.end_headers()
print(type(e)) print(e)
traceback.print_tb(e.__traceback__) traceback.print_tb(e.__traceback__)
except BaseException as e: except BaseException as e:
# Generic error handling
self.send_error(500, 'Internal Server Error') self.send_error(500, 'Internal Server Error')
self.end_headers() self.end_headers()
print(type(e)) print(e)
traceback.print_tb(e.__traceback__) traceback.print_tb(e.__traceback__)
# noinspection PyPep8Naming # noinspection PyPep8Naming
def do_POST(self) -> None: def do_POST(self) -> None:
"""
Called by BaseHTTPRequestHandler for POST requests.
"""
try: try:
# Read the POST body, if it exists, and its MIME type is application/x-www-form-urlencoded
clen: str = self.headers.get('Content-Length', failobj='0') clen: str = self.headers.get('Content-Length', failobj='0')
ctype: str = self.headers.get('Content-Type', failobj='application/octet-stream') ctype: str = self.headers.get('Content-Type', failobj='application/octet-stream')
post = '' post = ''
if ctype == 'application/x-www-form-urlencoded': if ctype == 'application/x-www-form-urlencoded':
post: str = self.rfile.read(int(clen)).decode('utf-8') post: str = self.rfile.read(int(clen)).decode('utf-8')
# Parse the request and hand it to the handle function
path, args = self._parse_args(self.path, postbody=post) path, args = self._parse_args(self.path, postbody=post)
self._handle('POST', path, args) self._handle('POST', path, args)
# Special handling for some errors
except PermissionError as e: except PermissionError as e:
self.send_error(403, 'Forbidden') self.send_error(403, 'Forbidden')
self.end_headers() self.end_headers()
print(type(e)) print(e)
traceback.print_tb(e.__traceback__) traceback.print_tb(e.__traceback__)
except ValueError as e: except ValueError as e:
self.send_header(400, 'Bad Request') self.send_header(400, 'Bad Request')
self.end_headers() self.end_headers()
print(type(e)) print(e)
traceback.print_tb(e.__traceback__) traceback.print_tb(e.__traceback__)
except BaseException as e: except BaseException as e:
# Generic error handling
self.send_error(500, 'Internal Server Error') self.send_error(500, 'Internal Server Error')
self.end_headers() self.end_headers()
print(type(e)) print(e)
traceback.print_tb(e.__traceback__) traceback.print_tb(e.__traceback__)
@property @property
def session_vars(self) -> Dict[str, Any]: def session_vars(self) -> Dict[str, Any]:
"""
Get the session variables for the current session.
:return: Dictionary of named session variables.
"""
return self.server.session_vars[self.session_id][1] return self.server.session_vars[self.session_id][1]