diff --git a/matemat/__main__.py b/matemat/__main__.py index 819ae2b..9654b64 100644 --- a/matemat/__main__.py +++ b/matemat/__main__.py @@ -13,4 +13,4 @@ if __name__ == '__main__': port = int(sys.argv[1]) # Start the web server - MatematWebserver(port).start() + MatematWebserver(port=port).start() diff --git a/matemat/webserver/httpd.py b/matemat/webserver/httpd.py index ff6c7d4..e776151 100644 --- a/matemat/webserver/httpd.py +++ b/matemat/webserver/httpd.py @@ -16,15 +16,18 @@ from datetime import datetime, timedelta from matemat import __version__ as matemat_version -# Enable IPv6 support (with implicit DualStack). +# Enable IPv6 support (with implicit DualStack) TCPServer.address_family = socket.AF_INET6 -# Dictionary to hold registered pagelet paths and their handler functions. +# Dictionary to hold registered pagelet paths and their handler functions _PAGELET_PATHS: Dict[str, Callable[[str, str, Dict[str, str], Dict[str, Any], Dict[str, str], bytes], Tuple[int, Union[bytes, str]]]] = dict() +_SESSION_TIMEOUT: int = 3600 + + def pagelet(path: str): """ Annotate a function to act as a pagelet (part of a website). The function will be called if a request is made to @@ -47,27 +50,66 @@ def pagelet(path: str): """ def http_handler(fun: Callable[[str, str, Dict[str, str], Dict[str, Any], Dict[str, str], bytes], Tuple[int, Union[bytes, str]]]): - # Add the function to the dict of pagelets. + # Add the function to the dict of pagelets _PAGELET_PATHS[path] = fun - # Don't change the function itself at all. + # Don't change the function itself at all return fun # Return the inner function (Python requires a "real" function annotation to not have any arguments except - # the function itself). + # the function itself) return http_handler class MatematWebserver(object): + """ + The main webserver class, internally uses Python's http.server. 
- def __init__(self, port: int = 80, webroot: str = './webroot') -> None: - self._httpd = HTTPServer(('::', port), HttpHandler) + The server will serve a pagelet, if one is defined for a request path, else it will attempt to serve a static + resource from the webroot. + + Usage: + + # Listen on all interfaces on port 80 (dual-stack IPv6/IPv4) + server = MatematWebserver('::', 80, webroot='/var/www/matemat') + # Start the server. This call blocks while the server is running. + server.start() + """ + + def __init__(self, listen: str = '::', port: int = 80, webroot: str = './webroot') -> None: + """ + Instantiate a MatematWebserver. + + :param listen: The IPv4 or IPv6 address to listen on + :param port: The TCP port to listen on + :param webroot: Path to the webroot directory + """ + if len(listen) == 0: + # Empty string should be interpreted as all addresses + listen = '::' + # IPv4 address detection heuristic + if ':' not in listen and '.' in listen: + # Rewrite IPv4 address to IPv6-mapped form + listen = f'::ffff:{listen}' + # Create the http server + self._httpd = HTTPServer((listen, port), HttpHandler) + # Set up session vars dict self._httpd.session_vars: Dict[str, Tuple[datetime, Dict[str, Any]]] = dict() + # Resolve webroot directory self._httpd.webroot = os.path.abspath(webroot) def start(self) -> None: + """ + Start the web server. This call blocks while the server is running. + """ self._httpd.serve_forever() class HttpHandler(BaseHTTPRequestHandler): + """ + HTTP Request handler. + + This class parses HTTP requests, and calls the appropriate pagelets, or fetches a static resource from the webroot + directory. 
+ """ def __init__(self, request: bytes, client_address: Tuple[str, int], server: HTTPServer) -> None: super().__init__(request, client_address, server) @@ -76,29 +118,58 @@ class HttpHandler(BaseHTTPRequestHandler): def server_version(self) -> str: return f'matemat/{matemat_version}' - def start_session(self) -> Tuple[str, datetime]: + def _start_session(self) -> Tuple[str, datetime]: + """ + Start a new session, or resume the session identified by the session cookie sent in the HTTP request. + + :return: A tuple consisting of the session ID (a UUID string), and the session timeout date. + """ + # Reference date for session timeout now = datetime.utcnow() + # Parse cookies sent by the client cookiestring = '\n'.join(self.headers.get_all('Cookie', failobj=[])) cookie = SimpleCookie() cookie.load(cookiestring) + # Read the client's session ID, if any session_id = str(cookie['matemat_session_id'].value) if 'matemat_session_id' in cookie else None + # If there is no active session, create a new session ID if session_id is None or session_id not in self.server.session_vars: session_id = str(uuid4()) + # Check for session timeout if session_id in self.server.session_vars and self.server.session_vars[session_id][0] < now: - self.end_session(session_id) + self._end_session(session_id) raise TimeoutError('Session timed out.') - elif session_id not in self.server.session_vars: - self.server.session_vars[session_id] = (now + timedelta(seconds=10)), dict() + # Update or initialize the session timeout + if session_id not in self.server.session_vars: + self.server.session_vars[session_id] = (now + timedelta(seconds=_SESSION_TIMEOUT)), dict() + else: + self.server.session_vars[session_id] =\ + (now + timedelta(seconds=_SESSION_TIMEOUT), self.server.session_vars[session_id][1]) + # Return the session ID and timeout return session_id, self.server.session_vars[session_id][0] - def end_session(self, session_id: str) -> None: + def _end_session(self, session_id: str) -> None: + """ + 
Destroy a session identified by the session ID. + + :param session_id: ID of the session to destroy. + """ if session_id in self.server.session_vars: del self.server.session_vars[session_id] def _handle(self, method: str, path: str, args: Dict[str, str]) -> None: + """ + Handle an HTTP request by either dispatching it to the appropriate pagelet or by serving a static resource. + + :param method: The HTTP request method (GET, POST). + :param path: The request path without GET arguments. + :param args: Arguments sent with the request. This includes GET and POST arguments, where the POST arguments + take precedence. + """ + # Start or resume a session; report an error on session timeout try: - session_id, timeout = self.start_session() + session_id, timeout = self._start_session() except TimeoutError: self.send_error(599, 'Session Timed Out', 'Session Timed Out.') self.send_header('Set-Cookie', 'matemat_session_id=; expires=Thu, 01 Jan 1970 00:00:00 GMT') @@ -106,42 +177,63 @@ class HttpHandler(BaseHTTPRequestHandler): return self.session_id: str = session_id + # Call a pagelet function, if one is registered for the requested path if path in _PAGELET_PATHS: + # Prepare some headers. 
Those can still be overwritten by the pagelet headers: Dict[str, str] = { 'Content-Type': 'text/html', 'Cache-Control': 'no-cache' } + # Call the pagelet function hsc, data = _PAGELET_PATHS[path](method, path, args, self.session_vars, headers) + # The pagelet may return None as data as a shorthand for an empty response if data is None: data = bytes() + # If the pagelet returns a Python str, convert it to a UTF-8 encoded bytes object if isinstance(data, str): data = data.encode('utf-8') + # Send the HTTP status code self.send_response(hsc) + # Format the session cookie timeout string and send the session cookie header expires = timeout.strftime("%a, %d %b %Y %H:%M:%S GMT") self.send_header('Set-Cookie', f'matemat_session_id={session_id}; expires={expires}') + # Compute the body length and add the appropriate header headers['Content-Length'] = str(len(data)) + # Send all headers set by the pagelet for name, value in headers.items(): self.send_header(name, value) + # End the header section and write the body self.end_headers() self.wfile.write(data) else: + # No pagelet function for this path, try a static serve instead + # Only HTTP GET is allowed, else reply with a 'Method Not Allowed' header if method != 'GET': self.send_error(405) self.end_headers() return + # Create the absolute resource path, resolving '..' filepath: str = os.path.abspath(os.path.join(self.server.webroot, path[1:])) + # Make sure the file is actually inside the webroot directory and that it exists if os.path.commonpath([filepath, self.server.webroot]) == self.server.webroot and os.path.exists(filepath): + # Open and read the file with open(filepath, 'rb') as f: data = f.read() + # File read successfully, send 'OK' header self.send_response(200) + # Guess the MIME type. TODO: this call relies solely on the file extension, which is not ideal. 
mimetype, _ = mimetypes.guess_type(filepath) - if mimetype is not None: - self.send_header('Content-Type', mimetype) + # Fall back to octet-stream type, if unknown + if mimetype is None: + mimetype = 'application/octet-stream' + # Send content type header + self.send_header('Content-Type', mimetype) self.end_headers() - if method == 'GET': - self.wfile.write(data) + # Send the requested resource as response body + self.wfile.write(data) else: + # File does not exist or path points outside the webroot directory self.send_response(404) self.end_headers() @@ -150,77 +242,96 @@ class HttpHandler(BaseHTTPRequestHandler): """ Given a HTTP request path, and optionally a HTTP POST body in application/x-www-form-urlencoded form, parse the arguments and return them as a dictionary. + If a key is used both in GET and in POST, the POST value takes precedence, and the GET value is discarded. + :param request: The request string to parse. :param postbody: The POST body to parse, defaults to None. :return: A tuple consisting of the base path and a dictionary with the parsed key/value pairs. """ - # Parse the request "URL" (i.e. only the path). + # Parse the request "URL" (i.e. only the path) tokens = urllib.parse.urlparse(request) - # Parse the GET arguments. + # Parse the GET arguments args = urllib.parse.parse_qs(tokens.query) if postbody is not None: - # Parse the POST body. + # Parse the POST body postargs = urllib.parse.parse_qs(postbody) - # Write all POST values into the dict, overriding potential duplicates from GET. + # Write all POST values into the dict, overriding potential duplicates from GET for k, v in postargs.items(): args[k] = v - # urllib.parse.parse_qs turns ALL arguments into arrays. This turns arrays of length 1 into scalar values. + # urllib.parse.parse_qs turns ALL arguments into arrays. This turns arrays of length 1 into scalar values for k, v in args.items(): if len(v) == 1: args[k] = v[0] - # Return the path and the parsed arguments. 
+ # Return the path and the parsed arguments return tokens.path, args # noinspection PyPep8Naming def do_GET(self) -> None: + """ + Called by BaseHTTPRequestHandler for GET requests. + """ try: + # Parse the request and hand it to the handle function path, args = self._parse_args(self.path) self._handle('GET', path, args) + # Special handling for some errors except PermissionError as e: self.send_error(403, 'Forbidden') self.end_headers() - print(type(e)) + print(e) traceback.print_tb(e.__traceback__) except ValueError as e: self.send_header(400, 'Bad Request') self.end_headers() - print(type(e)) + print(e) traceback.print_tb(e.__traceback__) except BaseException as e: + # Generic error handling self.send_error(500, 'Internal Server Error') self.end_headers() - print(type(e)) + print(e) traceback.print_tb(e.__traceback__) # noinspection PyPep8Naming def do_POST(self) -> None: + """ + Called by BaseHTTPRequestHandler for POST requests. + """ try: + # Read the POST body, if it exists, and its MIME type is application/x-www-form-urlencoded clen: str = self.headers.get('Content-Length', failobj='0') ctype: str = self.headers.get('Content-Type', failobj='application/octet-stream') post = '' if ctype == 'application/x-www-form-urlencoded': post: str = self.rfile.read(int(clen)).decode('utf-8') - + # Parse the request and hand it to the handle function path, args = self._parse_args(self.path, postbody=post) self._handle('POST', path, args) + # Special handling for some errors except PermissionError as e: self.send_error(403, 'Forbidden') self.end_headers() - print(type(e)) + print(e) traceback.print_tb(e.__traceback__) except ValueError as e: self.send_header(400, 'Bad Request') self.end_headers() - print(type(e)) + print(e) traceback.print_tb(e.__traceback__) except BaseException as e: + # Generic error handling self.send_error(500, 'Internal Server Error') self.end_headers() - print(type(e)) + print(e) traceback.print_tb(e.__traceback__) @property def 
session_vars(self) -> Dict[str, Any]: + """ + Get the session variables for the current session. + + :return: Dictionary of named session variables. + """ return self.server.session_vars[self.session_id][1]