matemat/matemat/webserver/httpd.py

378 lines
16 KiB
Python

from typing import Any, Callable, Dict, Tuple, Type, Union
import traceback
import os
import socket
import mimetypes
from socketserver import TCPServer
from http.server import HTTPServer, BaseHTTPRequestHandler
from http.cookies import SimpleCookie
from uuid import uuid4
from datetime import datetime, timedelta
from matemat import __version__ as matemat_version
from matemat.exceptions import HttpException
from matemat.webserver import RequestArguments
from matemat.webserver.util import parse_args
#
# Python internal class hacks
#
# Enable IPv6 support (IPv6/IPv4 dual-stack support should be implicitly enabled)
TCPServer.address_family = socket.AF_INET6
# Redirect internal logging to somewhere else, or, for now, silently discard (TODO: logger will come later)
BaseHTTPRequestHandler.log_request = lambda self, code='-', size='-': None
BaseHTTPRequestHandler.log_error = lambda self, fstring='', *args: None
# Dictionary to hold registered pagelet paths and their handler functions
_PAGELET_PATHS: Dict[str, Callable[[str, # HTTP method (GET, POST, ...)
str, # Request path
RequestArguments, # HTTP Request arguments
Dict[str, Any], # Session vars
Dict[str, str]], # Response headers
Union[ # Return type: either a response body, or a redirect
bytes, str, # Response body: will assign HTTP/1.0 200 OK
Tuple[int, str] # Redirect: First element must be 301, second the redirect path
]]] = dict()
# Inactivity timeout for client sessions
_SESSION_TIMEOUT: int = 3600
_MAX_POST: int = 1_000_000
def pagelet(path: str):
"""
Annotate a function to act as a pagelet (part of a website). The function will be called if a request is made to
the path specified as argument to the annotation.
The function must have the following signature:
(method: str,
path: str,
args: RequestArguments,
session_vars: Dict[str, Any],
headers: Dict[str, str])
-> Union[bytes, str, Tuple[int, str]]
method: The HTTP method (GET, POST) that was used.
path: The path that was requested.
args: The arguments that were passed with the request (as GET or POST arguments).
session_vars: The session storage. May be read from and written to.
headers: The dictionary of HTTP response headers. Add headers you wish to send with the response.
returns: One of the following:
- A HTTP Response body as str or bytes
- A HTTP redirect: A tuple of 301 (an int) and the path to redirect to (a str)
raises: HttpException: If a non-200 HTTP status code should be returned
:param path: The path to register the function for.
"""
def http_handler(fun: Callable[[str,
str,
RequestArguments,
Dict[str, Any],
Dict[str, str]],
Union[
bytes, str,
Tuple[int, str]
]]):
# Add the function to the dict of pagelets
_PAGELET_PATHS[path] = fun
# Don't change the function itself at all
return fun
# Return the inner function (Python requires a "real" function annotation to not have any arguments except
# the function itself)
return http_handler
class MatematHTTPServer(HTTPServer):
"""
A http.server.HTTPServer subclass that acts as a container for data that must be persistent between requests.
"""
def __init__(self,
server_address: Any,
handler: Type[BaseHTTPRequestHandler],
webroot: str,
bind_and_activate: bool = True) -> None:
super().__init__(server_address, handler, bind_and_activate)
# Resolve webroot directory
self.webroot = os.path.abspath(webroot)
# Set up session vars dict
self.session_vars: Dict[str, Tuple[datetime, Dict[str, Any]]] = dict()
class MatematWebserver(object):
"""
Then main webserver class, internally uses Python's http.server.
The server will serve a pagelet, if one is defined for a request path, else it will attempt to serve a static
resource from the webroot.
Usage:
# Listen on all interfaces on port 80 (dual-stack IPv6/IPv4)
server = MatematWebserver('::', 80, webroot='/var/www/matemat')
# Start the server. This call blocks while the server is running.
server.start()
"""
def __init__(self, listen: str = '::', port: int = 80, webroot: str = './webroot') -> None:
"""
Instantiate a MatematWebserver.
:param listen: The IPv4 or IPv6 address to listen on
:param port: The TCP port to listen on
:param webroot: Path to the webroot directory
"""
if len(listen) == 0:
# Empty string should be interpreted as all addresses
listen = '::'
# IPv4 address detection heuristic
if ':' not in listen and '.' in listen:
# Rewrite IPv4 address to IPv6-mapped form
listen = f'::ffff:{listen}'
# Create the http server
self._httpd = MatematHTTPServer((listen, port), HttpHandler, webroot)
def start(self) -> None:
"""
Start the web server. This call blocks while the server is running.
"""
self._httpd.serve_forever()
class HttpHandler(BaseHTTPRequestHandler):
"""
HTTP Request handler.
This class parses HTTP requests, and calls the appropriate pagelets, or fetches a static resource from the webroot
directory.
"""
def __init__(self, request: bytes, client_address: Tuple[str, int], server: HTTPServer) -> None:
super().__init__(request, client_address, server)
self.server: MatematHTTPServer
@property
def server_version(self) -> str:
return f'matemat/{matemat_version}'
def _start_session(self) -> Tuple[str, datetime]:
"""
Start a new session, or resume the session identified by the session cookie sent in the HTTP request.
:return: A tuple consisting of the session ID (a UUID string), and the session timeout date.
"""
# Reference date for session timeout
now = datetime.utcnow()
# Parse cookies sent by the client
cookiestring = '\n'.join(self.headers.get_all('Cookie', failobj=[]))
cookie = SimpleCookie()
cookie.load(cookiestring)
# Read the client's session ID, if any
session_id = str(cookie['matemat_session_id'].value) if 'matemat_session_id' in cookie else None
# If there is no active session, create a new session ID
if session_id is None or session_id not in self.server.session_vars:
session_id = str(uuid4())
# Check for session timeout
if session_id in self.server.session_vars and self.server.session_vars[session_id][0] < now:
self._end_session(session_id)
raise TimeoutError('Session timed out.')
# Update or initialize the session timeout
if session_id not in self.server.session_vars:
self.server.session_vars[session_id] = (now + timedelta(seconds=_SESSION_TIMEOUT)), dict()
else:
self.server.session_vars[session_id] = \
(now + timedelta(seconds=_SESSION_TIMEOUT), self.server.session_vars[session_id][1])
# Return the session ID and timeout
return session_id, self.server.session_vars[session_id][0]
def _end_session(self, session_id: str) -> None:
"""
Destroy a session identified by the session ID.
:param session_id: ID of the session to destroy.
"""
if session_id in self.server.session_vars:
del self.server.session_vars[session_id]
@staticmethod
def _parse_pagelet_result(pagelet_res: Union[bytes, str, Tuple[int, str]], headers: Dict[str, str]) \
-> Tuple[int, bytes]:
"""
Process the return value of a pagelet function call.
:param pagelet_res: The pagelet return value.
:param headers: The dict of HTTP response headers, needed for setting the redirect header.
:return: The HTTP Response status code (an int) and body (a bytes).
:raises TypeError: If the pagelet result was not in the expected form.
"""
# The HTTP Response Status Code, defaults to 200 OK
hsc: int = 200
# The HTTP Response body, defaults to None
data: Union[bytes, str] = None
if isinstance(pagelet_res, tuple):
# If the return type is a tuple, the first element must be 301 (the HTTP Redirect status code)
head, tail = pagelet_res
if head == 301:
# Set the HTTP Response Status Code, and the redirect header
hsc = 301
headers['Location'] = tail
else:
raise TypeError(f'Return value of pagelet not understood: {pagelet_res}')
elif isinstance(pagelet_res, str) or isinstance(pagelet_res, bytes):
# Return value is a response body
data = pagelet_res
else:
# Return value is not a response body or a redirect
raise TypeError(f'Return value of pagelet not understood: {pagelet_res}')
# The pagelet may return None as data as a shorthand for an empty response
if data is None:
data = bytes()
# If the pagelet returns a Python str, convert it to an UTF-8 encoded bytes object
if isinstance(data, str):
data = data.encode('utf-8')
# Return the resulting status code and body
return hsc, data
def _handle(self, method: str, path: str, args: RequestArguments) -> None:
"""
Handle a HTTP request by either dispatching it to the appropriate pagelet or by serving a static resource.
:param method: The HTTP request method (GET, POST).
:param path: The request path without GET arguments.
:param args: Arguments sent with the request. This includes GET and POST arguments, where the POST arguments
take precedence.
"""
# Start or resume a session; redirect to / on session timeout
try:
session_id, timeout = self._start_session()
except TimeoutError:
self.send_response(302)
self.send_header('Set-Cookie', 'matemat_session_id=; expires=Thu, 01 Jan 1970 00:00:00 GMT')
self.send_header('Location', '/')
self.end_headers()
return
self.session_id: str = session_id
# Call a pagelet function, if one is registered for the requested path
if path in _PAGELET_PATHS:
# Prepare some headers. Those can still be overwritten by the pagelet
headers: Dict[str, str] = {
'Content-Type': 'text/html',
'Cache-Control': 'no-cache'
}
# Call the pagelet function
pagelet_res = _PAGELET_PATHS[path](method, path, args, self.session_vars, headers)
# Parse the pagelet's return value, vielding a HTTP status code and a response body
hsc, data = HttpHandler._parse_pagelet_result(pagelet_res, headers)
# Send the HTTP status code
self.send_response(hsc)
# Format the session cookie timeout string and send the session cookie header
expires = timeout.strftime("%a, %d %b %Y %H:%M:%S GMT")
self.send_header('Set-Cookie',
f'matemat_session_id={session_id}; expires={expires}')
# Compute the body length and add the appropriate header
headers['Content-Length'] = str(len(data))
# Send all headers set by the pagelet
for name, value in headers.items():
self.send_header(name, value)
# End the header section and write the body
self.end_headers()
self.wfile.write(data)
else:
# No pagelet function for this path, try a static serve instead
# Only HTTP GET is allowed, else reply with a 'Method Not Allowed' header
if method != 'GET':
self.send_error(405)
self.end_headers()
return
# Create the absolute resource path, resolving '..'
filepath: str = os.path.abspath(os.path.join(self.server.webroot, path[1:]))
# Make sure the file is actually inside the webroot directory and that it exists
if os.path.commonpath([filepath, self.server.webroot]) == self.server.webroot and os.path.exists(filepath):
# Open and read the file
with open(filepath, 'rb') as f:
data = f.read()
# File read successfully, send 'OK' header
self.send_response(200)
# TODO: Guess the MIME type. Unfortunately this call solely relies on the file extension, not ideal?
mimetype, _ = mimetypes.guess_type(filepath)
# Fall back to octet-stream type, if unknown
if mimetype is None:
mimetype = 'application/octet-stream'
# Send content type and length header
self.send_header('Content-Type', mimetype)
self.send_header('Content-Length', str(len(data)))
self.end_headers()
# Send the requested resource as response body
self.wfile.write(data)
else:
# File does not exist or path points outside the webroot directory
self.send_response(404)
self.end_headers()
# noinspection PyPep8Naming
def do_GET(self) -> None:
"""
Called by BasicHTTPRequestHandler for GET requests.
"""
try:
# Parse the request and hand it to the handle function
path, args = parse_args(self.path)
self._handle('GET', path, args)
# Special handling for some errors
except HttpException as e:
self.send_error(e.status, e.title, e.message)
except PermissionError:
self.send_error(403, 'Forbidden')
except ValueError:
self.send_error(400, 'Bad Request')
except BaseException as e:
# Generic error handling
self.send_error(500, 'Internal Server Error')
print(e)
traceback.print_tb(e.__traceback__)
# noinspection PyPep8Naming
def do_POST(self) -> None:
"""
Called by BasicHTTPRequestHandler for POST requests.
"""
try:
# Read the POST body, if it exists, and its MIME type is application/x-www-form-urlencoded
clen: int = int(str(self.headers.get('Content-Length', failobj='0')))
if clen > _MAX_POST:
raise ValueError('Request too big')
ctype: str = self.headers.get('Content-Type', failobj='application/octet-stream')
post: bytes = self.rfile.read(clen)
path, args = parse_args(self.path, postbody=post, enctype=ctype)
# Parse the request and hand it to the handle function
self._handle('POST', path, args)
# Special handling for some errors
except HttpException as e:
self.send_error(e.status, e.title, e.message)
except PermissionError:
self.send_error(403, 'Forbidden')
except ValueError:
self.send_error(400, 'Bad Request')
except BaseException as e:
# Generic error handling
self.send_error(500, 'Internal Server Error')
print(e)
traceback.print_tb(e.__traceback__)
@property
def session_vars(self) -> Dict[str, Any]:
"""
Get the session variables for the current session.
:return: Dictionary of named session variables.
"""
return self.server.session_vars[self.session_id][1]