Added lots of code documentation to the httpd module.
This commit is contained in:
parent
3caf7d2477
commit
97b8d8b054
2 changed files with 142 additions and 31 deletions
|
@ -13,4 +13,4 @@ if __name__ == '__main__':
|
|||
port = int(sys.argv[1])
|
||||
|
||||
# Start the web server
|
||||
MatematWebserver(port).start()
|
||||
MatematWebserver(port=port).start()
|
||||
|
|
|
@ -16,15 +16,18 @@ from datetime import datetime, timedelta
|
|||
from matemat import __version__ as matemat_version
|
||||
|
||||
|
||||
# Enable IPv6 support (with implicit DualStack).
|
||||
# Enable IPv6 support (with implicit DualStack)
|
||||
TCPServer.address_family = socket.AF_INET6
|
||||
|
||||
|
||||
# Dictionary to hold registered pagelet paths and their handler functions.
|
||||
# Dictionary to hold registered pagelet paths and their handler functions
|
||||
_PAGELET_PATHS: Dict[str, Callable[[str, str, Dict[str, str], Dict[str, Any], Dict[str, str], bytes],
|
||||
Tuple[int, Union[bytes, str]]]] = dict()
|
||||
|
||||
|
||||
_SESSION_TIMEOUT: int = 3600
|
||||
|
||||
|
||||
def pagelet(path: str):
|
||||
"""
|
||||
Annotate a function to act as a pagelet (part of a website). The function will be called if a request is made to
|
||||
|
@ -47,27 +50,66 @@ def pagelet(path: str):
|
|||
"""
|
||||
def http_handler(fun: Callable[[str, str, Dict[str, str], Dict[str, Any], Dict[str, str], bytes],
|
||||
Tuple[int, Union[bytes, str]]]):
|
||||
# Add the function to the dict of pagelets.
|
||||
# Add the function to the dict of pagelets
|
||||
_PAGELET_PATHS[path] = fun
|
||||
# Don't change the function itself at all.
|
||||
# Don't change the function itself at all
|
||||
return fun
|
||||
# Return the inner function (Python requires a "real" function annotation to not have any arguments except
|
||||
# the function itself).
|
||||
# the function itself)
|
||||
return http_handler
|
||||
|
||||
|
||||
class MatematWebserver(object):
    """
    The main webserver class, internally uses Python's http.server.

    The server will serve a pagelet, if one is defined for a request path, else it will attempt to serve a static
    resource from the webroot.

    Usage:

        # Listen on all interfaces on port 80 (dual-stack IPv6/IPv4)
        server = MatematWebserver('::', 80, webroot='/var/www/matemat')
        # Start the server. This call blocks while the server is running.
        server.start()
    """

    def __init__(self, listen: str = '::', port: int = 80, webroot: str = './webroot') -> None:
        """
        Instantiate a MatematWebserver.

        :param listen: The IPv4 or IPv6 address to listen on. An empty string means all addresses.
        :param port: The TCP port to listen on
        :param webroot: Path to the webroot directory
        """
        # Create the http server
        self._httpd = HTTPServer((self._normalize_listen_address(listen), port), HttpHandler)
        # Set up session vars dict
        self._httpd.session_vars: Dict[str, Tuple[datetime, Dict[str, Any]]] = dict()
        # Resolve webroot directory
        self._httpd.webroot = os.path.abspath(webroot)

    @staticmethod
    def _normalize_listen_address(listen: str) -> str:
        """
        Turn the configured listen address into the form that is handed to the HTTP server.

        :param listen: The listen address as passed to the constructor.
        :return: '::' for an empty string, the IPv6-mapped form ('::ffff:a.b.c.d') for an IPv4 address literal, and
                 the unchanged input for everything else (IPv6 addresses, host names).
        """
        # Imported locally so that this class does not depend on an additional module-level import
        import ipaddress

        if not listen:
            # Empty string should be interpreted as all addresses
            return '::'
        try:
            # Only real IPv4 literals are rewritten; a dotted host name such as 'example.com' must stay untouched
            ipv4 = ipaddress.IPv4Address(listen)
        except ValueError:
            return listen
        # Rewrite IPv4 address to IPv6-mapped form
        return f'::ffff:{ipv4}'

    def start(self) -> None:
        """
        Start the web server. This call blocks while the server is running.
        """
        self._httpd.serve_forever()
|
||||
|
||||
|
||||
class HttpHandler(BaseHTTPRequestHandler):
|
||||
"""
|
||||
HTTP Request handler.
|
||||
|
||||
This class parses HTTP requests, and calls the appropriate pagelets, or fetches a static resource from the webroot
|
||||
directory.
|
||||
"""
|
||||
|
||||
def __init__(self, request: bytes, client_address: Tuple[str, int], server: HTTPServer) -> None:
    """
    Create a request handler; all arguments are passed on unchanged to the base class constructor.

    :param request: The request object handed in by the server.
    :param client_address: The client's address, annotated as a (host, port) tuple.
    :param server: The HTTPServer instance this handler belongs to.
    """
    super().__init__(request, client_address, server)
|
||||
|
@ -76,29 +118,58 @@ class HttpHandler(BaseHTTPRequestHandler):
|
|||
def server_version(self) -> str:
|
||||
return f'matemat/{matemat_version}'
|
||||
|
||||
def _start_session(self) -> Tuple[str, datetime]:
    """
    Start a new session, or resume the session identified by the session cookie sent in the HTTP request.

    :return: A tuple consisting of the session ID (a UUID string), and the session timeout date.
    :raises TimeoutError: If the session named by the cookie has timed out. The session is destroyed first.
    """
    # Reference date for session timeout
    now = datetime.utcnow()
    sessions = self.server.session_vars
    # Collect all Cookie headers sent by the client and parse them
    jar = SimpleCookie()
    jar.load('\n'.join(self.headers.get_all('Cookie', failobj=[])))
    # Read the client's session ID, if any
    morsel = jar.get('matemat_session_id')
    session_id = None if morsel is None else str(morsel.value)

    if session_id is None or session_id not in sessions:
        # No active session: create a new session ID with an empty set of session variables
        session_id = str(uuid4())
        variables = dict()
    else:
        # Known session: check for session timeout, keep its variables otherwise
        expiry, variables = sessions[session_id]
        if expiry < now:
            self._end_session(session_id)
            raise TimeoutError('Session timed out.')
    # Update or initialize the session timeout
    sessions[session_id] = (now + timedelta(seconds=_SESSION_TIMEOUT), variables)
    # Return the session ID and timeout
    return session_id, sessions[session_id][0]
|
||||
|
||||
def _end_session(self, session_id: str) -> None:
    """
    Destroy a session identified by the session ID.

    Unknown session IDs are ignored.

    :param session_id: ID of the session to destroy.
    """
    # pop() with a default removes the entry if present and is a no-op otherwise
    self.server.session_vars.pop(session_id, None)
|
||||
|
||||
def _handle(self, method: str, path: str, args: Dict[str, str]) -> None:
|
||||
"""
|
||||
Handle an HTTP request by either dispatching it to the appropriate pagelet or by serving a static resource.
|
||||
|
||||
:param method: The HTTP request method (GET, POST).
|
||||
:param path: The request path without GET arguments.
|
||||
:param args: Arguments sent with the request. This includes GET and POST arguments, where the POST arguments
|
||||
take precedence.
|
||||
"""
|
||||
# Start or resume a session; report an error on session timeout
|
||||
try:
|
||||
session_id, timeout = self.start_session()
|
||||
session_id, timeout = self._start_session()
|
||||
except TimeoutError:
|
||||
self.send_error(599, 'Session Timed Out', 'Session Timed Out.')
|
||||
self.send_header('Set-Cookie', 'matemat_session_id=; expires=Thu, 01 Jan 1970 00:00:00 GMT')
|
||||
|
@ -106,42 +177,63 @@ class HttpHandler(BaseHTTPRequestHandler):
|
|||
return
|
||||
self.session_id: str = session_id
|
||||
|
||||
# Call a pagelet function, if one is registered for the requested path
|
||||
if path in _PAGELET_PATHS:
|
||||
# Prepare some headers. Those can still be overwritten by the pagelet
|
||||
headers: Dict[str, str] = {
|
||||
'Content-Type': 'text/html',
|
||||
'Cache-Control': 'no-cache'
|
||||
}
|
||||
# Call the pagelet function
|
||||
hsc, data = _PAGELET_PATHS[path](method, path, args, self.session_vars, headers)
|
||||
# The pagelet may return None as data as a shorthand for an empty response
|
||||
if data is None:
|
||||
data = bytes()
|
||||
# If the pagelet returns a Python str, convert it to an UTF-8 encoded bytes object
|
||||
if isinstance(data, str):
|
||||
data = data.encode('utf-8')
|
||||
# Send the HTTP status code
|
||||
self.send_response(hsc)
|
||||
# Format the session cookie timeout string and send the session cookie header
|
||||
expires = timeout.strftime("%a, %d %b %Y %H:%M:%S GMT")
|
||||
self.send_header('Set-Cookie',
|
||||
f'matemat_session_id={session_id}; expires={expires}')
|
||||
# Compute the body length and add the appropriate header
|
||||
headers['Content-Length'] = str(len(data))
|
||||
# Send all headers set by the pagelet
|
||||
for name, value in headers.items():
|
||||
self.send_header(name, value)
|
||||
# End the header section and write the body
|
||||
self.end_headers()
|
||||
self.wfile.write(data)
|
||||
else:
|
||||
# No pagelet function for this path, try a static serve instead
|
||||
# Only HTTP GET is allowed, else reply with a 'Method Not Allowed' header
|
||||
if method != 'GET':
|
||||
self.send_error(405)
|
||||
self.end_headers()
|
||||
return
|
||||
# Create the absolute resource path, resolving '..'
|
||||
filepath: str = os.path.abspath(os.path.join(self.server.webroot, path[1:]))
|
||||
# Make sure the file is actually inside the webroot directory and that it exists
|
||||
if os.path.commonpath([filepath, self.server.webroot]) == self.server.webroot and os.path.exists(filepath):
|
||||
# Open and read the file
|
||||
with open(filepath, 'rb') as f:
|
||||
data = f.read()
|
||||
# File read successfully, send 'OK' header
|
||||
self.send_response(200)
|
||||
# TODO: Guess the MIME type. Unfortunately this call solely relies on the file extension, not ideal?
|
||||
mimetype, _ = mimetypes.guess_type(filepath)
|
||||
if mimetype is not None:
|
||||
self.send_header('Content-Type', mimetype)
|
||||
# Fall back to octet-stream type, if unknown
|
||||
if mimetype is None:
|
||||
mimetype = 'application/octet-stream'
|
||||
# Send content type header
|
||||
self.send_header('Content-Type', mimetype)
|
||||
self.end_headers()
|
||||
if method == 'GET':
|
||||
self.wfile.write(data)
|
||||
# Send the requested resource as response body
|
||||
self.wfile.write(data)
|
||||
else:
|
||||
# File does not exist or path points outside the webroot directory
|
||||
self.send_response(404)
|
||||
self.end_headers()
|
||||
|
||||
|
@ -150,77 +242,96 @@ class HttpHandler(BaseHTTPRequestHandler):
|
|||
"""
|
||||
Given an HTTP request path, and optionally an HTTP POST body in application/x-www-form-urlencoded form, parse the
|
||||
arguments and return them as a dictionary.
|
||||
|
||||
If a key is used both in GET and in POST, the POST value takes precedence, and the GET value is discarded.
|
||||
|
||||
:param request: The request string to parse.
|
||||
:param postbody: The POST body to parse, defaults to None.
|
||||
:return: A tuple consisting of the base path and a dictionary with the parsed key/value pairs.
|
||||
"""
|
||||
# Parse the request "URL" (i.e. only the path).
|
||||
# Parse the request "URL" (i.e. only the path)
|
||||
tokens = urllib.parse.urlparse(request)
|
||||
# Parse the GET arguments.
|
||||
# Parse the GET arguments
|
||||
args = urllib.parse.parse_qs(tokens.query)
|
||||
|
||||
if postbody is not None:
|
||||
# Parse the POST body.
|
||||
# Parse the POST body
|
||||
postargs = urllib.parse.parse_qs(postbody)
|
||||
# Write all POST values into the dict, overriding potential duplicates from GET.
|
||||
# Write all POST values into the dict, overriding potential duplicates from GET
|
||||
for k, v in postargs.items():
|
||||
args[k] = v
|
||||
# urllib.parse.parse_qs turns ALL arguments into arrays. This turns arrays of length 1 into scalar values.
|
||||
# urllib.parse.parse_qs turns ALL arguments into arrays. This turns arrays of length 1 into scalar values
|
||||
for k, v in args.items():
|
||||
if len(v) == 1:
|
||||
args[k] = v[0]
|
||||
# Return the path and the parsed arguments.
|
||||
# Return the path and the parsed arguments
|
||||
return tokens.path, args
|
||||
|
||||
# noinspection PyPep8Naming
def do_GET(self) -> None:
    """
    Called by BaseHTTPRequestHandler for GET requests.

    Parses the request path and hands the request to the handle function. An exception raised while doing so is
    answered with an HTTP error reply (403 for PermissionError, 400 for ValueError, 500 for everything else) and is
    printed together with its traceback.
    """
    try:
        # Parse the request and hand it to the handle function
        path, args = self._parse_args(self.path)
        self._handle('GET', path, args)
    except BaseException as e:
        # NOTE(review): BaseException also covers KeyboardInterrupt and SystemExit - confirm this is intended
        # Special handling for some errors, generic error handling for everything else
        if isinstance(e, PermissionError):
            code, message = 403, 'Forbidden'
        elif isinstance(e, ValueError):
            code, message = 400, 'Bad Request'
        else:
            code, message = 500, 'Internal Server Error'
        # send_error() sends a complete error reply (status line, headers and body); calling end_headers()
        # afterwards would write a stray blank line after the body
        self.send_error(code, message)
        print(type(e))
        print(e)
        traceback.print_tb(e.__traceback__)
|
||||
|
||||
# noinspection PyPep8Naming
def do_POST(self) -> None:
    """
    Called by BaseHTTPRequestHandler for POST requests.

    Reads the POST body if its media type is application/x-www-form-urlencoded, parses the request and hands it to
    the handle function. An exception raised while doing so is answered with an HTTP error reply (403 for
    PermissionError, 400 for ValueError, 500 for everything else) and is printed together with its traceback.
    """
    try:
        # Read the POST body, if it exists, and its MIME type is application/x-www-form-urlencoded
        content_length: str = self.headers.get('Content-Length', failobj='0')
        content_type: str = self.headers.get('Content-Type', failobj='application/octet-stream')
        # Compare only the media type, so that a parameter such as "; charset=UTF-8" does not hide the body
        media_type = content_type.split(';', 1)[0].strip().lower()
        post = ''
        if media_type == 'application/x-www-form-urlencoded':
            length = int(content_length)
            if length < 0:
                # read() with a negative size would block until the client closes the connection
                raise ValueError(f'Invalid Content-Length: {content_length}')
            post = self.rfile.read(length).decode('utf-8')

        # Parse the request and hand it to the handle function
        path, args = self._parse_args(self.path, postbody=post)
        self._handle('POST', path, args)
    except BaseException as e:
        # NOTE(review): BaseException also covers KeyboardInterrupt and SystemExit - confirm this is intended
        # Special handling for some errors, generic error handling for everything else
        if isinstance(e, PermissionError):
            code, message = 403, 'Forbidden'
        elif isinstance(e, ValueError):
            code, message = 400, 'Bad Request'
        else:
            code, message = 500, 'Internal Server Error'
        # send_error() sends a complete error reply (status line, headers and body); calling end_headers()
        # afterwards would write a stray blank line after the body
        self.send_error(code, message)
        print(type(e))
        print(e)
        traceback.print_tb(e.__traceback__)
|
||||
|
||||
@property
def session_vars(self) -> Dict[str, Any]:
    """
    Get the session variables for the current session.

    Looks up the entry stored for self.session_id in the server's session dictionary and returns its second
    element, the dictionary of session variables.

    :return: Dictionary of named session variables.
    """
    return self.server.session_vars[self.session_id][1]
|
||||
|
|
Loading…
Reference in a new issue