Added lots of code documentation to the httpd module.
This commit is contained in:
parent
3caf7d2477
commit
97b8d8b054
2 changed files with 142 additions and 31 deletions
|
@ -13,4 +13,4 @@ if __name__ == '__main__':
|
||||||
port = int(sys.argv[1])
|
port = int(sys.argv[1])
|
||||||
|
|
||||||
# Start the web server
|
# Start the web server
|
||||||
MatematWebserver(port).start()
|
MatematWebserver(port=port).start()
|
||||||
|
|
|
@ -16,15 +16,18 @@ from datetime import datetime, timedelta
|
||||||
from matemat import __version__ as matemat_version
|
from matemat import __version__ as matemat_version
|
||||||
|
|
||||||
|
|
||||||
# Enable IPv6 support (with implicit DualStack).
|
# Enable IPv6 support (with implicit DualStack)
|
||||||
TCPServer.address_family = socket.AF_INET6
|
TCPServer.address_family = socket.AF_INET6
|
||||||
|
|
||||||
|
|
||||||
# Dictionary to hold registered pagelet paths and their handler functions.
|
# Dictionary to hold registered pagelet paths and their handler functions
|
||||||
_PAGELET_PATHS: Dict[str, Callable[[str, str, Dict[str, str], Dict[str, Any], Dict[str, str], bytes],
|
_PAGELET_PATHS: Dict[str, Callable[[str, str, Dict[str, str], Dict[str, Any], Dict[str, str], bytes],
|
||||||
Tuple[int, Union[bytes, str]]]] = dict()
|
Tuple[int, Union[bytes, str]]]] = dict()
|
||||||
|
|
||||||
|
|
||||||
|
_SESSION_TIMEOUT: int = 3600
|
||||||
|
|
||||||
|
|
||||||
def pagelet(path: str):
|
def pagelet(path: str):
|
||||||
"""
|
"""
|
||||||
Annotate a function to act as a pagelet (part of a website). The function will be called if a request is made to
|
Annotate a function to act as a pagelet (part of a website). The function will be called if a request is made to
|
||||||
|
@ -47,27 +50,66 @@ def pagelet(path: str):
|
||||||
"""
|
"""
|
||||||
def http_handler(fun: Callable[[str, str, Dict[str, str], Dict[str, Any], Dict[str, str], bytes],
|
def http_handler(fun: Callable[[str, str, Dict[str, str], Dict[str, Any], Dict[str, str], bytes],
|
||||||
Tuple[int, Union[bytes, str]]]):
|
Tuple[int, Union[bytes, str]]]):
|
||||||
# Add the function to the dict of pagelets.
|
# Add the function to the dict of pagelets
|
||||||
_PAGELET_PATHS[path] = fun
|
_PAGELET_PATHS[path] = fun
|
||||||
# Don't change the function itself at all.
|
# Don't change the function itself at all
|
||||||
return fun
|
return fun
|
||||||
# Return the inner function (Python requires a "real" function annotation to not have any arguments except
|
# Return the inner function (Python requires a "real" function annotation to not have any arguments except
|
||||||
# the function itself).
|
# the function itself)
|
||||||
return http_handler
|
return http_handler
|
||||||
|
|
||||||
|
|
||||||
class MatematWebserver(object):
|
class MatematWebserver(object):
|
||||||
|
"""
|
||||||
|
The main webserver class, internally uses Python's http.server.
|
||||||
|
|
||||||
def __init__(self, port: int = 80, webroot: str = './webroot') -> None:
|
The server will serve a pagelet, if one is defined for a request path, else it will attempt to serve a static
|
||||||
self._httpd = HTTPServer(('::', port), HttpHandler)
|
resource from the webroot.
|
||||||
|
|
||||||
|
Usage:
|
||||||
|
|
||||||
|
# Listen on all interfaces on port 80 (dual-stack IPv6/IPv4)
|
||||||
|
server = MatematWebserver('::', 80, webroot='/var/www/matemat')
|
||||||
|
# Start the server. This call blocks while the server is running.
|
||||||
|
server.start()
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, listen: str = '::', port: int = 80, webroot: str = './webroot') -> None:
|
||||||
|
"""
|
||||||
|
Instantiate a MatematWebserver.
|
||||||
|
|
||||||
|
:param listen: The IPv4 or IPv6 address to listen on
|
||||||
|
:param port: The TCP port to listen on
|
||||||
|
:param webroot: Path to the webroot directory
|
||||||
|
"""
|
||||||
|
if len(listen) == 0:
|
||||||
|
# Empty string should be interpreted as all addresses
|
||||||
|
listen = '::'
|
||||||
|
# IPv4 address detection heuristic
|
||||||
|
if ':' not in listen and '.' in listen:
|
||||||
|
# Rewrite IPv4 address to IPv6-mapped form
|
||||||
|
listen = f'::ffff:{listen}'
|
||||||
|
# Create the http server
|
||||||
|
self._httpd = HTTPServer((listen, port), HttpHandler)
|
||||||
|
# Set up session vars dict
|
||||||
self._httpd.session_vars: Dict[str, Tuple[datetime, Dict[str, Any]]] = dict()
|
self._httpd.session_vars: Dict[str, Tuple[datetime, Dict[str, Any]]] = dict()
|
||||||
|
# Resolve webroot directory
|
||||||
self._httpd.webroot = os.path.abspath(webroot)
|
self._httpd.webroot = os.path.abspath(webroot)
|
||||||
|
|
||||||
def start(self) -> None:
|
def start(self) -> None:
|
||||||
|
"""
|
||||||
|
Start the web server. This call blocks while the server is running.
|
||||||
|
"""
|
||||||
self._httpd.serve_forever()
|
self._httpd.serve_forever()
|
||||||
|
|
||||||
|
|
||||||
class HttpHandler(BaseHTTPRequestHandler):
|
class HttpHandler(BaseHTTPRequestHandler):
|
||||||
|
"""
|
||||||
|
HTTP Request handler.
|
||||||
|
|
||||||
|
This class parses HTTP requests, and calls the appropriate pagelets, or fetches a static resource from the webroot
|
||||||
|
directory.
|
||||||
|
"""
|
||||||
|
|
||||||
def __init__(self, request: bytes, client_address: Tuple[str, int], server: HTTPServer) -> None:
|
def __init__(self, request: bytes, client_address: Tuple[str, int], server: HTTPServer) -> None:
|
||||||
super().__init__(request, client_address, server)
|
super().__init__(request, client_address, server)
|
||||||
|
@ -76,29 +118,58 @@ class HttpHandler(BaseHTTPRequestHandler):
|
||||||
def server_version(self) -> str:
|
def server_version(self) -> str:
|
||||||
return f'matemat/{matemat_version}'
|
return f'matemat/{matemat_version}'
|
||||||
|
|
||||||
def start_session(self) -> Tuple[str, datetime]:
|
def _start_session(self) -> Tuple[str, datetime]:
|
||||||
|
"""
|
||||||
|
Start a new session, or resume the session identified by the session cookie sent in the HTTP request.
|
||||||
|
|
||||||
|
:return: A tuple consisting of the session ID (a UUID string), and the session timeout date.
|
||||||
|
"""
|
||||||
|
# Reference date for session timeout
|
||||||
now = datetime.utcnow()
|
now = datetime.utcnow()
|
||||||
|
# Parse cookies sent by the client
|
||||||
cookiestring = '\n'.join(self.headers.get_all('Cookie', failobj=[]))
|
cookiestring = '\n'.join(self.headers.get_all('Cookie', failobj=[]))
|
||||||
cookie = SimpleCookie()
|
cookie = SimpleCookie()
|
||||||
cookie.load(cookiestring)
|
cookie.load(cookiestring)
|
||||||
|
# Read the client's session ID, if any
|
||||||
session_id = str(cookie['matemat_session_id'].value) if 'matemat_session_id' in cookie else None
|
session_id = str(cookie['matemat_session_id'].value) if 'matemat_session_id' in cookie else None
|
||||||
|
# If there is no active session, create a new session ID
|
||||||
if session_id is None or session_id not in self.server.session_vars:
|
if session_id is None or session_id not in self.server.session_vars:
|
||||||
session_id = str(uuid4())
|
session_id = str(uuid4())
|
||||||
|
|
||||||
|
# Check for session timeout
|
||||||
if session_id in self.server.session_vars and self.server.session_vars[session_id][0] < now:
|
if session_id in self.server.session_vars and self.server.session_vars[session_id][0] < now:
|
||||||
self.end_session(session_id)
|
self._end_session(session_id)
|
||||||
raise TimeoutError('Session timed out.')
|
raise TimeoutError('Session timed out.')
|
||||||
elif session_id not in self.server.session_vars:
|
# Update or initialize the session timeout
|
||||||
self.server.session_vars[session_id] = (now + timedelta(seconds=10)), dict()
|
if session_id not in self.server.session_vars:
|
||||||
|
self.server.session_vars[session_id] = (now + timedelta(seconds=_SESSION_TIMEOUT)), dict()
|
||||||
|
else:
|
||||||
|
self.server.session_vars[session_id] =\
|
||||||
|
(now + timedelta(seconds=_SESSION_TIMEOUT), self.server.session_vars[session_id][1])
|
||||||
|
# Return the session ID and timeout
|
||||||
return session_id, self.server.session_vars[session_id][0]
|
return session_id, self.server.session_vars[session_id][0]
|
||||||
|
|
||||||
def end_session(self, session_id: str) -> None:
|
def _end_session(self, session_id: str) -> None:
|
||||||
|
"""
|
||||||
|
Destroy a session identified by the session ID.
|
||||||
|
|
||||||
|
:param session_id: ID of the session to destroy.
|
||||||
|
"""
|
||||||
if session_id in self.server.session_vars:
|
if session_id in self.server.session_vars:
|
||||||
del self.server.session_vars[session_id]
|
del self.server.session_vars[session_id]
|
||||||
|
|
||||||
def _handle(self, method: str, path: str, args: Dict[str, str]) -> None:
|
def _handle(self, method: str, path: str, args: Dict[str, str]) -> None:
|
||||||
|
"""
|
||||||
|
Handle a HTTP request by either dispatching it to the appropriate pagelet or by serving a static resource.
|
||||||
|
|
||||||
|
:param method: The HTTP request method (GET, POST).
|
||||||
|
:param path: The request path without GET arguments.
|
||||||
|
:param args: Arguments sent with the request. This includes GET and POST arguments, where the POST arguments
|
||||||
|
take precedence.
|
||||||
|
"""
|
||||||
|
# Start or resume a session; report an error on session timeout
|
||||||
try:
|
try:
|
||||||
session_id, timeout = self.start_session()
|
session_id, timeout = self._start_session()
|
||||||
except TimeoutError:
|
except TimeoutError:
|
||||||
self.send_error(599, 'Session Timed Out', 'Session Timed Out.')
|
self.send_error(599, 'Session Timed Out', 'Session Timed Out.')
|
||||||
self.send_header('Set-Cookie', 'matemat_session_id=; expires=Thu, 01 Jan 1970 00:00:00 GMT')
|
self.send_header('Set-Cookie', 'matemat_session_id=; expires=Thu, 01 Jan 1970 00:00:00 GMT')
|
||||||
|
@ -106,42 +177,63 @@ class HttpHandler(BaseHTTPRequestHandler):
|
||||||
return
|
return
|
||||||
self.session_id: str = session_id
|
self.session_id: str = session_id
|
||||||
|
|
||||||
|
# Call a pagelet function, if one is registered for the requested path
|
||||||
if path in _PAGELET_PATHS:
|
if path in _PAGELET_PATHS:
|
||||||
|
# Prepare some headers. Those can still be overwritten by the pagelet
|
||||||
headers: Dict[str, str] = {
|
headers: Dict[str, str] = {
|
||||||
'Content-Type': 'text/html',
|
'Content-Type': 'text/html',
|
||||||
'Cache-Control': 'no-cache'
|
'Cache-Control': 'no-cache'
|
||||||
}
|
}
|
||||||
|
# Call the pagelet function
|
||||||
hsc, data = _PAGELET_PATHS[path](method, path, args, self.session_vars, headers)
|
hsc, data = _PAGELET_PATHS[path](method, path, args, self.session_vars, headers)
|
||||||
|
# The pagelet may return None as data as a shorthand for an empty response
|
||||||
if data is None:
|
if data is None:
|
||||||
data = bytes()
|
data = bytes()
|
||||||
|
# If the pagelet returns a Python str, convert it to a UTF-8 encoded bytes object
|
||||||
if isinstance(data, str):
|
if isinstance(data, str):
|
||||||
data = data.encode('utf-8')
|
data = data.encode('utf-8')
|
||||||
|
# Send the HTTP status code
|
||||||
self.send_response(hsc)
|
self.send_response(hsc)
|
||||||
|
# Format the session cookie timeout string and send the session cookie header
|
||||||
expires = timeout.strftime("%a, %d %b %Y %H:%M:%S GMT")
|
expires = timeout.strftime("%a, %d %b %Y %H:%M:%S GMT")
|
||||||
self.send_header('Set-Cookie',
|
self.send_header('Set-Cookie',
|
||||||
f'matemat_session_id={session_id}; expires={expires}')
|
f'matemat_session_id={session_id}; expires={expires}')
|
||||||
|
# Compute the body length and add the appropriate header
|
||||||
headers['Content-Length'] = str(len(data))
|
headers['Content-Length'] = str(len(data))
|
||||||
|
# Send all headers set by the pagelet
|
||||||
for name, value in headers.items():
|
for name, value in headers.items():
|
||||||
self.send_header(name, value)
|
self.send_header(name, value)
|
||||||
|
# End the header section and write the body
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
self.wfile.write(data)
|
self.wfile.write(data)
|
||||||
else:
|
else:
|
||||||
|
# No pagelet function for this path, try a static serve instead
|
||||||
|
# Only HTTP GET is allowed, else reply with a 'Method Not Allowed' header
|
||||||
if method != 'GET':
|
if method != 'GET':
|
||||||
self.send_error(405)
|
self.send_error(405)
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
return
|
return
|
||||||
|
# Create the absolute resource path, resolving '..'
|
||||||
filepath: str = os.path.abspath(os.path.join(self.server.webroot, path[1:]))
|
filepath: str = os.path.abspath(os.path.join(self.server.webroot, path[1:]))
|
||||||
|
# Make sure the file is actually inside the webroot directory and that it exists
|
||||||
if os.path.commonpath([filepath, self.server.webroot]) == self.server.webroot and os.path.exists(filepath):
|
if os.path.commonpath([filepath, self.server.webroot]) == self.server.webroot and os.path.exists(filepath):
|
||||||
|
# Open and read the file
|
||||||
with open(filepath, 'rb') as f:
|
with open(filepath, 'rb') as f:
|
||||||
data = f.read()
|
data = f.read()
|
||||||
|
# File read successfully, send 'OK' header
|
||||||
self.send_response(200)
|
self.send_response(200)
|
||||||
|
# TODO: Guess the MIME type. Unfortunately this call solely relies on the file extension, not ideal?
|
||||||
mimetype, _ = mimetypes.guess_type(filepath)
|
mimetype, _ = mimetypes.guess_type(filepath)
|
||||||
if mimetype is not None:
|
# Fall back to octet-stream type, if unknown
|
||||||
|
if mimetype is None:
|
||||||
|
mimetype = 'application/octet-stream'
|
||||||
|
# Send content type header
|
||||||
self.send_header('Content-Type', mimetype)
|
self.send_header('Content-Type', mimetype)
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
if method == 'GET':
|
# Send the requested resource as response body
|
||||||
self.wfile.write(data)
|
self.wfile.write(data)
|
||||||
else:
|
else:
|
||||||
|
# File does not exist or path points outside the webroot directory
|
||||||
self.send_response(404)
|
self.send_response(404)
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
|
|
||||||
|
@ -150,77 +242,96 @@ class HttpHandler(BaseHTTPRequestHandler):
|
||||||
"""
|
"""
|
||||||
Given a HTTP request path, and optionally a HTTP POST body in application/x-www-form-urlencoded form, parse the
|
Given a HTTP request path, and optionally a HTTP POST body in application/x-www-form-urlencoded form, parse the
|
||||||
arguments and return them as a dictionary.
|
arguments and return them as a dictionary.
|
||||||
|
|
||||||
If a key is used both in GET and in POST, the POST value takes precedence, and the GET value is discarded.
|
If a key is used both in GET and in POST, the POST value takes precedence, and the GET value is discarded.
|
||||||
|
|
||||||
:param request: The request string to parse.
|
:param request: The request string to parse.
|
||||||
:param postbody: The POST body to parse, defaults to None.
|
:param postbody: The POST body to parse, defaults to None.
|
||||||
:return: A tuple consisting of the base path and a dictionary with the parsed key/value pairs.
|
:return: A tuple consisting of the base path and a dictionary with the parsed key/value pairs.
|
||||||
"""
|
"""
|
||||||
# Parse the request "URL" (i.e. only the path).
|
# Parse the request "URL" (i.e. only the path)
|
||||||
tokens = urllib.parse.urlparse(request)
|
tokens = urllib.parse.urlparse(request)
|
||||||
# Parse the GET arguments.
|
# Parse the GET arguments
|
||||||
args = urllib.parse.parse_qs(tokens.query)
|
args = urllib.parse.parse_qs(tokens.query)
|
||||||
|
|
||||||
if postbody is not None:
|
if postbody is not None:
|
||||||
# Parse the POST body.
|
# Parse the POST body
|
||||||
postargs = urllib.parse.parse_qs(postbody)
|
postargs = urllib.parse.parse_qs(postbody)
|
||||||
# Write all POST values into the dict, overriding potential duplicates from GET.
|
# Write all POST values into the dict, overriding potential duplicates from GET
|
||||||
for k, v in postargs.items():
|
for k, v in postargs.items():
|
||||||
args[k] = v
|
args[k] = v
|
||||||
# urllib.parse.parse_qs turns ALL arguments into arrays. This turns arrays of length 1 into scalar values.
|
# urllib.parse.parse_qs turns ALL arguments into arrays. This turns arrays of length 1 into scalar values
|
||||||
for k, v in args.items():
|
for k, v in args.items():
|
||||||
if len(v) == 1:
|
if len(v) == 1:
|
||||||
args[k] = v[0]
|
args[k] = v[0]
|
||||||
# Return the path and the parsed arguments.
|
# Return the path and the parsed arguments
|
||||||
return tokens.path, args
|
return tokens.path, args
|
||||||
|
|
||||||
# noinspection PyPep8Naming
|
# noinspection PyPep8Naming
|
||||||
def do_GET(self) -> None:
|
def do_GET(self) -> None:
|
||||||
|
"""
|
||||||
|
Called by BaseHTTPRequestHandler for GET requests.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
|
# Parse the request and hand it to the handle function
|
||||||
path, args = self._parse_args(self.path)
|
path, args = self._parse_args(self.path)
|
||||||
self._handle('GET', path, args)
|
self._handle('GET', path, args)
|
||||||
|
# Special handling for some errors
|
||||||
except PermissionError as e:
|
except PermissionError as e:
|
||||||
self.send_error(403, 'Forbidden')
|
self.send_error(403, 'Forbidden')
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
print(type(e))
|
print(e)
|
||||||
traceback.print_tb(e.__traceback__)
|
traceback.print_tb(e.__traceback__)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
self.send_header(400, 'Bad Request')
|
self.send_header(400, 'Bad Request')
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
print(type(e))
|
print(e)
|
||||||
traceback.print_tb(e.__traceback__)
|
traceback.print_tb(e.__traceback__)
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
|
# Generic error handling
|
||||||
self.send_error(500, 'Internal Server Error')
|
self.send_error(500, 'Internal Server Error')
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
print(type(e))
|
print(e)
|
||||||
traceback.print_tb(e.__traceback__)
|
traceback.print_tb(e.__traceback__)
|
||||||
|
|
||||||
# noinspection PyPep8Naming
|
# noinspection PyPep8Naming
|
||||||
def do_POST(self) -> None:
|
def do_POST(self) -> None:
|
||||||
|
"""
|
||||||
|
Called by BaseHTTPRequestHandler for POST requests.
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
|
# Read the POST body, if it exists, and its MIME type is application/x-www-form-urlencoded
|
||||||
clen: str = self.headers.get('Content-Length', failobj='0')
|
clen: str = self.headers.get('Content-Length', failobj='0')
|
||||||
ctype: str = self.headers.get('Content-Type', failobj='application/octet-stream')
|
ctype: str = self.headers.get('Content-Type', failobj='application/octet-stream')
|
||||||
post = ''
|
post = ''
|
||||||
if ctype == 'application/x-www-form-urlencoded':
|
if ctype == 'application/x-www-form-urlencoded':
|
||||||
post: str = self.rfile.read(int(clen)).decode('utf-8')
|
post: str = self.rfile.read(int(clen)).decode('utf-8')
|
||||||
|
# Parse the request and hand it to the handle function
|
||||||
path, args = self._parse_args(self.path, postbody=post)
|
path, args = self._parse_args(self.path, postbody=post)
|
||||||
self._handle('POST', path, args)
|
self._handle('POST', path, args)
|
||||||
|
# Special handling for some errors
|
||||||
except PermissionError as e:
|
except PermissionError as e:
|
||||||
self.send_error(403, 'Forbidden')
|
self.send_error(403, 'Forbidden')
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
print(type(e))
|
print(e)
|
||||||
traceback.print_tb(e.__traceback__)
|
traceback.print_tb(e.__traceback__)
|
||||||
except ValueError as e:
|
except ValueError as e:
|
||||||
self.send_header(400, 'Bad Request')
|
self.send_header(400, 'Bad Request')
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
print(type(e))
|
print(e)
|
||||||
traceback.print_tb(e.__traceback__)
|
traceback.print_tb(e.__traceback__)
|
||||||
except BaseException as e:
|
except BaseException as e:
|
||||||
|
# Generic error handling
|
||||||
self.send_error(500, 'Internal Server Error')
|
self.send_error(500, 'Internal Server Error')
|
||||||
self.end_headers()
|
self.end_headers()
|
||||||
print(type(e))
|
print(e)
|
||||||
traceback.print_tb(e.__traceback__)
|
traceback.print_tb(e.__traceback__)
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def session_vars(self) -> Dict[str, Any]:
|
def session_vars(self) -> Dict[str, Any]:
|
||||||
|
"""
|
||||||
|
Get the session variables for the current session.
|
||||||
|
|
||||||
|
:return: Dictionary of named session variables.
|
||||||
|
"""
|
||||||
return self.server.session_vars[self.session_id][1]
|
return self.server.session_vars[self.session_id][1]
|
||||||
|
|
Loading…
Reference in a new issue