diff --git a/README.md b/README.md index 95aa201..49816f3 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ This project intends to provide a well-tested and maintainable alternative to - Python 3 (>=3.6) - Python dependencies: + - file-magic - jinja2 ## Usage diff --git a/matemat/webserver/httpd.py b/matemat/webserver/httpd.py index e962a8c..23045bc 100644 --- a/matemat/webserver/httpd.py +++ b/matemat/webserver/httpd.py @@ -4,7 +4,7 @@ from typing import Any, Callable, Dict, Tuple, Type, Union import logging import os import socket -import mimetypes +import magic from socketserver import TCPServer from http.server import HTTPServer, BaseHTTPRequestHandler from http.cookies import SimpleCookie @@ -308,7 +308,6 @@ class HttpHandler(BaseHTTPRequestHandler): if path in _PAGELET_PATHS: # Prepare some headers. Those can still be overwritten by the pagelet headers: Dict[str, str] = { - 'Content-Type': 'text/html', 'Cache-Control': 'no-cache' } # Call the pagelet function @@ -328,6 +327,10 @@ class HttpHandler(BaseHTTPRequestHandler): f'matemat_session_id={session_id}; expires={expires}') # Compute the body length and add the appropriate header headers['Content-Length'] = str(len(data)) + # If the pagelet did not set its own Content-Type header, use libmagic to guess an appropriate one + if 'Content-Type' not in headers: + filemagic: magic.FileMagic = magic.detect_from_content(data) + headers['Content-Type'] = f'{filemagic.mime_type}; charset={filemagic.encoding}' # Send all headers set by the pagelet for name, value in headers.items(): self.send_header(name, value) @@ -365,13 +368,12 @@ class HttpHandler(BaseHTTPRequestHandler): data = f.read() # File read successfully, send 'OK' header self.send_response(200) - # TODO: Guess the MIME type. Unfortunately this call solely relies on the file extension, not ideal? - mimetype, _ = mimetypes.guess_type(filepath) - # Fall back to octet-stream type, if unknown - if mimetype is None: - mimetype = 'application/octet-stream' + # Guess the MIME type and encoding using libmagic + filemagic: magic.FileMagic = magic.detect_from_filename(filepath) + mimetype: str = filemagic.mime_type + charset: str = filemagic.encoding # Send content type and length header - self.send_header('Content-Type', mimetype) + self.send_header('Content-Type', f'{mimetype}; charset={charset}') self.send_header('Content-Length', str(len(data))) self.send_header('Last-Modified', fileage.strftime('%a, %d %b %Y %H:%M:%S GMT')) self.send_header('Cache-Control', 'max-age=1') diff --git a/matemat/webserver/test/test_serve.py b/matemat/webserver/test/test_serve.py index c67e06e..2dd7e7e 100644 --- a/matemat/webserver/test/test_serve.py +++ b/matemat/webserver/test/test_serve.py @@ -17,7 +17,6 @@ def serve_test_pagelet_str(method: str, session_vars: Dict[str, Any], headers: Dict[str, str], pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]: - headers['Content-Type'] = 'text/plain' return 'serve test pagelet str' @@ -28,7 +27,7 @@ def serve_test_pagelet_bytes(method: str, session_vars: Dict[str, Any], headers: Dict[str, str], pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]: - headers['Content-Type'] = 'application/octet-stream' + headers['Content-Type'] = 'application/x-foo-bar' return b'serve\x80test\xffpagelet\xfebytes' @@ -49,7 +48,6 @@ def serve_test_pagelet_template(method: str, session_vars: Dict[str, Any], headers: Dict[str, str], pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]: - headers['Content-Type'] = 'text/plain' return TemplateResponse('test.txt', what='World') @@ -62,7 +60,6 @@ def serve_test_pagelet_fail(method: str, headers: Dict[str, str], pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]: session_vars['test'] = 'hello, world!' - headers['Content-Type'] = 'text/plain' raise HttpException(599, 'Error expected during unit testing') @@ -228,3 +225,17 @@ class TestServe(AbstractHttpdTest): self.assertIsNone(packet.pagelet) # Make sure a 405 Method Not Allowed header is served self.assertEqual(405, packet.statuscode) + + def test_serve_static_libmagic(self): + # The correct Content-Type header must be guessed, if a pagelet does not provide one + self.client_sock.set_request(b'GET /just/testing/serve_pagelet_str HTTP/1.1\r\n\r\n') + HttpHandler(self.client_sock, ('::1', 45678), self.server) + packet = self.client_sock.get_response() + self.assertEqual('text/plain; charset=us-ascii', packet.headers['Content-Type']) + + def test_serve_static_libmagic_skipped(self): + # The Content-Type set by a pagelet should not be overwritten + self.client_sock.set_request(b'GET /just/testing/serve_pagelet_bytes HTTP/1.1\r\n\r\n') + HttpHandler(self.client_sock, ('::1', 45678), self.server) + packet = self.client_sock.get_response() + self.assertEqual('application/x-foo-bar', packet.headers['Content-Type']) diff --git a/requirements.txt b/requirements.txt index 7f7afbf..4913234 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ +file-magic jinja2