Use a libmagic wrapper to guess Content-Type headers.

This commit is contained in:
s3lph 2018-07-20 00:32:51 +02:00
parent 96657c122b
commit 5912d1e624
4 changed files with 27 additions and 12 deletions

View file

@ -18,6 +18,7 @@ This project intends to provide a well-tested and maintainable alternative to
- Python 3 (>=3.6) - Python 3 (>=3.6)
- Python dependencies: - Python dependencies:
- file-magic
- jinja2 - jinja2
## Usage ## Usage

View file

@ -4,7 +4,7 @@ from typing import Any, Callable, Dict, Tuple, Type, Union
import logging import logging
import os import os
import socket import socket
import mimetypes import magic
from socketserver import TCPServer from socketserver import TCPServer
from http.server import HTTPServer, BaseHTTPRequestHandler from http.server import HTTPServer, BaseHTTPRequestHandler
from http.cookies import SimpleCookie from http.cookies import SimpleCookie
@ -308,7 +308,6 @@ class HttpHandler(BaseHTTPRequestHandler):
if path in _PAGELET_PATHS: if path in _PAGELET_PATHS:
# Prepare some headers. Those can still be overwritten by the pagelet # Prepare some headers. Those can still be overwritten by the pagelet
headers: Dict[str, str] = { headers: Dict[str, str] = {
'Content-Type': 'text/html',
'Cache-Control': 'no-cache' 'Cache-Control': 'no-cache'
} }
# Call the pagelet function # Call the pagelet function
@ -328,6 +327,10 @@ class HttpHandler(BaseHTTPRequestHandler):
f'matemat_session_id={session_id}; expires={expires}') f'matemat_session_id={session_id}; expires={expires}')
# Compute the body length and add the appropriate header # Compute the body length and add the appropriate header
headers['Content-Length'] = str(len(data)) headers['Content-Length'] = str(len(data))
# If the pagelet did not set its own Content-Type header, use libmagic to guess an appropriate one
if 'Content-Type' not in headers:
filemagic: magic.FileMagic = magic.detect_from_content(data)
headers['Content-Type'] = f'{filemagic.mime_type}; charset={filemagic.encoding}'
# Send all headers set by the pagelet # Send all headers set by the pagelet
for name, value in headers.items(): for name, value in headers.items():
self.send_header(name, value) self.send_header(name, value)
@ -365,13 +368,12 @@ class HttpHandler(BaseHTTPRequestHandler):
data = f.read() data = f.read()
# File read successfully, send 'OK' header # File read successfully, send 'OK' header
self.send_response(200) self.send_response(200)
# TODO: Guess the MIME type. Unfortunately this call solely relies on the file extension, not ideal? # Guess the MIME type and encoding using libmagic
mimetype, _ = mimetypes.guess_type(filepath) filemagic: magic.FileMagic = magic.detect_from_filename(filepath)
# Fall back to octet-stream type, if unknown mimetype: str = filemagic.mime_type
if mimetype is None: charset: str = filemagic.encoding
mimetype = 'application/octet-stream'
# Send content type and length header # Send content type and length header
self.send_header('Content-Type', mimetype) self.send_header('Content-Type', f'{mimetype}; charset={charset}')
self.send_header('Content-Length', str(len(data))) self.send_header('Content-Length', str(len(data)))
self.send_header('Last-Modified', fileage.strftime('%a, %d %b %Y %H:%M:%S GMT')) self.send_header('Last-Modified', fileage.strftime('%a, %d %b %Y %H:%M:%S GMT'))
self.send_header('Cache-Control', 'max-age=1') self.send_header('Cache-Control', 'max-age=1')

View file

@ -17,7 +17,6 @@ def serve_test_pagelet_str(method: str,
session_vars: Dict[str, Any], session_vars: Dict[str, Any],
headers: Dict[str, str], headers: Dict[str, str],
pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]: pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]:
headers['Content-Type'] = 'text/plain'
return 'serve test pagelet str' return 'serve test pagelet str'
@ -28,7 +27,7 @@ def serve_test_pagelet_bytes(method: str,
session_vars: Dict[str, Any], session_vars: Dict[str, Any],
headers: Dict[str, str], headers: Dict[str, str],
pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]: pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]:
headers['Content-Type'] = 'application/octet-stream' headers['Content-Type'] = 'application/x-foo-bar'
return b'serve\x80test\xffpagelet\xfebytes' return b'serve\x80test\xffpagelet\xfebytes'
@ -49,7 +48,6 @@ def serve_test_pagelet_template(method: str,
session_vars: Dict[str, Any], session_vars: Dict[str, Any],
headers: Dict[str, str], headers: Dict[str, str],
pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]: pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]:
headers['Content-Type'] = 'text/plain'
return TemplateResponse('test.txt', what='World') return TemplateResponse('test.txt', what='World')
@ -62,7 +60,6 @@ def serve_test_pagelet_fail(method: str,
headers: Dict[str, str], headers: Dict[str, str],
pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]: pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]:
session_vars['test'] = 'hello, world!' session_vars['test'] = 'hello, world!'
headers['Content-Type'] = 'text/plain'
raise HttpException(599, 'Error expected during unit testing') raise HttpException(599, 'Error expected during unit testing')
@ -228,3 +225,17 @@ class TestServe(AbstractHttpdTest):
self.assertIsNone(packet.pagelet) self.assertIsNone(packet.pagelet)
# Make sure a 405 Method Not Allowed header is served # Make sure a 405 Method Not Allowed header is served
self.assertEqual(405, packet.statuscode) self.assertEqual(405, packet.statuscode)
def test_serve_static_libmagic(self):
    """
    The correct Content-Type header must be guessed if a pagelet does not provide one.

    Requests the ``/just/testing/serve_pagelet_str`` path and expects the response to carry
    ``text/plain; charset=us-ascii`` as its Content-Type.
    """
    request: bytes = b'GET /just/testing/serve_pagelet_str HTTP/1.1\r\n\r\n'
    self.client_sock.set_request(request)
    # The handler instance itself is not needed; it is constructed for its effect on the mock socket
    # (presumably the request is handled during construction - confirm against BaseHTTPRequestHandler)
    HttpHandler(self.client_sock, ('::1', 45678), self.server)
    response = self.client_sock.get_response()
    content_type = response.headers['Content-Type']
    self.assertEqual('text/plain; charset=us-ascii', content_type)
def test_serve_static_libmagic_skipped(self):
    """
    The Content-Type set by a pagelet should not be overwritten.

    Requests the ``/just/testing/serve_pagelet_bytes`` path and expects the response to carry
    ``application/x-foo-bar`` as its Content-Type, without any guessed value or charset suffix.
    """
    request: bytes = b'GET /just/testing/serve_pagelet_bytes HTTP/1.1\r\n\r\n'
    self.client_sock.set_request(request)
    # The handler instance itself is not needed; it is constructed for its effect on the mock socket
    # (presumably the request is handled during construction - confirm against BaseHTTPRequestHandler)
    HttpHandler(self.client_sock, ('::1', 45678), self.server)
    response = self.client_sock.get_response()
    content_type = response.headers['Content-Type']
    self.assertEqual('application/x-foo-bar', content_type)

View file

@ -1 +1,2 @@
file-magic
jinja2 jinja2