Use a libmagic wrapper to guess Content-Type headers.

This commit is contained in:
s3lph 2018-07-20 00:32:51 +02:00
parent 96657c122b
commit 5912d1e624
4 changed files with 27 additions and 12 deletions

View file

@ -18,6 +18,7 @@ This project intends to provide a well-tested and maintainable alternative to
- Python 3 (>=3.6)
- Python dependencies:
- file-magic
- jinja2
## Usage

View file

@ -4,7 +4,7 @@ from typing import Any, Callable, Dict, Tuple, Type, Union
import logging
import os
import socket
import mimetypes
import magic
from socketserver import TCPServer
from http.server import HTTPServer, BaseHTTPRequestHandler
from http.cookies import SimpleCookie
@ -308,7 +308,6 @@ class HttpHandler(BaseHTTPRequestHandler):
if path in _PAGELET_PATHS:
# Prepare some headers. Those can still be overwritten by the pagelet
headers: Dict[str, str] = {
'Content-Type': 'text/html',
'Cache-Control': 'no-cache'
}
# Call the pagelet function
@ -328,6 +327,10 @@ class HttpHandler(BaseHTTPRequestHandler):
f'matemat_session_id={session_id}; expires={expires}')
# Compute the body length and add the appropriate header
headers['Content-Length'] = str(len(data))
# If the pagelet did not set its own Content-Type header, use libmagic to guess an appropriate one
if 'Content-Type' not in headers:
filemagic: magic.FileMagic = magic.detect_from_content(data)
headers['Content-Type'] = f'{filemagic.mime_type}; charset={filemagic.encoding}'
# Send all headers set by the pagelet
for name, value in headers.items():
self.send_header(name, value)
@ -365,13 +368,12 @@ class HttpHandler(BaseHTTPRequestHandler):
data = f.read()
# File read successfully, send 'OK' header
self.send_response(200)
# TODO: Guess the MIME type. Unfortunately this call solely relies on the file extension, not ideal?
mimetype, _ = mimetypes.guess_type(filepath)
# Fall back to octet-stream type, if unknown
if mimetype is None:
mimetype = 'application/octet-stream'
# Guess the MIME type and encoding using libmagic
filemagic: magic.FileMagic = magic.detect_from_filename(filepath)
mimetype: str = filemagic.mime_type
charset: str = filemagic.encoding
# Send content type and length header
self.send_header('Content-Type', mimetype)
self.send_header('Content-Type', f'{mimetype}; charset={charset}')
self.send_header('Content-Length', str(len(data)))
self.send_header('Last-Modified', fileage.strftime('%a, %d %b %Y %H:%M:%S GMT'))
self.send_header('Cache-Control', 'max-age=1')

View file

@ -17,7 +17,6 @@ def serve_test_pagelet_str(method: str,
session_vars: Dict[str, Any],
headers: Dict[str, str],
pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]:
headers['Content-Type'] = 'text/plain'
return 'serve test pagelet str'
@ -28,7 +27,7 @@ def serve_test_pagelet_bytes(method: str,
session_vars: Dict[str, Any],
headers: Dict[str, str],
pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]:
headers['Content-Type'] = 'application/octet-stream'
headers['Content-Type'] = 'application/x-foo-bar'
return b'serve\x80test\xffpagelet\xfebytes'
@ -49,7 +48,6 @@ def serve_test_pagelet_template(method: str,
session_vars: Dict[str, Any],
headers: Dict[str, str],
pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]:
headers['Content-Type'] = 'text/plain'
return TemplateResponse('test.txt', what='World')
@ -62,7 +60,6 @@ def serve_test_pagelet_fail(method: str,
headers: Dict[str, str],
pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]:
session_vars['test'] = 'hello, world!'
headers['Content-Type'] = 'text/plain'
raise HttpException(599, 'Error expected during unit testing')
@ -228,3 +225,17 @@ class TestServe(AbstractHttpdTest):
self.assertIsNone(packet.pagelet)
# Make sure a 405 Method Not Allowed header is served
self.assertEqual(405, packet.statuscode)
def test_serve_static_libmagic(self):
# The correct Content-Type header must be guessed, if a pagelet does not provide one
self.client_sock.set_request(b'GET /just/testing/serve_pagelet_str HTTP/1.1\r\n\r\n')
HttpHandler(self.client_sock, ('::1', 45678), self.server)
packet = self.client_sock.get_response()
self.assertEqual('text/plain; charset=us-ascii', packet.headers['Content-Type'])
def test_serve_static_libmagic_skipped(self):
# The Content-Type set by a pagelet should not be overwritten
self.client_sock.set_request(b'GET /just/testing/serve_pagelet_bytes HTTP/1.1\r\n\r\n')
HttpHandler(self.client_sock, ('::1', 45678), self.server)
packet = self.client_sock.get_response()
self.assertEqual('application/x-foo-bar', packet.headers['Content-Type'])

View file

@ -1 +1,2 @@
file-magic
jinja2