Merge branch '18-libmagic-mime-type-detection' into staging-unstable

This commit is contained in:
s3lph 2018-07-20 13:04:57 +02:00
commit 63aef74e10
2 changed files with 63 additions and 9 deletions

View file

@ -4,6 +4,7 @@ from typing import Any, Callable, Dict, Tuple, Type, Union
import logging
import os
import socket
import mimetypes
import magic
from socketserver import TCPServer
from http.server import HTTPServer, BaseHTTPRequestHandler
@ -336,7 +337,11 @@ class HttpHandler(BaseHTTPRequestHandler):
except ValueError:
mimetype = 'application/octet-stream'
charset = 'binary'
headers['Content-Type'] = f'{mimetype}; charset={charset}'
# Only append the charset if it is not "binary"
if charset == 'binary':
headers['Content-Type'] = mimetype
else:
headers['Content-Type'] = f'{mimetype}; charset={charset}'
# Send all headers set by the pagelet
for name, value in headers.items():
self.send_header(name, value)
@ -374,16 +379,21 @@ class HttpHandler(BaseHTTPRequestHandler):
data = f.read()
# File read successfully, send 'OK' header
self.send_response(200)
# Guess the MIME type and encoding using libmagic
# Guess the MIME type by file extension, or use libmagic as fallback
# Use libmagic to guess the charset
try:
exttype: str = mimetypes.guess_type(filepath)[0]
filemagic: magic.FileMagic = magic.detect_from_filename(filepath)
mimetype: str = filemagic.mime_type
mimetype: str = exttype if exttype is not None else filemagic.mime_type
charset: str = filemagic.encoding
except ValueError:
mimetype = 'application/octet-stream'
charset = 'binary'
# Send content type and length header
self.send_header('Content-Type', f'{mimetype}; charset={charset}')
# Send content type and length header. Only set the charset if it's not "binary"
if charset == 'binary':
self.send_header('Content-Type', mimetype)
else:
self.send_header('Content-Type', f'{mimetype}; charset={charset}')
self.send_header('Content-Length', str(len(data)))
self.send_header('Last-Modified', fileage.strftime('%a, %d %b %Y %H:%M:%S GMT'))
self.send_header('Cache-Control', 'max-age=1')

View file

@ -17,7 +17,6 @@ def serve_test_pagelet_str(method: str,
session_vars: Dict[str, Any],
headers: Dict[str, str],
pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]:
headers['Content-Type'] = 'text/plain'
return 'serve test pagelet str'
@ -28,7 +27,7 @@ def serve_test_pagelet_bytes(method: str,
session_vars: Dict[str, Any],
headers: Dict[str, str],
pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]:
headers['Content-Type'] = 'application/octet-stream'
headers['Content-Type'] = 'application/x-foo-bar'
return b'serve\x80test\xffpagelet\xfebytes'
@ -49,7 +48,6 @@ def serve_test_pagelet_template(method: str,
session_vars: Dict[str, Any],
headers: Dict[str, str],
pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]:
headers['Content-Type'] = 'text/plain'
return TemplateResponse('test.txt', what='World')
@ -62,7 +60,6 @@ def serve_test_pagelet_fail(method: str,
headers: Dict[str, str],
pagelet_variables: Dict[str, str]) -> Union[bytes, str, PageletResponse]:
session_vars['test'] = 'hello, world!'
headers['Content-Type'] = 'text/plain'
raise HttpException(599, 'Error expected during unit testing')
@ -81,6 +78,15 @@ class TestServe(AbstractHttpdTest):
with open(forbidden, 'w') as f:
f.write('This should not be readable')
os.chmod(forbidden, 0)
# Create a CSS resource whose MIME type should be detected by file extension
with open(os.path.join(self.tempdir.name, 'teststyle.css'), 'w') as f:
f.write('.ninja { display: none; }\n')
# Create a file without extension (containing UTF-16 text with BOM); libmagic should take over
with open(os.path.join(self.tempdir.name, 'testdata'), 'wb') as f:
f.write(b'\xFE\xFFH\x00e\x00l\x00l\x00o\x00,\x00 \x00w\x00o\x00r\x00l\x00d\x00!\x00\n\x00')
# Create a file that will yield "text/plain; charset=binary"
with open(os.path.join(self.tempdir.name, 'testbin.txt'), 'wb') as f:
f.write(b'\x00\x00\x00\x00\x00\x00\x00\x00')
def test_serve_pagelet_str(self):
# Call the test pagelet that produces a 200 OK result
@ -228,3 +234,41 @@ class TestServe(AbstractHttpdTest):
self.assertIsNone(packet.pagelet)
# Make sure a 405 Method Not Allowed header is served
self.assertEqual(405, packet.statuscode)
def test_serve_pagelet_libmagic(self):
# The correct Content-Type header must be guessed, if a pagelet does not provide one
self.client_sock.set_request(b'GET /just/testing/serve_pagelet_str HTTP/1.1\r\n\r\n')
HttpHandler(self.client_sock, ('::1', 45678), self.server)
packet = self.client_sock.get_response()
self.assertEqual('text/plain; charset=us-ascii', packet.headers['Content-Type'])
def test_serve_pagelet_libmagic_skipped(self):
# The Content-Type set by a pagelet should not be overwritten
self.client_sock.set_request(b'GET /just/testing/serve_pagelet_bytes HTTP/1.1\r\n\r\n')
HttpHandler(self.client_sock, ('::1', 45678), self.server)
packet = self.client_sock.get_response()
self.assertEqual('application/x-foo-bar', packet.headers['Content-Type'])
def test_serve_static_mime_extension(self):
# The correct Content-Type should be guessed by file extension primarily
self.client_sock.set_request(b'GET /teststyle.css HTTP/1.1\r\n\r\n')
HttpHandler(self.client_sock, ('::1', 45678), self.server)
packet = self.client_sock.get_response()
# libmagic would say text/plain instead
self.assertEqual('text/css; charset=us-ascii', packet.headers['Content-Type'])
def test_serve_static_mime_magic(self):
# The correct Content-Type should be guessed by file extension primarily
self.client_sock.set_request(b'GET /testdata HTTP/1.1\r\n\r\n')
HttpHandler(self.client_sock, ('::1', 45678), self.server)
packet = self.client_sock.get_response()
# Extension-based would fail, as there is no extension
self.assertEqual('text/plain; charset=utf-16be', packet.headers['Content-Type'])
def test_serve_static_mime_magic_binary(self):
# The correct Content-Type should be guessed by file extension primarily
self.client_sock.set_request(b'GET /testbin.txt HTTP/1.1\r\n\r\n')
HttpHandler(self.client_sock, ('::1', 45678), self.server)
packet = self.client_sock.get_response()
# No charset should be in the header. Yes, this is a stupid example
self.assertEqual('text/plain', packet.headers['Content-Type'])