Merge branch 'feature/background-scrape' into 'dev'
Feature: Background Scrape See merge request s3lph/icalendar-timeseries-server!1
This commit is contained in:
commit
326ecf2a73
8 changed files with 90 additions and 48 deletions
|
@ -92,7 +92,6 @@ Configuration is done through a JSON config file:
|
||||||
"port": 8090,
|
"port": 8090,
|
||||||
"start_delta": "-PT3H",
|
"start_delta": "-PT3H",
|
||||||
"end_delta": "P30D",
|
"end_delta": "P30D",
|
||||||
"cache": "PT15M",
|
|
||||||
"tz": "Europe/Zurich",
|
"tz": "Europe/Zurich",
|
||||||
"calendars": {
|
"calendars": {
|
||||||
"private": {
|
"private": {
|
||||||
|
@ -104,6 +103,7 @@ Configuration is done through a JSON config file:
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"public": {
|
"public": {
|
||||||
|
"interval": "P1D",
|
||||||
"url": "https://example.cloud/dav/me/public.ics"
|
"url": "https://example.cloud/dav/me/public.ics"
|
||||||
},
|
},
|
||||||
"confidential": {
|
"confidential": {
|
||||||
|
@ -136,11 +136,11 @@ Configuration is done through a JSON config file:
|
||||||
| `port` | int | The port to listen on. |
|
| `port` | int | The port to listen on. |
|
||||||
| `start_delta` | string | A signed ISO 8601 duration string, describing the event range start offset relative to the current time. |
|
| `start_delta` | string | A signed ISO 8601 duration string, describing the event range start offset relative to the current time. |
|
||||||
| `end_delta` | string | An unsigned ISO 8601 duration string, describing the event range end offset relative to the current time. |
|
| `end_delta` | string | An unsigned ISO 8601 duration string, describing the event range end offset relative to the current time. |
|
||||||
| `cache` | string | An unsigned ISO 8601 duration string, describing the cache timeout duration. |
|
|
||||||
| `tz` | string | The local timezone. |
|
| `tz` | string | The local timezone. |
|
||||||
| `calendars` | dict | The calendars to scrape. |
|
| `calendars` | dict | The calendars to scrape. |
|
||||||
| `keys(calendars)` | string | Name of the calendar. |
|
| `keys(calendars)` | string | Name of the calendar. |
|
||||||
| `calendars.*.url` | string | The HTTP or HTTPS URL to scrape. |
|
| `calendars.*.url` | string | The HTTP or HTTPS URL to scrape. |
|
||||||
|
| `calendars.*.interval` | string | An unsigned ISO 8601 duration string, describing the scrape interval for this calendar. |
|
||||||
| `calendars.*.ca` | string | Path to the CA certificate file to validate the server's TLS certificate against, in PEM format (optional). |
|
| `calendars.*.ca` | string | Path to the CA certificate file to validate the server's TLS certificate against, in PEM format (optional). |
|
||||||
| `calendars.*.auth` | dict | Authorization config for the calendar. |
|
| `calendars.*.auth` | dict | Authorization config for the calendar. |
|
||||||
| `calendars.*.auth[].type` | string | Authorization type, one of `none` (no authorization), `basic` (HTTP Basic Authentication), `tls` (TLS client certificate). |
|
| `calendars.*.auth[].type` | string | Authorization type, one of `none` (no authorization), `basic` (HTTP Basic Authentication), `tls` (TLS client certificate). |
|
||||||
|
|
|
@ -7,23 +7,26 @@
|
||||||
"tz": "Europe/Zurich",
|
"tz": "Europe/Zurich",
|
||||||
"calendars": {
|
"calendars": {
|
||||||
"tlstest": {
|
"tlstest": {
|
||||||
|
"interval": "PT5M",
|
||||||
"url": "https://localhost/private.ics",
|
"url": "https://localhost/private.ics",
|
||||||
"ca": "/home/sebastian/tlstest/ca/ca/ca.crt",
|
"ca": "/home/sebastian/tlstest/ca/ca/ca.crt",
|
||||||
"auth": {
|
"auth": {
|
||||||
"type": "tls",
|
"type": "tls",
|
||||||
"keyfile": "/home/sebastian/tlstest/client/combined.pem"
|
"keyfile": "/home/sebastian/tlstest/client/combined.pem"
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"filetest": {
|
||||||
|
"interval": "PT1M",
|
||||||
|
"url": "file:///srv/http/private.ics"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"key_replace": {
|
"key_replace": {
|
||||||
"summary": "a_summary",
|
"summary": "a_summary",
|
||||||
"description": "b_description",
|
"description": "b_description"
|
||||||
"calendar": "c_calendar"
|
|
||||||
},
|
},
|
||||||
"value_replace": {
|
"value_replace": {
|
||||||
"summary": "{{ summary|truncate(100, end=' \\N{HORIZONTAL ELLIPSIS}') }}",
|
"summary": "{{ summary|truncate(100, end=' \\N{HORIZONTAL ELLIPSIS}') }}",
|
||||||
"description": "{{ description|truncate(100, end=' \\N{HORIZONTAL ELLIPSIS}') }}",
|
"description": "{{ description|truncate(100, end=' \\N{HORIZONTAL ELLIPSIS}') }}",
|
||||||
"calendar": "{{ 0 if calendar == 'private' else 1 }}",
|
|
||||||
"useless_metric": "{{ start.timestamp() + end.timestamp() }}"
|
"useless_metric": "{{ start.timestamp() + end.timestamp() }}"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,29 +1,21 @@
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
import json
|
import json
|
||||||
from datetime import datetime
|
|
||||||
from urllib.error import HTTPError
|
from urllib.error import HTTPError
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
import bottle
|
import bottle
|
||||||
from isodate import Duration
|
|
||||||
|
|
||||||
from icalendar_timeseries_server.config import get_config
|
from icalendar_timeseries_server.config import get_config
|
||||||
from icalendar_timeseries_server.event import Event
|
from icalendar_timeseries_server.event import Metric
|
||||||
from icalendar_timeseries_server.cal import scrape_calendar
|
from icalendar_timeseries_server.cal import get_calendar
|
||||||
from icalendar_timeseries_server.query import MetricQuery
|
from icalendar_timeseries_server.query import MetricQuery
|
||||||
|
|
||||||
|
|
||||||
@bottle.route('/api/v1/query')
|
@bottle.route('/api/v1/query')
|
||||||
@bottle.route('/api/v1/query_range')
|
@bottle.route('/api/v1/query_range')
|
||||||
def prometheus_api():
|
def prometheus_api():
|
||||||
tz = get_config().tz
|
events: List[Metric] = []
|
||||||
now: datetime = datetime.now(tz)
|
|
||||||
start_delta: Duration = get_config().start_delta
|
|
||||||
end_delta: Duration = get_config().end_delta
|
|
||||||
start: datetime = now + start_delta
|
|
||||||
end: datetime = now + end_delta
|
|
||||||
events: List[Event] = []
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
q = MetricQuery(bottle.request.query['query'])
|
q = MetricQuery(bottle.request.query['query'])
|
||||||
|
@ -39,8 +31,8 @@ def prometheus_api():
|
||||||
return json.dumps(response)
|
return json.dumps(response)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for name, caldef in get_config().calendars.items():
|
for name in get_config().calendars.keys():
|
||||||
events.extend(scrape_calendar(name, caldef, start, end))
|
events.extend(get_calendar(name))
|
||||||
events = list(filter(q, events))
|
events = list(filter(q, events))
|
||||||
events.sort(key=lambda e: e.start)
|
events.sort(key=lambda e: e.start)
|
||||||
response = {
|
response = {
|
||||||
|
|
|
@ -1,18 +1,21 @@
|
||||||
from typing import Dict, List, Iterable, Tuple
|
from typing import Dict, List, Iterable
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import urllib.request
|
import urllib.request
|
||||||
from datetime import datetime, date, timedelta
|
from datetime import datetime, date, timedelta
|
||||||
|
from threading import Lock, Timer
|
||||||
|
|
||||||
from dateutil import rrule
|
from dateutil import rrule
|
||||||
from icalendar import cal
|
from icalendar import cal
|
||||||
|
from isodate import Duration
|
||||||
|
|
||||||
from icalendar_timeseries_server import __version__
|
from icalendar_timeseries_server import __version__
|
||||||
from icalendar_timeseries_server.config import get_config, CalendarConfig
|
from icalendar_timeseries_server.config import get_config, CalendarConfig
|
||||||
from icalendar_timeseries_server.event import Event
|
from icalendar_timeseries_server.event import Event
|
||||||
|
|
||||||
|
|
||||||
_SCRAPE_CACHE: Dict[str, Tuple[datetime, List[Event]]] = dict()
|
_SCRAPE_CACHE: Dict[str, List[Event]] = dict()
|
||||||
|
_SCRAPE_CACHE_LOCK: Lock = Lock()
|
||||||
|
|
||||||
__py_version: str = f'{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}'
|
__py_version: str = f'{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}'
|
||||||
USER_AGENT: str = f'icalendar-timeseries-server/{__version__} (Python/{__py_version})'
|
USER_AGENT: str = f'icalendar-timeseries-server/{__version__} (Python/{__py_version})'
|
||||||
|
@ -46,8 +49,15 @@ def _parse_recurring(event: cal.Event, start: datetime, end: datetime, duration:
|
||||||
return occurences
|
return occurences
|
||||||
|
|
||||||
|
|
||||||
def _parse_calendar(name: str, calendar: cal.Calendar, start: datetime, end: datetime) -> List[Event]:
|
def _scrape_calendar(name: str, config: CalendarConfig, start: datetime, end: datetime):
|
||||||
|
global _SCRAPE_CACHE, _SCRAPE_CACHE_LOCK
|
||||||
events = []
|
events = []
|
||||||
|
|
||||||
|
opener: urllib.request.OpenerDirector = config.get_url_opener()
|
||||||
|
with opener.open(config.url) as response:
|
||||||
|
data = response.read().decode('utf-8')
|
||||||
|
calendar = cal.Calendar.from_ical(data)
|
||||||
|
|
||||||
for element in calendar.walk():
|
for element in calendar.walk():
|
||||||
if element.name == "VEVENT":
|
if element.name == "VEVENT":
|
||||||
dtstart = element.get('dtstart').dt
|
dtstart = element.get('dtstart').dt
|
||||||
|
@ -70,23 +80,34 @@ def _parse_calendar(name: str, calendar: cal.Calendar, start: datetime, end: dat
|
||||||
for occurence in occurences:
|
for occurence in occurences:
|
||||||
if start <= occurence < end:
|
if start <= occurence < end:
|
||||||
events.append(Event(name, element, occurence, occurence + duration))
|
events.append(Event(name, element, occurence, occurence + duration))
|
||||||
return events
|
with _SCRAPE_CACHE_LOCK:
|
||||||
|
_SCRAPE_CACHE[name] = events
|
||||||
|
|
||||||
|
|
||||||
def scrape_calendar(name: str, config: CalendarConfig, start: datetime, end: datetime) -> List[Event]:
|
def scrape_calendar(name: str, config: CalendarConfig):
|
||||||
|
# Get current time in configured timezone
|
||||||
|
tz = get_config().tz
|
||||||
|
now: datetime = datetime.now(tz)
|
||||||
|
# Reschedule calendar scraping
|
||||||
|
cron = Timer(config.interval.totimedelta(start=now).total_seconds(),
|
||||||
|
lambda: scrape_calendar(name, config))
|
||||||
|
cron.start()
|
||||||
|
# Compute interval for which to return events
|
||||||
|
start_delta: Duration = get_config().start_delta
|
||||||
|
end_delta: Duration = get_config().end_delta
|
||||||
|
start: datetime = now + start_delta
|
||||||
|
end: datetime = now + end_delta
|
||||||
|
# Scrape and parse the calendar
|
||||||
|
_scrape_calendar(name, config, start, end)
|
||||||
|
|
||||||
|
|
||||||
|
def start_scrape_calendar(name: str, config: CalendarConfig):
|
||||||
|
# Schedule first calendar scraping
|
||||||
|
cron = Timer(0, lambda: scrape_calendar(name, config))
|
||||||
|
cron.start()
|
||||||
|
|
||||||
|
|
||||||
|
def get_calendar(name: str):
|
||||||
global _SCRAPE_CACHE
|
global _SCRAPE_CACHE
|
||||||
now: datetime = datetime.now(tz=get_config().tz)
|
with _SCRAPE_CACHE_LOCK:
|
||||||
if get_config().cache.total_seconds() > 0 and name in _SCRAPE_CACHE:
|
return _SCRAPE_CACHE.get(name, [])
|
||||||
cache_timeout, cached = _SCRAPE_CACHE[name]
|
|
||||||
if now < cache_timeout:
|
|
||||||
print('serving cached')
|
|
||||||
return cached
|
|
||||||
print('doing request')
|
|
||||||
|
|
||||||
opener: urllib.request.OpenerDirector = config.get_url_opener()
|
|
||||||
with opener.open(config.url) as response:
|
|
||||||
data = response.read().decode('utf-8')
|
|
||||||
calendar = cal.Calendar.from_ical(data)
|
|
||||||
parsed: List[Event] = _parse_calendar(name, calendar, start, end)
|
|
||||||
_SCRAPE_CACHE[name] = now + get_config().cache, parsed
|
|
||||||
return parsed
|
|
||||||
|
|
|
@ -27,6 +27,7 @@ class CalendarConfig:
|
||||||
|
|
||||||
def __init__(self, config: Dict[str, Any], config_path: str) -> None:
|
def __init__(self, config: Dict[str, Any], config_path: str) -> None:
|
||||||
self._url: str = _keycheck('url', config, str, config_path)
|
self._url: str = _keycheck('url', config, str, config_path)
|
||||||
|
self._scrape_interval: Duration = _parse_timedelta('interval', config, config_path, default_value='PT15M')
|
||||||
self._ca: Optional[str] = _keycheck('ca', config, str, config_path, optional=True)
|
self._ca: Optional[str] = _keycheck('ca', config, str, config_path, optional=True)
|
||||||
auth: Dict[str, Any] = _keycheck('auth', config, dict, config_path, default_value={'type': 'none'})
|
auth: Dict[str, Any] = _keycheck('auth', config, dict, config_path, default_value={'type': 'none'})
|
||||||
self._authtype: str = _keycheck('type', auth, str, f'{config_path}.auth',
|
self._authtype: str = _keycheck('type', auth, str, f'{config_path}.auth',
|
||||||
|
@ -56,6 +57,10 @@ class CalendarConfig:
|
||||||
def url(self) -> str:
|
def url(self) -> str:
|
||||||
return self._url
|
return self._url
|
||||||
|
|
||||||
|
@property
|
||||||
|
def interval(self) -> Duration:
|
||||||
|
return self._scrape_interval
|
||||||
|
|
||||||
def get_url_opener(self) -> urllib.request.OpenerDirector:
|
def get_url_opener(self) -> urllib.request.OpenerDirector:
|
||||||
|
|
||||||
if self._authtype == 'tls':
|
if self._authtype == 'tls':
|
||||||
|
@ -89,7 +94,6 @@ class Config:
|
||||||
self._tz: pytz.tzinfo = _parse_timezone('tz', config, '', default_value='UTC')
|
self._tz: pytz.tzinfo = _parse_timezone('tz', config, '', default_value='UTC')
|
||||||
self._start_delta: Duration = _parse_timedelta('start_delta', config, '', default_value='PT')
|
self._start_delta: Duration = _parse_timedelta('start_delta', config, '', default_value='PT')
|
||||||
self._end_delta: Duration = _parse_timedelta('end_delta', config, '', default_value='P30D')
|
self._end_delta: Duration = _parse_timedelta('end_delta', config, '', default_value='P30D')
|
||||||
self._cache: Duration = _parse_timedelta('cache', config, '', default_value='PT', force_positive=True)
|
|
||||||
self._calendars: Dict[str, CalendarConfig] = self._parse_calendars_config('calendars', config, '')
|
self._calendars: Dict[str, CalendarConfig] = self._parse_calendars_config('calendars', config, '')
|
||||||
self._key_replace = _parse_key_replace('key_replace', config, '')
|
self._key_replace = _parse_key_replace('key_replace', config, '')
|
||||||
self._value_replace = _parse_value_replace('value_replace', config, '')
|
self._value_replace = _parse_value_replace('value_replace', config, '')
|
||||||
|
@ -125,10 +129,6 @@ class Config:
|
||||||
def end_delta(self) -> Duration:
|
def end_delta(self) -> Duration:
|
||||||
return self._end_delta
|
return self._end_delta
|
||||||
|
|
||||||
@property
|
|
||||||
def cache(self) -> Duration:
|
|
||||||
return self._cache
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def calendars(self) -> Dict[str, CalendarConfig]:
|
def calendars(self) -> Dict[str, CalendarConfig]:
|
||||||
return self._calendars
|
return self._calendars
|
||||||
|
|
|
@ -38,7 +38,7 @@ class Event(Metric):
|
||||||
for attr in _ATTRIBUTES:
|
for attr in _ATTRIBUTES:
|
||||||
tmp[attr] = event.get(attr, '')
|
tmp[attr] = event.get(attr, '')
|
||||||
substitution_keys = set(_ATTRIBUTES)
|
substitution_keys = set(_ATTRIBUTES)
|
||||||
substitution_keys.update(['start', 'end'])
|
substitution_keys.update(tmp.keys())
|
||||||
substitution_keys.update(get_config().key_replace.keys())
|
substitution_keys.update(get_config().key_replace.keys())
|
||||||
substitution_keys.update(get_config().value_replace.keys())
|
substitution_keys.update(get_config().value_replace.keys())
|
||||||
for attr in substitution_keys:
|
for attr in substitution_keys:
|
||||||
|
|
|
@ -2,6 +2,7 @@ import sys
|
||||||
|
|
||||||
import bottle
|
import bottle
|
||||||
|
|
||||||
|
from icalendar_timeseries_server.cal import start_scrape_calendar
|
||||||
from icalendar_timeseries_server.config import load_config, load_default_config, get_config
|
from icalendar_timeseries_server.config import load_config, load_default_config, get_config
|
||||||
|
|
||||||
# Contains decorated bottle handler function for /api/v1/query
|
# Contains decorated bottle handler function for /api/v1/query
|
||||||
|
@ -17,7 +18,12 @@ def main():
|
||||||
else:
|
else:
|
||||||
print(f'Can only read one config file, got "{" ".join(sys.argv[1:])}"')
|
print(f'Can only read one config file, got "{" ".join(sys.argv[1:])}"')
|
||||||
exit(1)
|
exit(1)
|
||||||
bottle.run(host=get_config().addr, port=get_config().port)
|
config = get_config()
|
||||||
|
# Schedule calendar scraping in the background
|
||||||
|
for calname in config.calendars.keys():
|
||||||
|
start_scrape_calendar(calname, config.calendars[calname])
|
||||||
|
# Start the Bottle HTTP server
|
||||||
|
bottle.run(host=config.addr, port=get_config().port)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
|
@ -16,7 +16,6 @@ _CONFIG_VALID = """
|
||||||
"port": 8090,
|
"port": 8090,
|
||||||
"start_delta": "-PT3H",
|
"start_delta": "-PT3H",
|
||||||
"end_delta": "P30D",
|
"end_delta": "P30D",
|
||||||
"cache": "PT15M",
|
|
||||||
"tz": "Europe/Zurich",
|
"tz": "Europe/Zurich",
|
||||||
"calendars": {
|
"calendars": {
|
||||||
"private": {
|
"private": {
|
||||||
|
@ -28,10 +27,12 @@ _CONFIG_VALID = """
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"public": {
|
"public": {
|
||||||
"url": "https://example.cloud/dav/me/public.ics"
|
"url": "https://example.cloud/dav/me/public.ics",
|
||||||
|
"interval": "P1D"
|
||||||
},
|
},
|
||||||
"confidential": {
|
"confidential": {
|
||||||
"url": "https://example.cloud/dav/me/confidential.ics",
|
"url": "https://example.cloud/dav/me/confidential.ics",
|
||||||
|
"interval": "PT5M",
|
||||||
"ca": "/etc/ssl/ca.pem",
|
"ca": "/etc/ssl/ca.pem",
|
||||||
"auth": {
|
"auth": {
|
||||||
"type": "tls",
|
"type": "tls",
|
||||||
|
@ -124,5 +125,24 @@ class ConfigTest(unittest.TestCase):
|
||||||
self.assertEqual(config.port, 8090)
|
self.assertEqual(config.port, 8090)
|
||||||
self.assertEqual(config.start_delta, Duration(hours=-3))
|
self.assertEqual(config.start_delta, Duration(hours=-3))
|
||||||
self.assertEqual(config.end_delta, Duration(days=30))
|
self.assertEqual(config.end_delta, Duration(days=30))
|
||||||
self.assertEqual(config.cache, Duration(minutes=15))
|
|
||||||
self.assertEqual(config.tz, pytz.timezone('Europe/Zurich'))
|
self.assertEqual(config.tz, pytz.timezone('Europe/Zurich'))
|
||||||
|
|
||||||
|
def test_parse_calendars(self):
|
||||||
|
config = Config(json.loads(_CONFIG_VALID))
|
||||||
|
self.assertEqual({'public', 'private', 'confidential'}, config.calendars.keys())
|
||||||
|
|
||||||
|
self.assertEqual('https://example.cloud/dav/me/public.ics', config.calendars['public'].url)
|
||||||
|
self.assertEqual(Duration(days=1), config.calendars['public'].interval)
|
||||||
|
self.assertEqual('none', config.calendars['public']._authtype)
|
||||||
|
|
||||||
|
self.assertEqual('https://example.cloud/dav/me/private.ics', config.calendars['private'].url)
|
||||||
|
self.assertEqual(Duration(minutes=15), config.calendars['private'].interval)
|
||||||
|
self.assertEqual('basic', config.calendars['private']._authtype)
|
||||||
|
self.assertEqual('Basic bWU6bXlzdXBlcnNlY3VyZXBhc3N3b3Jk',
|
||||||
|
config.calendars['private']._request_headers['Authorization'])
|
||||||
|
|
||||||
|
self.assertEqual('https://example.cloud/dav/me/confidential.ics', config.calendars['confidential'].url)
|
||||||
|
self.assertEqual(Duration(minutes=5), config.calendars['confidential'].interval)
|
||||||
|
self.assertEqual('tls', config.calendars['confidential']._authtype)
|
||||||
|
self.assertEqual('/etc/ssl/client.pem', config.calendars['confidential']._tls_keyfile)
|
||||||
|
self.assertEqual('mysupersecurepassword', config.calendars['confidential']._tls_passphrase)
|
||||||
|
|
Loading…
Reference in a new issue