WIP: Implement regular scraping of the calendar sources in the background, instead of blocking in the foreground process
This commit is contained in:
parent
5722fd7c98
commit
bc024ea82d
7 changed files with 52 additions and 45 deletions
|
@ -92,7 +92,6 @@ Configuration is done through a JSON config file:
|
|||
"port": 8090,
|
||||
"start_delta": "-PT3H",
|
||||
"end_delta": "P30D",
|
||||
"cache": "PT15M",
|
||||
"tz": "Europe/Zurich",
|
||||
"calendars": {
|
||||
"private": {
|
||||
|
@ -104,6 +103,7 @@ Configuration is done through a JSON config file:
|
|||
}
|
||||
},
|
||||
"public": {
|
||||
"interval": "P1D",
|
||||
"url": "https://example.cloud/dav/me/public.ics"
|
||||
},
|
||||
"confidential": {
|
||||
|
@ -136,11 +136,11 @@ Configuration is done through a JSON config file:
|
|||
| `port` | int | The port to listen on. |
|
||||
| `start_delta` | string | A signed ISO 8601 duration string, describing the event range start offset relative to the current time. |
|
||||
| `end_delta` | string | An unsigned ISO 8601 duration string, describing the event range end offset relative to the current time. |
|
||||
| `cache` | string | An unsigned ISO 8601 duration string, describing the cache timeout duration. |
|
||||
| `tz` | string | The local timezone. |
|
||||
| `calendars` | dict | The calendars to scrape. |
|
||||
| `keys(calendars)` | string | Name of the calendar. |
|
||||
| `calendars.*.url` | string | The HTTP or HTTPS URL to scrape. |
|
||||
| `calendars.*.interval` | string | An unsigned ISO 8601 duration string, describing the scrape interval for this calendar. |
|
||||
| `calendars.*.ca` | string | Path to the CA certificate file to validate the server's TLS certificate against, in PEM format (optional). |
|
||||
| `calendars.*.auth` | dict | Authorization config for the calendar. |
|
||||
| `calendars.*.auth[].type` | string | Authorization type, one of `none` (no authorization), `basic` (HTTP Basic Authentication), `tls` (TLS client certificate). |
|
||||
|
|
|
@ -7,23 +7,26 @@
|
|||
"tz": "Europe/Zurich",
|
||||
"calendars": {
|
||||
"tlstest": {
|
||||
"interval": "PT5M",
|
||||
"url": "https://localhost/private.ics",
|
||||
"ca": "/home/sebastian/tlstest/ca/ca/ca.crt",
|
||||
"auth": {
|
||||
"type": "tls",
|
||||
"keyfile": "/home/sebastian/tlstest/client/combined.pem"
|
||||
}
|
||||
},
|
||||
"filetest": {
|
||||
"interval": "PT1M",
|
||||
"url": "file:///srv/http/private.ics"
|
||||
}
|
||||
},
|
||||
"key_replace": {
|
||||
"summary": "a_summary",
|
||||
"description": "b_description",
|
||||
"calendar": "c_calendar"
|
||||
"description": "b_description"
|
||||
},
|
||||
"value_replace": {
|
||||
"summary": "{{ summary|truncate(100, end=' \\N{HORIZONTAL ELLIPSIS}') }}",
|
||||
"description": "{{ description|truncate(100, end=' \\N{HORIZONTAL ELLIPSIS}') }}",
|
||||
"calendar": "{{ 0 if calendar == 'private' else 1 }}",
|
||||
"useless_metric": "{{ start.timestamp() + end.timestamp() }}"
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,29 +1,21 @@
|
|||
from typing import List
|
||||
|
||||
import json
|
||||
from datetime import datetime
|
||||
from urllib.error import HTTPError
|
||||
import traceback
|
||||
|
||||
import bottle
|
||||
from isodate import Duration
|
||||
|
||||
from icalendar_timeseries_server.config import get_config
|
||||
from icalendar_timeseries_server.event import Event
|
||||
from icalendar_timeseries_server.cal import scrape_calendar
|
||||
from icalendar_timeseries_server.event import Metric
|
||||
from icalendar_timeseries_server.cal import get_calendar
|
||||
from icalendar_timeseries_server.query import MetricQuery
|
||||
|
||||
|
||||
@bottle.route('/api/v1/query')
|
||||
@bottle.route('/api/v1/query_range')
|
||||
def prometheus_api():
|
||||
tz = get_config().tz
|
||||
now: datetime = datetime.now(tz)
|
||||
start_delta: Duration = get_config().start_delta
|
||||
end_delta: Duration = get_config().end_delta
|
||||
start: datetime = now + start_delta
|
||||
end: datetime = now + end_delta
|
||||
events: List[Event] = []
|
||||
events: List[Metric] = []
|
||||
|
||||
try:
|
||||
q = MetricQuery(bottle.request.query['query'])
|
||||
|
@ -39,8 +31,8 @@ def prometheus_api():
|
|||
return json.dumps(response)
|
||||
|
||||
try:
|
||||
for name, caldef in get_config().calendars.items():
|
||||
events.extend(scrape_calendar(name, caldef, start, end))
|
||||
for name in get_config().calendars.keys():
|
||||
events.extend(get_calendar(name))
|
||||
events = list(filter(q, events))
|
||||
events.sort(key=lambda e: e.start)
|
||||
response = {
|
||||
|
|
|
@ -1,18 +1,20 @@
|
|||
from typing import Dict, List, Iterable, Tuple
|
||||
from typing import Dict, List, Iterable
|
||||
|
||||
import sys
|
||||
import urllib.request
|
||||
from datetime import datetime, date, timedelta
|
||||
from threading import Timer
|
||||
|
||||
from dateutil import rrule
|
||||
from icalendar import cal
|
||||
from isodate import Duration
|
||||
|
||||
from icalendar_timeseries_server import __version__
|
||||
from icalendar_timeseries_server.config import get_config, CalendarConfig
|
||||
from icalendar_timeseries_server.event import Event
|
||||
|
||||
|
||||
_SCRAPE_CACHE: Dict[str, Tuple[datetime, List[Event]]] = dict()
|
||||
_SCRAPE_CACHE: Dict[str, List[Event]] = dict()
|
||||
|
||||
__py_version: str = f'{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}'
|
||||
USER_AGENT: str = f'icalendar-timeseries-server/{__version__} (Python/{__py_version})'
|
||||
|
@ -46,8 +48,15 @@ def _parse_recurring(event: cal.Event, start: datetime, end: datetime, duration:
|
|||
return occurences
|
||||
|
||||
|
||||
def _parse_calendar(name: str, calendar: cal.Calendar, start: datetime, end: datetime) -> List[Event]:
|
||||
def _scrape_calendar(name: str, config: CalendarConfig, start: datetime, end: datetime):
|
||||
global _SCRAPE_CACHE
|
||||
events = []
|
||||
|
||||
opener: urllib.request.OpenerDirector = config.get_url_opener()
|
||||
with opener.open(config.url) as response:
|
||||
data = response.read().decode('utf-8')
|
||||
calendar = cal.Calendar.from_ical(data)
|
||||
|
||||
for element in calendar.walk():
|
||||
if element.name == "VEVENT":
|
||||
dtstart = element.get('dtstart').dt
|
||||
|
@ -70,23 +79,22 @@ def _parse_calendar(name: str, calendar: cal.Calendar, start: datetime, end: dat
|
|||
for occurence in occurences:
|
||||
if start <= occurence < end:
|
||||
events.append(Event(name, element, occurence, occurence + duration))
|
||||
return events
|
||||
_SCRAPE_CACHE[name] = events
|
||||
|
||||
|
||||
def scrape_calendar(name: str, config: CalendarConfig, start: datetime, end: datetime) -> List[Event]:
|
||||
def scrape_calendar(name: str, config: CalendarConfig):
|
||||
tz = get_config().tz
|
||||
now: datetime = datetime.now(tz)
|
||||
start_delta: Duration = get_config().start_delta
|
||||
end_delta: Duration = get_config().end_delta
|
||||
start: datetime = now + start_delta
|
||||
end: datetime = now + end_delta
|
||||
_scrape_calendar(name, config, start, end)
|
||||
cron = Timer(config.interval.totimedelta(start=now).total_seconds(),
|
||||
lambda: scrape_calendar(name, config))
|
||||
cron.start()
|
||||
|
||||
|
||||
def get_calendar(name: str):
|
||||
global _SCRAPE_CACHE
|
||||
now: datetime = datetime.now(tz=get_config().tz)
|
||||
if get_config().cache.total_seconds() > 0 and name in _SCRAPE_CACHE:
|
||||
cache_timeout, cached = _SCRAPE_CACHE[name]
|
||||
if now < cache_timeout:
|
||||
print('serving cached')
|
||||
return cached
|
||||
print('doing request')
|
||||
|
||||
opener: urllib.request.OpenerDirector = config.get_url_opener()
|
||||
with opener.open(config.url) as response:
|
||||
data = response.read().decode('utf-8')
|
||||
calendar = cal.Calendar.from_ical(data)
|
||||
parsed: List[Event] = _parse_calendar(name, calendar, start, end)
|
||||
_SCRAPE_CACHE[name] = now + get_config().cache, parsed
|
||||
return parsed
|
||||
return _SCRAPE_CACHE.get(name, [])
|
||||
|
|
|
@ -27,6 +27,7 @@ class CalendarConfig:
|
|||
|
||||
def __init__(self, config: Dict[str, Any], config_path: str) -> None:
|
||||
self._url: str = _keycheck('url', config, str, config_path)
|
||||
self._scrape_interval: Duration = _parse_timedelta('interval', config, config_path, default_value='PT15M')
|
||||
self._ca: Optional[str] = _keycheck('ca', config, str, config_path, optional=True)
|
||||
auth: Dict[str, Any] = _keycheck('auth', config, dict, config_path, default_value={'type': 'none'})
|
||||
self._authtype: str = _keycheck('type', auth, str, f'{config_path}.auth',
|
||||
|
@ -56,6 +57,10 @@ class CalendarConfig:
|
|||
def url(self) -> str:
|
||||
return self._url
|
||||
|
||||
@property
|
||||
def interval(self) -> Duration:
|
||||
return self._scrape_interval
|
||||
|
||||
def get_url_opener(self) -> urllib.request.OpenerDirector:
|
||||
|
||||
if self._authtype == 'tls':
|
||||
|
@ -89,7 +94,6 @@ class Config:
|
|||
self._tz: pytz.tzinfo = _parse_timezone('tz', config, '', default_value='UTC')
|
||||
self._start_delta: Duration = _parse_timedelta('start_delta', config, '', default_value='PT')
|
||||
self._end_delta: Duration = _parse_timedelta('end_delta', config, '', default_value='P30D')
|
||||
self._cache: Duration = _parse_timedelta('cache', config, '', default_value='PT', force_positive=True)
|
||||
self._calendars: Dict[str, CalendarConfig] = self._parse_calendars_config('calendars', config, '')
|
||||
self._key_replace = _parse_key_replace('key_replace', config, '')
|
||||
self._value_replace = _parse_value_replace('value_replace', config, '')
|
||||
|
@ -125,10 +129,6 @@ class Config:
|
|||
def end_delta(self) -> Duration:
|
||||
return self._end_delta
|
||||
|
||||
@property
|
||||
def cache(self) -> Duration:
|
||||
return self._cache
|
||||
|
||||
@property
|
||||
def calendars(self) -> Dict[str, CalendarConfig]:
|
||||
return self._calendars
|
||||
|
|
|
@ -38,7 +38,7 @@ class Event(Metric):
|
|||
for attr in _ATTRIBUTES:
|
||||
tmp[attr] = event.get(attr, '')
|
||||
substitution_keys = set(_ATTRIBUTES)
|
||||
substitution_keys.update(['start', 'end'])
|
||||
substitution_keys.update(tmp.keys())
|
||||
substitution_keys.update(get_config().key_replace.keys())
|
||||
substitution_keys.update(get_config().value_replace.keys())
|
||||
for attr in substitution_keys:
|
||||
|
|
|
@ -2,6 +2,7 @@ import sys
|
|||
|
||||
import bottle
|
||||
|
||||
from icalendar_timeseries_server.cal import scrape_calendar
|
||||
from icalendar_timeseries_server.config import load_config, load_default_config, get_config
|
||||
|
||||
# Contains decorated bottle handler function for /api/v1/query
|
||||
|
@ -17,7 +18,10 @@ def main():
|
|||
else:
|
||||
print(f'Can only read one config file, got "{" ".join(sys.argv[1:])}"')
|
||||
exit(1)
|
||||
bottle.run(host=get_config().addr, port=get_config().port)
|
||||
config = get_config()
|
||||
for calname in config.calendars.keys():
|
||||
scrape_calendar(calname, config.calendars[calname])
|
||||
bottle.run(host=config.addr, port=get_config().port)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Reference in a new issue