icalendar-timeseries-server/icalendar_timeseries_server/cal.py

from typing import Dict, List, Iterable

import sys
import urllib.request
import logging
from datetime import datetime, date, timedelta
from threading import Lock, Timer

from dateutil import rrule
from icalendar import cal
from isodate import Duration

from icalendar_timeseries_server import __version__
from icalendar_timeseries_server.config import get_config, CalendarConfig
from icalendar_timeseries_server.event import Event
from icalendar_timeseries_server.todo import Todo


# Results of the most recent successful scrape, keyed by calendar name.
_EVENT_SCRAPE_CACHE: Dict[str, List[Event]] = {}
_TODO_SCRAPE_CACHE: Dict[str, List[Todo]] = {}
# Held around every read and write of the two caches above.
_SCRAPE_CACHE_LOCK: Lock = Lock()

# Interpreter version as "major.minor.micro", used to build USER_AGENT below.
__py_version: str = '.'.join(map(str, sys.version_info[:3]))
USER_AGENT: str = f'icalendar-timeseries-server/{__version__} (Python/{__py_version})'


def _parse_recurring(event: cal.Event, start: datetime, end: datetime, duration: timedelta) -> List[datetime]:
    """Expand a recurring event into the occurrences relevant for a time window.

    :param event: The VEVENT component whose RRULE/RDATE/EXRULE/EXDATE properties are expanded.
    :param start: Lower bound of the window; occurrences that end before it are skipped.
    :param end: Upper bound of the window; expansion stops at the first occurrence after it.
    :param duration: Length of a single occurrence, so that occurrences which started
        before ``start`` but have not finished yet are still returned.
    :return: The start timestamps of the matching occurrences, in the order the
        recurrence rule yields them.
    """
    occurrences: List[datetime] = []

    evstart = event.get('dtstart').dt
    # datetime is a subclass of date, so both checks are needed to detect a date-only start
    if isinstance(evstart, date) and not isinstance(evstart, datetime):
        evstart = datetime(evstart.year, evstart.month, evstart.day, tzinfo=start.tzinfo)
    # First occurrence lies beyond the window; no need to process further
    if evstart >= end:
        return occurrences

    # Extract the recurrence rules from the serialized event.  Long content lines are
    # folded (RFC 5545, section 3.1) as CRLF followed by a space or tab, so the text is
    # unfolded first: filtering the folded text by prefix would drop the continuation
    # lines and truncate a long RRULE or EXDATE.
    ical_text = event.to_ical().decode('utf-8')
    unfolded = ical_text.replace('\r\n ', '').replace('\r\n\t', '')
    recurrence = '\n'.join(
        line for line in unfolded.split('\r\n')
        if line.startswith(('RRULE', 'RDATE', 'EXRULE', 'EXDATE')))
    # Create a generator that yields a timestamp for each recurrence
    generator = rrule.rrulestr(recurrence, dtstart=evstart)

    for dt in generator:
        # Skip occurrences that already ended, and stop once they start after the window
        if dt + duration < start:
            continue
        if dt > end:
            break
        occurrences.append(dt)
    return occurrences


def _as_datetime(value: date, tzinfo) -> datetime:
    """Return ``value`` as a datetime, placing date-only values at midnight in ``tzinfo``."""
    # datetime is a subclass of date, so both checks are needed to detect a plain date
    if isinstance(value, date) and not isinstance(value, datetime):
        return datetime(value.year, value.month, value.day, tzinfo=tzinfo)
    return value


def _scrape_calendar(name: str, config: CalendarConfig, start: datetime, end: datetime):
    """Download and parse one calendar and store its events and todos in the caches.

    Exceptions raised while downloading or parsing are not handled here; they
    propagate to the caller and the caches keep their previous content.

    :param name: Key under which the results are stored in the scrape caches.
    :param config: Calendar configuration providing the URL and the URL opener.
    :param start: Lower bound of the window; events that ended before it are dropped.
    :param end: Upper bound of the window; events starting at or after it are dropped.
    """
    events = []
    todos = []

    opener: urllib.request.OpenerDirector = config.get_url_opener()
    with opener.open(config.url) as response:
        data = response.read().decode('utf-8')
    calendar = cal.Calendar.from_ical(data)

    for element in calendar.walk():
        if element.name == "VEVENT":
            raw_start = element.get('dtstart').dt
            all_day = isinstance(raw_start, date) and not isinstance(raw_start, datetime)
            dtstart = _as_datetime(raw_start, start.tzinfo)
            # Process either end timestamp or duration, if present
            if 'dtend' in element:
                evend = _as_datetime(element.get('dtend').dt, start.tzinfo)
                duration = evend - dtstart
            elif 'duration' in element:
                duration = element.get('duration').dt
            elif all_day:
                # RFC 5545, section 3.6.1: a date-only DTSTART with neither DTEND
                # nor DURATION lasts one day
                duration = timedelta(days=1)
            else:
                duration = timedelta(0)
            if element.get('rrule') is not None or element.get('rdate') is not None:
                occurrences: Iterable[datetime] = _parse_recurring(element, start, end, duration)
            else:
                occurrences = [dtstart]
            for occurrence in occurrences:
                # Keep events that overlap the window, including ones already in progress
                if start <= occurrence + duration and occurrence < end:
                    events.append(Event(name, element, occurrence, occurrence + duration))
        elif element.name == "VTODO":
            # DTSTAMP is the fallback start when the todo carries no DTSTART;
            # DURATION is only honoured together with DTSTART
            dtstart = element.get('dtstamp').dt
            duration = timedelta(0)
            if 'dtstart' in element:
                # NOTE(review): a date-only DTSTART is passed on as a plain date here,
                # unlike for events - confirm that Todo copes with it
                dtstart = element.get('dtstart').dt
                if 'duration' in element:
                    duration = element.get('duration').dt
            todos.append(Todo(name, element, dtstart, dtstart + duration))

    # Publish only after the whole calendar was parsed, so a failed scrape
    # never leaves partial results in the caches
    with _SCRAPE_CACHE_LOCK:
        _EVENT_SCRAPE_CACHE[name] = events
        _TODO_SCRAPE_CACHE[name] = todos


def scrape_calendar(name: str, config: CalendarConfig, retry: int):
    """Scrape one calendar now and schedule the next scrape on a timer.

    After a successful scrape the next run is scheduled after the calendar's
    scrape interval.  After a failed scrape the error is logged and the next run
    is scheduled with exponential backoff, capped at the scrape interval.

    :param name: Name of the calendar, used as cache key and in log messages.
    :param config: Configuration of the calendar to scrape.
    :param retry: Number of consecutive failed attempts so far; determines the backoff.
    """
    app_config = get_config()
    # Get current time in configured timezone
    now: datetime = datetime.now(app_config.tz)
    # Only scrape at most once a minute
    interval = max(int(config.interval.totimedelta(start=now).total_seconds()), 60)
    # Compute interval for which to return events
    start_delta: Duration = app_config.start_delta
    end_delta: Duration = app_config.end_delta
    start: datetime = now + start_delta
    end: datetime = now + end_delta
    # Scrape and parse the calendar
    try:
        _scrape_calendar(name, config, start, end)
    except Exception:
        # Only ordinary errors are retried; SystemExit and KeyboardInterrupt propagate.
        # Reschedule with exponential backoff, but no more than the regular scrape interval
        backoff_seconds = min(60 * 2**retry, interval)
        logging.exception(f'An error occurred while scraping the calendar endpoint "{name}" '
                          f'({config.url}), retrying in {backoff_seconds}s.')
        cron = Timer(backoff_seconds, lambda: scrape_calendar(name, config, retry+1))
    else:
        # Success: reschedule at the regular interval and reset the retry counter
        cron = Timer(interval, lambda: scrape_calendar(name, config, 0))
    cron.start()


def start_scrape_calendar(name: str, config: CalendarConfig):
    """Kick off the first scrape of a calendar on a timer thread, without blocking the caller.

    :param name: Name of the calendar to scrape.
    :param config: Configuration of the calendar to scrape.
    """
    def _first_scrape():
        scrape_calendar(name, config, retry=0)

    # A zero-delay timer runs the scrape on its own thread straight away
    Timer(0, _first_scrape).start()


def get_calendar_events(name: str):
    """Return the cached events of the calendar ``name``.

    :param name: Name of the calendar to look up.
    :return: The event list stored by the last scrape, or an empty list if
        nothing is cached under that name.
    """
    with _SCRAPE_CACHE_LOCK:
        if name in _EVENT_SCRAPE_CACHE:
            return _EVENT_SCRAPE_CACHE[name]
        return []


def get_calendar_todos(name: str):
    """Return the cached todos of the calendar ``name``.

    :param name: Name of the calendar to look up.
    :return: The todo list stored by the last scrape, or an empty list if
        nothing is cached under that name.
    """
    with _SCRAPE_CACHE_LOCK:
        if name in _TODO_SCRAPE_CACHE:
            return _TODO_SCRAPE_CACHE[name]
        return []
WIP: Implement regular scraping of the calendar sources in the background, instead of blocking in the foreground process 2019-08-20 15:53:51 +02:00			`from typing import Dict, List, Iterable`
Rename project to icalendar-timeseries-server 2019-08-20 00:24:51 +02:00
			`import sys`
			`import urllib.request`
Use a logger rather than print statements. Unfortunately, bottle.py logs to stderr/stdout on its own. 2019-09-01 23:13:24 +02:00			`import logging`
Rename project to icalendar-timeseries-server 2019-08-20 00:24:51 +02:00			`from datetime import datetime, date, timedelta`
Secure all critical sections using _SCRAPE_CACHE with a lock. 2019-08-21 13:52:25 +02:00			`from threading import Lock, Timer`
Rename project to icalendar-timeseries-server 2019-08-20 00:24:51 +02:00
			`from dateutil import rrule`
			`from icalendar import cal`
WIP: Implement regular scraping of the calendar sources in the background, instead of blocking in the foreground process 2019-08-20 15:53:51 +02:00			`from isodate import Duration`
Rename project to icalendar-timeseries-server 2019-08-20 00:24:51 +02:00
			`from icalendar_timeseries_server import __version__`
			`from icalendar_timeseries_server.config import get_config, CalendarConfig`
			`from icalendar_timeseries_server.event import Event`
Implement todo exporting 2020-11-06 03:30:47 +01:00			`from icalendar_timeseries_server.todo import Todo`
Rename project to icalendar-timeseries-server 2019-08-20 00:24:51 +02:00

Implement todo exporting 2020-11-06 03:30:47 +01:00			`_EVENT_SCRAPE_CACHE: Dict[str, List[Event]] = dict()`
			`_TODO_SCRAPE_CACHE: Dict[str, List[Todo]] = dict()`
Secure all critical sections using _SCRAPE_CACHE with a lock. 2019-08-21 13:52:25 +02:00			`_SCRAPE_CACHE_LOCK: Lock = Lock()`
Rename project to icalendar-timeseries-server 2019-08-20 00:24:51 +02:00
			`__py_version: str = f'{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}'`
			`USER_AGENT: str = f'icalendar-timeseries-server/{__version__} (Python/{__py_version})'`


			`def _parse_recurring(event: cal.Event, start: datetime, end: datetime, duration: timedelta) -> List[datetime]:`
			`occurences: List[datetime] = []`

			`evstart = event.get('dtstart').dt`
Fix type confusion bug in recurring events 2020-06-19 00:35:12 +02:00			`if isinstance(evstart, date) and not isinstance(evstart, datetime):`
			`evstart = datetime(evstart.year, evstart.month, evstart.day, tzinfo=start.tzinfo)`
Rename project to icalendar-timeseries-server 2019-08-20 00:24:51 +02:00			`# First occurence lies in the future; no need to process further`
			`if evstart >= end:`
			`return occurences`

			`# Extract recurrence rules from ical`
			`ical_lines = event.to_ical().decode('utf-8').split('\r\n')`
			`recurrence = '\n'.join(`
			`[x for x in ical_lines`
			`if x.startswith('RRULE') or x.startswith('RDATE') or x.startswith('EXRULE') or x.startswith('EXDATE')])`
			`# Create a generator that yields a timestamp for each recurrence`
			`generator = rrule.rrulestr(recurrence, dtstart=evstart)`

			`# Generate an event entry for each occurence of the event`
			`for dt in generator:`
			`# Skip past occurences and break once the the event lies too far in the future`
			`if dt + duration < start:`
			`continue`
			`if dt > end:`
			`break`
			`# Create an event entry`
			`occurences.append(dt)`
			`return occurences`


WIP: Implement regular scraping of the calendar sources in the background, instead of blocking in the foreground process 2019-08-20 15:53:51 +02:00			`def _scrape_calendar(name: str, config: CalendarConfig, start: datetime, end: datetime):`
Implement todo exporting 2020-11-06 03:30:47 +01:00			`global _EVENT_SCRAPE_CACHE, _TODO_SCRAPE_CACHE, _SCRAPE_CACHE_LOCK`
Rename project to icalendar-timeseries-server 2019-08-20 00:24:51 +02:00			`events = []`
Implement todo exporting 2020-11-06 03:30:47 +01:00			`todos = []`
WIP: Implement regular scraping of the calendar sources in the background, instead of blocking in the foreground process 2019-08-20 15:53:51 +02:00
			`opener: urllib.request.OpenerDirector = config.get_url_opener()`
v0.5: Exponential backoff for retrys 2022-02-19 03:29:27 +01:00			`with opener.open(config.url) as response:`
			`data = response.read().decode('utf-8')`
WIP: Implement regular scraping of the calendar sources in the background, instead of blocking in the foreground process 2019-08-20 15:53:51 +02:00			`calendar = cal.Calendar.from_ical(data)`

Rename project to icalendar-timeseries-server 2019-08-20 00:24:51 +02:00			`for element in calendar.walk():`
			`if element.name == "VEVENT":`
			`dtstart = element.get('dtstart').dt`
Fix: Keep showing events that already started, but have not finished yet 2019-09-21 14:43:48 +02:00			`# Apparently datetime is a subclass of date...`
			`if isinstance(dtstart, date) and not isinstance(dtstart, datetime):`
Rename project to icalendar-timeseries-server 2019-08-20 00:24:51 +02:00			`dtstart = datetime(dtstart.year, dtstart.month, dtstart.day, tzinfo=start.tzinfo)`
			`# Process either end timestamp or duration, if present`
			`if 'dtend' in element:`
			`evend = element.get('dtend').dt`
Fix: Keep showing events that already started, but have not finished yet 2019-09-21 14:43:48 +02:00			`if isinstance(evend, date) and not isinstance(evend, datetime):`
Rename project to icalendar-timeseries-server 2019-08-20 00:24:51 +02:00			`evend = datetime(evend.year, evend.month, evend.day, tzinfo=start.tzinfo)`
			`duration = evend - dtstart`
			`elif 'duration' in element:`
			`duration = element.get('duration').dt`
			`else:`
			`duration = timedelta(0)`
			`if element.get('rrule') is not None or element.get('rdate') is not None:`
			`occurences: Iterable[datetime] = _parse_recurring(element, start, end, duration)`
			`else:`
			`occurences = [dtstart]`
			`for occurence in occurences:`
Fix: Keep showing events that already started, but have not finished yet 2019-09-21 14:43:48 +02:00			`if start <= occurence + duration and occurence < end:`
Rename project to icalendar-timeseries-server 2019-08-20 00:24:51 +02:00			`events.append(Event(name, element, occurence, occurence + duration))`
Implement todo exporting 2020-11-06 03:30:47 +01:00			`elif element.name == "VTODO":`
			`dtstart = element.get('dtstamp').dt`
			`duration = timedelta(0)`
			`if 'dtstart' in element:`
			`dtstart = element.get('dtstart').dt`
			`if 'duration' in element:`
			`duration = element.get('duration').dt`
			`todos.append(Todo(name, element, dtstart, dtstart + duration))`

Secure all critical sections using _SCRAPE_CACHE with a lock. 2019-08-21 13:52:25 +02:00			`with _SCRAPE_CACHE_LOCK:`
Implement todo exporting 2020-11-06 03:30:47 +01:00			`_EVENT_SCRAPE_CACHE[name] = events`
			`_TODO_SCRAPE_CACHE[name] = todos`
Rename project to icalendar-timeseries-server 2019-08-20 00:24:51 +02:00

v0.5: Exponential backoff for retrys 2022-02-19 03:29:27 +01:00			`def scrape_calendar(name: str, config: CalendarConfig, retry: int):`
Update unit tests to work with per-calendar scrape intervals. 2019-08-21 13:40:16 +02:00			`# Get current time in configured timezone`
WIP: Implement regular scraping of the calendar sources in the background, instead of blocking in the foreground process 2019-08-20 15:53:51 +02:00			`tz = get_config().tz`
			`now: datetime = datetime.now(tz)`
v0.5: Exponential backoff for retrys 2022-02-19 03:29:27 +01:00			`# Only scrape at most once a minute`
			`interval = max(int(config.interval.totimedelta(start=now).total_seconds()), 60)`
Update unit tests to work with per-calendar scrape intervals. 2019-08-21 13:40:16 +02:00			`# Compute interval for which to return events`
WIP: Implement regular scraping of the calendar sources in the background, instead of blocking in the foreground process 2019-08-20 15:53:51 +02:00			`start_delta: Duration = get_config().start_delta`
			`end_delta: Duration = get_config().end_delta`
			`start: datetime = now + start_delta`
			`end: datetime = now + end_delta`
Update unit tests to work with per-calendar scrape intervals. 2019-08-21 13:40:16 +02:00			`# Scrape and parse the calendar`
v0.5: Exponential backoff for retrys 2022-02-19 03:29:27 +01:00			`try:`
			`_scrape_calendar(name, config, start, end)`
			`# Reschedule calendar scraping`
			`cron = Timer(interval, lambda: scrape_calendar(name, config, 0))`
			`except BaseException:`
			`# reschedule with exponential backoff, but no more than the regular scrape interval`
			`backoff_seconds = min(60 * 2**retry, interval)`
			`logging.exception(f'An error occurred while scraping the calendar endpoint "{name}" '`
			`f'({config.url}), retrying in {backoff_seconds}s.')`
			`cron = Timer(backoff_seconds, lambda: scrape_calendar(name, config, retry+1))`
			`cron.start()`
Rename project to icalendar-timeseries-server 2019-08-20 00:24:51 +02:00
WIP: Implement regular scraping of the calendar sources in the background, instead of blocking in the foreground process 2019-08-20 15:53:51 +02:00
Move first scrape from foreground to background 2019-08-21 13:51:43 +02:00			`def start_scrape_calendar(name: str, config: CalendarConfig):`
			`# Schedule first calendar scraping`
v0.5: Exponential backoff for retrys 2022-02-19 03:29:27 +01:00			`cron = Timer(0, lambda: scrape_calendar(name, config, retry=0))`
Move first scrape from foreground to background 2019-08-21 13:51:43 +02:00			`cron.start()`


Implement todo exporting 2020-11-06 03:30:47 +01:00			`def get_calendar_events(name: str):`
			`global _EVENT_SCRAPE_CACHE`
			`with _SCRAPE_CACHE_LOCK:`
			`return _EVENT_SCRAPE_CACHE.get(name, [])`


			`def get_calendar_todos(name: str):`
			`global _TODO_SCRAPE_CACHE`
Secure all critical sections using _SCRAPE_CACHE with a lock. 2019-08-21 13:52:25 +02:00			`with _SCRAPE_CACHE_LOCK:`
Implement todo exporting 2020-11-06 03:30:47 +01:00			`return _TODO_SCRAPE_CACHE.get(name, [])`