from typing import Dict, List, Iterable
import sys
import urllib.request
import logging
from datetime import datetime, date, timedelta
from threading import Lock, Timer

from dateutil import rrule
from icalendar import cal
from isodate import Duration

from icalendar_timeseries_server import __version__
from icalendar_timeseries_server.config import get_config, CalendarConfig
from icalendar_timeseries_server.event import Event
from icalendar_timeseries_server.todo import Todo


# Per-calendar caches of the most recent scrape, guarded by a shared lock
_EVENT_SCRAPE_CACHE: Dict[str, List[Event]] = dict()
_TODO_SCRAPE_CACHE: Dict[str, List[Todo]] = dict()
_SCRAPE_CACHE_LOCK: Lock = Lock()

# User agent string advertised when fetching remote calendars
__py_version: str = f'{sys.version_info.major}.{sys.version_info.minor}.{sys.version_info.micro}'
USER_AGENT: str = f'icalendar-timeseries-server/{__version__} (Python/{__py_version})'


def _parse_recurring(event: cal.Event, start: datetime, end: datetime, duration: timedelta) -> List[datetime]:
    occurrences: List[datetime] = []
    evstart = event.get('dtstart').dt
    if isinstance(evstart, date) and not isinstance(evstart, datetime):
        evstart = datetime(evstart.year, evstart.month, evstart.day, tzinfo=start.tzinfo)
    # The first occurrence starts after the queried interval ends; no need to process further
    if evstart >= end:
        return occurrences
    # Extract the recurrence rules (RRULE, RDATE, EXRULE, EXDATE) from the serialized event
    ical_lines = event.to_ical().decode('utf-8').split('\r\n')
    recurrence = '\n'.join(
        [x for x in ical_lines
         if x.startswith('RRULE') or x.startswith('RDATE')
         or x.startswith('EXRULE') or x.startswith('EXDATE')])
    # Create a generator that yields a timestamp for each recurrence
    generator = rrule.rrulestr(recurrence, dtstart=evstart)
    # Collect each occurrence of the event that intersects the queried interval
    for dt in generator:
        # Skip past occurrences and stop once the event lies too far in the future
        if dt + duration < start:
            continue
        if dt > end:
            break
        occurrences.append(dt)
    return occurrences


def _scrape_calendar(name: str, config: CalendarConfig, start: datetime, end: datetime):
    global _EVENT_SCRAPE_CACHE, _TODO_SCRAPE_CACHE, _SCRAPE_CACHE_LOCK
    events = []
    todos = []

    # Fetch and parse the remote iCalendar document
    opener: urllib.request.OpenerDirector = config.get_url_opener()
    with opener.open(config.url) as response:
        data = response.read().decode('utf-8')
    calendar = cal.Calendar.from_ical(data)

    for element in calendar.walk():
        if element.name == "VEVENT":
            dtstart = element.get('dtstart').dt
            # Apparently datetime is a subclass of date...
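            # ...so a bare isinstance(dtstart, date) check would also match
            # timed events. In iCalendar (RFC 5545), all-day events carry a
            # DATE value rather than a DATE-TIME, which the icalendar library
            # surfaces as a plain `date`; the two-part test below normalizes
            # those to midnight in the query's timezone so they can be
            # compared against timezone-aware datetimes.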
            if isinstance(dtstart, date) and not isinstance(dtstart, datetime):
                dtstart = datetime(dtstart.year, dtstart.month, dtstart.day, tzinfo=start.tzinfo)
            # Derive the duration from the end timestamp or an explicit duration, if present
            if 'dtend' in element:
                evend = element.get('dtend').dt
                if isinstance(evend, date) and not isinstance(evend, datetime):
                    evend = datetime(evend.year, evend.month, evend.day, tzinfo=start.tzinfo)
                duration = evend - dtstart
            elif 'duration' in element:
                duration = element.get('duration').dt
            else:
                duration = timedelta(0)
            # Expand recurring events; a non-recurring event has exactly one occurrence
            if element.get('rrule') is not None or element.get('rdate') is not None:
                occurrences: Iterable[datetime] = _parse_recurring(element, start, end, duration)
            else:
                occurrences = [dtstart]
            # Keep only occurrences that overlap the queried interval
            for occurrence in occurrences:
                if start <= occurrence + duration and occurrence < end:
                    events.append(Event(name, element, occurrence, occurrence + duration))
        elif element.name == "VTODO":
            # Fall back to DTSTAMP when no explicit DTSTART is given
            dtstart = element.get('dtstamp').dt
            duration = timedelta(0)
            if 'dtstart' in element:
                dtstart = element.get('dtstart').dt
            if 'duration' in element:
                duration = element.get('duration').dt
            todos.append(Todo(name, element, dtstart, dtstart + duration))

    # Atomically publish the freshly parsed results
    with _SCRAPE_CACHE_LOCK:
        _EVENT_SCRAPE_CACHE[name] = events
        _TODO_SCRAPE_CACHE[name] = todos


def scrape_calendar(name: str, config: CalendarConfig, retry: int):
    # Get the current time in the configured timezone
    tz = get_config().tz
    now: datetime = datetime.now(tz)
    # Scrape at most once a minute
    interval = max(int(config.interval.totimedelta(start=now).total_seconds()), 60)
    # Compute the interval for which to return events
    start_delta: Duration = get_config().start_delta
    end_delta: Duration = get_config().end_delta
    start: datetime = now + start_delta
    end: datetime = now + end_delta
    # Scrape and parse the calendar; catch broadly so a single failed scrape
    # never kills the reschedule loop
    try:
        _scrape_calendar(name, config, start, end)
        # Reschedule the next regular scrape and reset the retry counter
        cron = Timer(interval, lambda: scrape_calendar(name, config, 0))
    except BaseException:
        # Reschedule with exponential backoff, capped at the regular scrape interval
        backoff_seconds = min(60 * 2 ** retry, interval)
        logging.exception(f'An error occurred while scraping the calendar endpoint "{name}" '
                          f'({config.url}), retrying in {backoff_seconds}s.')
        cron = Timer(backoff_seconds, lambda: scrape_calendar(name, config, retry + 1))
    cron.start()


def start_scrape_calendar(name: str, config: CalendarConfig):
    # Schedule the first calendar scraping to run immediately
    cron = Timer(0, lambda: scrape_calendar(name, config, retry=0))
    cron.start()


def get_calendar_events(name: str):
    global _EVENT_SCRAPE_CACHE
    with _SCRAPE_CACHE_LOCK:
        return _EVENT_SCRAPE_CACHE.get(name, [])


def get_calendar_todos(name: str):
    global _TODO_SCRAPE_CACHE
    with _SCRAPE_CACHE_LOCK:
        return _TODO_SCRAPE_CACHE.get(name, [])
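

# Illustrative wiring, not part of the module's API: a minimal sketch of how
# the scheduler and the cache accessors fit together. The `CALENDARS` mapping
# below is a stand-in assumption; the real name -> CalendarConfig lookup lives
# in the application's configuration handling.
#
#     CALENDARS: Dict[str, CalendarConfig] = ...
#     for cal_name, cal_config in CALENDARS.items():
#         # Runs the first scrape immediately; each scrape reschedules itself
#         start_scrape_calendar(cal_name, cal_config)
#
#     # Later, e.g. in a request handler: reads come from the in-memory cache
#     # under the lock and never block on network I/O
#     upcoming = get_calendar_events(cal_name)
#     open_todos = get_calendar_todos(cal_name)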