From 90b80ae5299502da749dfacf5d543bcfff65c1aa Mon Sep 17 00:00:00 2001 From: s3lph Date: Sat, 19 Feb 2022 03:29:27 +0100 Subject: [PATCH] v0.5: Exponential backoff for retries --- CHANGELOG.md | 12 ++++++++++ icalendar_timeseries_server/__init__.py | 2 +- icalendar_timeseries_server/cal.py | 30 ++++++++++++++----------- 3 files changed, 30 insertions(+), 14 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 202c38b..73d9cd9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,18 @@ # iCalendar Timeseries Server Changelog + +## Version 0.5 + +### Changes + + +- Retry calendar scraping with exponential backoff. + + + + + ## Version 0.4.1 diff --git a/icalendar_timeseries_server/__init__.py b/icalendar_timeseries_server/__init__.py index 489c7e1..27f4493 100644 --- a/icalendar_timeseries_server/__init__.py +++ b/icalendar_timeseries_server/__init__.py @@ -1,2 +1,2 @@ -__version__ = '0.4.1' +__version__ = '0.5' diff --git a/icalendar_timeseries_server/cal.py b/icalendar_timeseries_server/cal.py index 8a6e11c..925fa09 100644 --- a/icalendar_timeseries_server/cal.py +++ b/icalendar_timeseries_server/cal.py @@ -60,12 +60,8 @@ def _scrape_calendar(name: str, config: CalendarConfig, start: datetime, end: da todos = [] opener: urllib.request.OpenerDirector = config.get_url_opener() - try: - with opener.open(config.url) as response: - data = response.read().decode('utf-8') - except BaseException: - logging.exception(f'An error occurred while scraping the calendar endpoint "{name}" ({config.url})') - return + with opener.open(config.url) as response: + data = response.read().decode('utf-8') calendar = cal.Calendar.from_ical(data) for element in calendar.walk(): @@ -105,26 +101,34 @@ def _scrape_calendar(name: str, config: CalendarConfig, start: datetime, end: da _TODO_SCRAPE_CACHE[name] = todos -def scrape_calendar(name: str, config: CalendarConfig): +def scrape_calendar(name: str, config: CalendarConfig, retry: int): # Get current time in configured timezone tz = 
get_config().tz now: datetime = datetime.now(tz) - # Reschedule calendar scraping - cron = Timer(config.interval.totimedelta(start=now).total_seconds(), - lambda: scrape_calendar(name, config)) - cron.start() + # Only scrape at most once a minute + interval = max(int(config.interval.totimedelta(start=now).total_seconds()), 60) # Compute interval for which to return events start_delta: Duration = get_config().start_delta end_delta: Duration = get_config().end_delta start: datetime = now + start_delta end: datetime = now + end_delta # Scrape and parse the calendar - _scrape_calendar(name, config, start, end) + try: + _scrape_calendar(name, config, start, end) + # Reschedule calendar scraping + cron = Timer(interval, lambda: scrape_calendar(name, config, 0)) + except BaseException: + # reschedule with exponential backoff, but no more than the regular scrape interval + backoff_seconds = min(60 * 2**retry, interval) + logging.exception(f'An error occurred while scraping the calendar endpoint "{name}" ' + f'({config.url}), retrying in {backoff_seconds}s.') + cron = Timer(backoff_seconds, lambda: scrape_calendar(name, config, retry+1)) + cron.start() def start_scrape_calendar(name: str, config: CalendarConfig): # Schedule first calendar scraping - cron = Timer(0, lambda: scrape_calendar(name, config)) + cron = Timer(0, lambda: scrape_calendar(name, config, retry=0)) cron.start()