Implement rate limiting according to Nominatim Usage Policy
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful

This commit is contained in:
s3lph 2022-10-27 01:21:38 +02:00
parent f983a84ef7
commit 639c602692
Signed by: s3lph
GPG key ID: 8AC98A811E5BEFF5
2 changed files with 7 additions and 4 deletions

View file

@ -56,7 +56,7 @@ A run without cache takes some time as it fetches all data from their sources:
1. Get a list of countries in Europe and federal states in Germany from [Wikidata](https://wikidata.org). 1. Get a list of countries in Europe and federal states in Germany from [Wikidata](https://wikidata.org).
1. Download all these countries' and states' border shapes from Wikimedia Commons. 1. Download all these countries' and states' border shapes from Wikimedia Commons.
1. Get a list of Erfas & Chaostreffs from the doku.ccc.de Semantic Mediawiki API. 1. Get a list of Erfas & Chaostreffs from the doku.ccc.de Semantic Mediawiki API.
1. Resolve all POIs' (Erfas & Chaostreffs) street addresses to geocoordinates using [Nominatim](https://nominatim.org/). 1. Resolve all POIs' (Erfas & Chaostreffs) street addresses to geocoordinates using [Nominatim](https://nominatim.org/), rate limited to 1 request per second as per Nominatim's usage policy.
**Use the cache whenever you don't need to update country borders or hackerspace locations. Otherwise, both Wikidata and Nominatim will start rate limiting pretty quickly.** (And caching is actually required by [Nominatim's acceptable use policy](https://operations.osmfoundation.org/policies/nominatim/)). **Use the cache whenever you don't need to update country borders or hackerspace locations. Otherwise, both Wikidata and Nominatim will start rate limiting pretty quickly.** (And caching is actually required by [Nominatim's acceptable use policy](https://operations.osmfoundation.org/policies/nominatim/)).

View file

@ -17,6 +17,7 @@ import math
from lxml import etree from lxml import etree
import pyproj import pyproj
from geopy import Nominatim from geopy import Nominatim
from geopy.extra.rate_limiter import RateLimiter
import tqdm import tqdm
import cairosvg import cairosvg
from PIL import Image, ImageFont from PIL import Image, ImageFont
@ -282,7 +283,10 @@ class Erfa(Drawable):
@classmethod @classmethod
def address_lookup(cls, attr): def address_lookup(cls, attr):
locator = Nominatim(user_agent=USER_AGENT) # Nominatim's Usage Policy requires rate limiting to 1 request per second
nominatim = Nominatim(user_agent=USER_AGENT)
geocode = RateLimiter(nominatim.geocode, min_delay_seconds=1)
number = attr['Chaostreff-Physical-Housenumber'] number = attr['Chaostreff-Physical-Housenumber']
street = attr['Chaostreff-Physical-Address'] street = attr['Chaostreff-Physical-Address']
zipcode = attr['Chaostreff-Physical-Postcode'] zipcode = attr['Chaostreff-Physical-Postcode']
@ -303,7 +307,7 @@ class Erfa(Drawable):
formats.insert(0, f'{street[0]} {number[0]}, {zipcode[0]} {acity[0]}, {country}') formats.insert(0, f'{street[0]} {number[0]}, {zipcode[0]} {acity[0]}, {country}')
for fmt in formats: for fmt in formats:
response = locator.geocode(fmt) response = geocode(fmt)
if response is not None: if response is not None:
return response.longitude, response.latitude return response.longitude, response.latitude
@ -384,7 +388,6 @@ class Erfa(Drawable):
erfa = cls.from_api(ns, name, attr['printouts'], radius) erfa = cls.from_api(ns, name, attr['printouts'], radius)
erfas.append(erfa) erfas.append(erfa)
except BaseException as e: except BaseException as e:
breakpoint()
print(e) print(e)
continue continue