Skip to content

Commit

Permalink
tld: ccTLD data moved to its own wiki page
Browse files Browse the repository at this point in the history
  • Loading branch information
dgw committed May 27, 2021
1 parent 12f8640 commit c5b6148
Showing 1 changed file with 30 additions and 24 deletions.
54 changes: 30 additions & 24 deletions sopel/modules/tld.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,10 @@ def _strptime_as_utc(time_string):
DEFAULT_CACHE_DATETIME = _strptime_as_utc(DEFAULT_CACHE_TIME)

IANA_LIST_URI = 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt'
WIKI_PAGE_NAME = 'List_of_Internet_top-level_domains'

WIKI_PAGE_NAMES = [
'List_of_Internet_top-level_domains',
'Country_code_top-level_domain',
]
r_tld = re.compile(r'^\.(\S+)')
r_idn = re.compile(r'^(xn--[A-Za-z0-9]+)')

Expand Down Expand Up @@ -250,30 +252,34 @@ def _update_tld_data(bot, which, force=False):
bot.memory['tld_list_cache'] = tld_list
bot.memory['tld_list_cache_updated'] = now
elif which == 'data':
try:
# https://www.mediawiki.org/wiki/Special:MyLanguage/API:Get_the_contents_of_a_page
tld_response = requests.get(
"https://en.wikipedia.org/w/api.php",
params={
"action": "parse",
"format": "json",
"prop": "text",
"utf8": 1,
"formatversion": 2,
"page": WIKI_PAGE_NAME,
},
).json()
tld_data = tld_response["parse"]["text"]
# py <3.5 needs ValueError instead of more specific json.decoder.JSONDecodeError
except (requests.exceptions.RequestException, ValueError, KeyError):
# Log error and continue life; it'll be fine
LOGGER.warning(
"Error fetching TLD data from Wikipedia; will try again later.",
exc_info=True)
return
data_pages = []
for title in WIKI_PAGE_NAMES:
try:
# https://www.mediawiki.org/wiki/Special:MyLanguage/API:Get_the_contents_of_a_page
tld_response = requests.get(
"https://en.wikipedia.org/w/api.php",
params={
"action": "parse",
"format": "json",
"prop": "text",
"utf8": 1,
"formatversion": 2,
"page": title,
},
).json()
data_pages.append(tld_response["parse"]["text"])
# py <3.5 needs ValueError instead of more specific json.decoder.JSONDecodeError
except (requests.exceptions.RequestException, ValueError, KeyError):
# Log error and continue life; it'll be fine
LOGGER.warning(
'Error fetching TLD data from "%s" on Wikipedia; will try again later.',
title, exc_info=True)
return

parser = WikipediaTLDListParser()
parser.feed(tld_data)
for page in data_pages:
parser.feed(page)

tld_data = parser.get_processed_data()

bot.memory['tld_data_cache'] = tld_data
Expand Down

0 comments on commit c5b6148

Please sign in to comment.