diff --git a/sopel/modules/tld.py b/sopel/modules/tld.py index d228c5f005..694222171e 100644 --- a/sopel/modules/tld.py +++ b/sopel/modules/tld.py @@ -40,8 +40,10 @@ def _strptime_as_utc(time_string): DEFAULT_CACHE_DATETIME = _strptime_as_utc(DEFAULT_CACHE_TIME) IANA_LIST_URI = 'https://data.iana.org/TLD/tlds-alpha-by-domain.txt' -WIKI_PAGE_NAME = 'List_of_Internet_top-level_domains' - +WIKI_PAGE_NAMES = [ + 'List_of_Internet_top-level_domains', + 'Country_code_top-level_domain', +] r_tld = re.compile(r'^\.(\S+)') r_idn = re.compile(r'^(xn--[A-Za-z0-9]+)') @@ -250,30 +252,34 @@ def _update_tld_data(bot, which, force=False): bot.memory['tld_list_cache'] = tld_list bot.memory['tld_list_cache_updated'] = now elif which == 'data': - try: - # https://www.mediawiki.org/wiki/Special:MyLanguage/API:Get_the_contents_of_a_page - tld_response = requests.get( - "https://en.wikipedia.org/w/api.php", - params={ - "action": "parse", - "format": "json", - "prop": "text", - "utf8": 1, - "formatversion": 2, - "page": WIKI_PAGE_NAME, - }, - ).json() - tld_data = tld_response["parse"]["text"] - # py <3.5 needs ValueError instead of more specific json.decoder.JSONDecodeError - except (requests.exceptions.RequestException, ValueError, KeyError): - # Log error and continue life; it'll be fine - LOGGER.warning( - "Error fetching TLD data from Wikipedia; will try again later.", - exc_info=True) - return + data_pages = [] + for title in WIKI_PAGE_NAMES: + try: + # https://www.mediawiki.org/wiki/Special:MyLanguage/API:Get_the_contents_of_a_page + tld_response = requests.get( + "https://en.wikipedia.org/w/api.php", + params={ + "action": "parse", + "format": "json", + "prop": "text", + "utf8": 1, + "formatversion": 2, + "page": title, + }, + ).json() + data_pages.append(tld_response["parse"]["text"]) + # py <3.5 needs ValueError instead of more specific json.decoder.JSONDecodeError + except (requests.exceptions.RequestException, ValueError, KeyError): + # Log error and continue life; it'll be fine + LOGGER.warning( + 'Error fetching TLD data from "%s" on Wikipedia; will try again later.', + title, exc_info=True) + return parser = WikipediaTLDListParser() - parser.feed(tld_data) + for page in data_pages: + parser.feed(page) + tld_data = parser.get_processed_data() bot.memory['tld_data_cache'] = tld_data