diff --git a/documentation/main.md b/documentation/main.md index 4dd66a4..5e12e06 100644 --- a/documentation/main.md +++ b/documentation/main.md @@ -128,20 +128,39 @@ values of the form: {"iso3": "AFG", "pcode": "AF01", "name": "Kabul"} Method *setup_from_libhxl_dataset* takes a libhxl Dataset object, while -*setup_from_url* takes a URL which defaults to the global p-codes dataset on -HDX. +*setup_from_url* takes a URL which defaults to a resource in the global p-codes +dataset on HDX. These methods also have optional parameter *countryiso3s* which is a tuple or list of country ISO3 codes to be read or None if all countries are desired. Examples of usage: + AdminLevel.looks_like_pcode("YEM123") # returns True + AdminLevel.looks_like_pcode("Yemen") # returns False + AdminLevel.looks_like_pcode("YEME123") # returns False adminlevel = AdminLevel(config) adminlevel.setup_from_admin_info(admin_info, countryiso3s=("YEM",)) adminlevel.get_pcode("YEM", "YEM030", logname="test") # returns ("YE30", True) adminlevel.get_pcode("YEM", "Al Dhale"e / الضالع") # returns ("YE30", False) adminlevel.get_pcode("YEM", "Al Dhale"e / الضالع", fuzzy_match=False) # returns (None, True) +There is basic admin 1 p-code length conversion by default. A more advanced +p-code length conversion can be activated by calling *load_pcode_formats* +which takes a URL that defaults to a resource in the global p-codes dataset on +HDX: + + admintwo.load_pcode_formats() + admintwo.get_pcode("YEM", "YEM30001") # returns ("YE3001", True) + +The length conversion can be further enhanced by supplying either parent +AdminLevel objects in a list or lists of p-codes per parent admin level: + + admintwo.set_parent_admins_from_adminlevels([adminone]) + admintwo.get_pcode("NER", "NE00409") # returns ("NER004009", True) + admintwo.set_parent_admins([adminone.pcodes]) + admintwo.get_pcode("NER", "NE00409") # returns ("NER004009", True) + ## Currencies Various functions support the conversion of monetary amounts to USD. 
@classmethod
def looks_like_pcode(cls, string: str) -> bool:
    """Check if a string looks like a p-code using regex matching of format.
    Checks for 2 or 3 letter country iso code at start and then one or more
    digits.

    The class level pattern (pcode_regex) allows the digits group to be
    empty, so a match alone is not enough: a bare 2 or 3 letter string such
    as a short admin name would otherwise be treated as a p-code and never
    be looked up by name.

    Args:
        string (str): String to check

    Returns:
        bool: Whether string looks like a p-code
    """
    match = cls.pcode_regex.match(string)
    # Group 2 of pcode_regex holds the digits; require at least one digit
    return bool(match and match.group(2))
def load_pcode_formats(self, formats_url: Optional[str] = None) -> None:
    """
    Load p-code formats from a URL. Defaults to global p-codes dataset on HDX.

    For each row, the country prefix length (#country+len) is read followed
    by the admin 1 to 3 lengths (#adm1+len to #adm3+len). Reading stops at
    the first admin level whose length is blank or contains "|"
    (NOTE(review): "|" presumably separates alternative lengths - confirm),
    so only the levels before it are stored for that country.

    Also records, per country, the character positions at which "0" occurs
    in the p-codes that are already loaded, so p-codes should be set up
    before calling this method.

    Args:
        formats_url (Optional[str]): URL from which to load data. Defaults to None (use the current value of _formats_url, which points at the global p-codes dataset unless it has been changed).

    Returns:
        None
    """
    if formats_url is None:
        # Resolve at call time so that a changed _formats_url is honoured
        formats_url = self._formats_url
    formats_info = self.get_libhxl_dataset(formats_url)
    for row in formats_info:
        pcode_format = [int(row.get("#country+len"))]
        for admin_no in range(1, 4):
            length = row.get(f"#adm{admin_no}+len")
            if not length or "|" in length:
                break
            pcode_format.append(int(length))
        self.pcode_formats[row.get("#country+code")] = pcode_format

    for pcode in self.pcodes:
        countryiso3 = self.pcode_to_iso3[pcode]
        for position, character in enumerate(pcode):
            if character == "0":
                self.zeroes.setdefault(countryiso3, set()).add(position)


def set_parent_admins(self, parent_admins: List[List]) -> None:
    """
    Set parent admins. The lists are consulted during p-code length
    conversion: entry 0 holds the admin 1 p-codes, entry 1 the admin 2
    p-codes and so on.

    Args:
        parent_admins (List[List]): List of P-codes per parent admin

    Returns:
        None
    """
    self.parent_admins = parent_admins


def set_parent_admins_from_adminlevels(
    self, adminlevels: List["AdminLevel"]
) -> None:
    """
    Set parent admins from AdminLevel objects. The p-codes of each object
    are used as one parent admin level, in the order given.

    Args:
        adminlevels (List[AdminLevel]): List of parent AdminLevel objects

    Returns:
        None
    """
    self.parent_admins = [adminlevel.pcodes for adminlevel in adminlevels]
def convert_admin_pcode_length(
    self, countryiso3: str, pcode: str, logname: Optional[str] = None
) -> Optional[str]:
    """Standardise pcode length by country and match to an internal pcode.
    Requires that p-code formats be loaded (eg. using load_pcode_formats)

    The country prefix is first converted to the length given by the
    country's format. If that alone does not give a known p-code, each admin
    level up to this object's admin level is examined in turn and a single
    leading "0" is added to or removed from that level's digits to try to
    reach the expected total length. If parent admins have been set, an
    adjustment at a parent level is only kept if the resulting parent p-code
    is one of the supplied parent p-codes.

    If no format is known for the country, falls back to
    convert_admin1_pcode_length when the country's admin level is 1.

    Args:
        countryiso3 (str): ISO3 country code
        pcode (str): P code to match
        logname (Optional[str]): Identifying name to use when logging. Defaults to None (don't log).

    Returns:
        Optional[str]: Matched P code or None if no match
    """
    match = self.pcode_regex.match(pcode)
    if not match:
        return None
    pcode_format = self.pcode_formats.get(countryiso3)
    if not pcode_format:
        if self.get_admin_level(countryiso3) == 1:
            return self.convert_admin1_pcode_length(
                countryiso3, pcode, logname
            )
        return None

    # Step 1: bring the country prefix to the length the format expects
    countryiso, digits = match.groups()
    countryiso_length = len(countryiso)
    if countryiso_length > pcode_format[0]:
        countryiso2 = Country.get_iso2_from_iso3(countryiso3)
        pcode_parts = [countryiso2, digits]
    elif countryiso_length < pcode_format[0]:
        pcode_parts = [countryiso3, digits]
    else:
        pcode_parts = [countryiso, digits]
    new_pcode = "".join(pcode_parts)
    if new_pcode in self.pcodes:
        if logname:
            self.matches.add(
                (
                    logname,
                    countryiso3,
                    new_pcode,
                    self.pcode_to_name[new_pcode],
                    "pcode length conversion-country",
                )
            )
        return new_pcode

    # zeroes only has an entry for countries whose loaded p-codes contain
    # a "0", so a missing country means "no position may be zero padded"
    zero_positions = self.zeroes.get(countryiso3, ())

    def parent_accepts(admin_no, admin_length, candidate_part):
        # True if the adjusted part may be kept: either there is no parent
        # list to check this level against, or the parent p-code it implies
        # is one of the known parent p-codes
        if admin_no >= self.admin_level:
            return True
        if admin_no > len(self.parent_admins):
            return True
        parent_pcode = "".join(pcode_parts[:admin_no])
        parent_pcode = f"{parent_pcode}{candidate_part[:admin_length]}"
        return parent_pcode in self.parent_admins[admin_no - 1]

    # Step 2: walk the admin levels, splitting the remaining digits into
    # one part per level and adjusting leading zeroes as needed
    total_length = sum(pcode_format[: self.admin_level + 1])
    admin_changes = []
    for admin_no in range(1, self.admin_level + 1):
        len_new_pcode = len(new_pcode)
        if len_new_pcode == total_length:
            break
        if admin_no >= len(pcode_format):
            # Format does not define this admin level: nothing to adjust
            break
        admin_length = pcode_format[admin_no]
        pcode_part = pcode_parts[admin_no]
        part_length = len(pcode_part)
        if part_length == admin_length:
            break
        # Index in a correctly formatted p-code where this level starts
        pos = sum(pcode_format[:admin_no])
        if part_length < admin_length:
            # Remaining digits are too short: can only pad and stop
            if pos in zero_positions:
                pcode_parts[admin_no] = f"0{pcode_part}"
                admin_changes.append(str(admin_no))
                new_pcode = "".join(pcode_parts)
            break
        if admin_no == self.admin_level:
            # Last level with digits too long: can only strip and stop
            if pcode_part[0] == "0":
                pcode_parts[admin_no] = pcode_part[1:]
                admin_changes.append(str(admin_no))
                new_pcode = "".join(pcode_parts)
            break
        if len_new_pcode < total_length:
            if admin_length > 2 and pos in zero_positions:
                candidate_part = f"0{pcode_part}"
                if parent_accepts(admin_no, admin_length, candidate_part):
                    pcode_part = candidate_part
                    admin_changes.append(str(admin_no))
        elif len_new_pcode > total_length:
            if admin_length <= 2 and pcode_part[0] == "0":
                candidate_part = pcode_part[1:]
                if parent_accepts(admin_no, admin_length, candidate_part):
                    pcode_part = candidate_part
                    admin_changes.append(str(admin_no))
        # Split off this level's digits; the rest belongs to lower levels
        pcode_parts[admin_no] = pcode_part[:admin_length]
        pcode_parts.append(pcode_part[admin_length:])
        new_pcode = "".join(pcode_parts)

    if new_pcode in self.pcodes:
        if logname:
            admin_changes_str = ",".join(admin_changes)
            self.matches.add(
                (
                    logname,
                    countryiso3,
                    new_pcode,
                    self.pcode_to_name[new_pcode],
                    f"pcode length conversion-admins {admin_changes_str}",
                )
            )
        return new_pcode
    return None
Args: - countryiso3 (str): Iso3 country code - pcode (str): P code for admin one + countryiso3 (str): ISO3 country code + pcode (str): P code for admin one to match logname (Optional[str]): Identifying name to use when logging. Defaults to None (don't log). Returns: @@ -409,23 +607,26 @@ def get_pcode( pcode = self.admin_name_mappings.get(name) if pcode and self.pcode_to_iso3[pcode] == countryiso3: return pcode, True - name_to_pcode = self.name_to_pcode.get(countryiso3) - if name_to_pcode is not None: - pcode = name_to_pcode.get(name.lower()) - if pcode: - return pcode, True - if name in self.pcodes: # name is a pcode - return name, True - if self.get_admin_level(countryiso3) == 1: - pcode = self.convert_admin1_pcode_length( - countryiso3, name, logname + if self.looks_like_pcode(name): + pcode = name.upper() + if pcode in self.pcodes: # name is a p-code + return name, True + # name looks like a p-code, but doesn't match p-codes + # so try adjusting p-code length + pcode = self.convert_admin_pcode_length( + countryiso3, pcode, logname ) - if pcode: - return pcode, True - if not fuzzy_match: - return None, True - pcode = self.fuzzy_pcode(countryiso3, name, logname) - return pcode, False + return pcode, True + else: + name_to_pcode = self.name_to_pcode.get(countryiso3) + if name_to_pcode is not None: + pcode = name_to_pcode.get(name.lower()) + if pcode: + return pcode, True + if not fuzzy_match: + return None, True + pcode = self.fuzzy_pcode(countryiso3, name, logname) + return pcode, False def output_matches(self) -> List[str]: """Output log of matches diff --git a/tests/fixtures/global_pcode_lengths.csv b/tests/fixtures/global_pcode_lengths.csv new file mode 100644 index 0000000..88425b7 --- /dev/null +++ b/tests/fixtures/global_pcode_lengths.csv @@ -0,0 +1,159 @@ +Location,Country Length,Admin 1 Length,Admin 2 Length,Admin 3 Length,Admin 4 Length +#country+code,#country+len,#adm1+len,#adm2+len,#adm3+len,#adm4+len +AFG,2,2,2,, +AGO,2,2,3,3, +ALB,2,2,2,2, 
+ARE,2,2,,, +ARG,2,3,3,, +ARM,2,2,1,3, +ATG,2,2,,, +AZE,2,8,,, +BDI,3,3,3,, +BEN,2,2,2,, +BES,3,1,,, +BFA,2,2,2,2, +BGD,2,2,2,2,2 +BGR,2,3,3,, +BLR,2,3,3,, +BLZ,2,2,,, +BMU,2,2,1,, +BOL,2,2,2,2, +BRA,2,2,5,, +BRB,2,2,,, +BTN,2,3,2,, +BWA,2,2,2,2, +CAF,2,2,1,1,2 +CHL,2,2,1,2, +CHN,2,3,3,, +CIV,2,2,2,2, +CMR,2,3,3,3, +COD,2,2,2,, +COG,2,2,2,, +COL,2,2,3,, +COM,2,1,1,1, +CPV,2,2,2,, +CRI,2,1,2,2, +CUB,2,2,2,, +CUW,2,2,,, +CYM,2,2,,, +DJI,2,2,2,, +DMA,2,2,,, +DOM,2,2,2,2,2 +DZA,2,3,3,, +ECU,2,2,2,2, +EGY,2,2,2,, +ERI,2,1,2,, +ESH,2,2,,, +ETH,2,2,2,2, +FJI,2,1,2,2, +FSM,2,1,2,, +GAB,3,3,3,, +GEO,2,2,2,, +GHA,2,2,2,, +GIN,2,3,3,2, +GLP,2,2,2,, +GMB,2,2,2,2, +GNB,2,2,2,, +GNQ,2,3,3,, +GRD,2,2,,, +GTM,2,2,2,, +GUF,2,1,3,, +GUY,2,2,2,, +HND,2,2,2,, +HTI,2,2,2,3, +HUN,2,3,3,, +IDN,2,2,2,3, +IRN,2,3,3,, +IRQ,2,3,3,3, +JAM,2,2,,, +JPN,2,2,3,, +KAZ,3,3,3,, +KEN,2,3,3,, +KGZ,2,11,0,0, +KHM,2,2,2,2, +KIR,2,1,2,, +KNA,2,2,,, +KWT,2,2,,, +LAO,2,2,2,, +LBN,2,1,1,, +LBR,2,2,2,, +LBY,2,2,2,, +LCA,2,2,9,, +LKA,2,1,1,2,3 +LSO,2,1,2,, +MAR,2,3,3,7,1 +MDA,2,3,,, +MDG,2,2,7|3,-1|3,3 +MDV,2,3,3,3, +MEX,2,2,3,, +MHL,2,2,2,, +MLI,2,2,2,2, +MMR,3,3,4,, +MNG,2,2,2,, +MOZ,2,2,2,2, +MRT,2,2,2,, +MSR,2,2,,, +MTQ,2,2,2,, +MUS,2,2,,, +MWI,2,1,2,2, +MYS,2,2,2,, +NAM,2,2,2,, +NER,3,3,3,3, +NGA,2,3,3,3, +NIC,2,2,2,, +NPL,2,2,5,, +OMN,2,2,2,, +PAK,2,1,2,2, +PAN,2,2,2,2, +PER,2,2,2,2, +PHL,2,9,0,0,0 +PNG,2,2,2,2, +POL,2,3,3,, +PRI,2,2,,, +PRK,2,2,2,, +PRY,2,2,2,, +PSE,2,2,2,, +QAT,3,3,3,, +ROU,2,3,3,, +RUS,2,3,3,, +RWA,2,1,1,2,2 +SAU,2,2,,, +SDN,2,2,3,, +SEN,2,2,2,2, +SLB,2,2,4,4, +SLE,2,2,2,2,2 +SLV,2,2,2,, +SOM,2,2,2,, +SSD,2,2,2,2, +STP,2,2,2,, +SUR,2,2,2,, +SVK,2,3,3,, +SWZ,2,1,2,, +SXM,2,1,1,, +SYC,2,1,1,4, +SYR,2,2,2,2, +TCA,2,1,2,, +TCD,2,2,2,2, +TGO,2,2,2,2, +THA,2,2,2,2, +TJK,0,7,0,, +TLS,2,2,2,2, +TON,2,1,1,2, +TTO,2,2,,, +TUN,2,1,1,2,2 +TUR,3,3,3,3,3 +TZA,2,2,2,3|4, +UGA,2,1,3,2,2 +UKR,2,2,2,3,3 +URY,2,2,3,, +UZB,2,2,3,, +VCT,2,1,2,, +VEN,2,2,2,2, +VGB,2,2,,, +VIR,2,3,5,, +VNM,2,3,2,, 
+VUT,2,2,3,, +YEM,2,2,2,2, +ZAF,2,1,2,1,3 +ZMB,2,3,3,3,3 +ZWE,2,2,2,2, diff --git a/tests/hdx/location/test_adminlevel.py b/tests/hdx/location/test_adminlevel.py index f4e7997..3d229b9 100755 --- a/tests/hdx/location/test_adminlevel.py +++ b/tests/hdx/location/test_adminlevel.py @@ -15,7 +15,11 @@ def config(self): @pytest.fixture(scope="function") def url(self): - return "https://raw.githubusercontent.com/OCHA-DAP/hdx-python-country/admin_dataset/tests/fixtures/global_pcodes_adm_1_2.csv" + return "https://raw.githubusercontent.com/OCHA-DAP/hdx-python-country/main/tests/fixtures/global_pcodes_adm_1_2.csv" + + @pytest.fixture(scope="function") + def formats_url(self): + return "https://raw.githubusercontent.com/OCHA-DAP/hdx-python-country/pcode_formats/tests/fixtures/global_pcode_lengths.csv" def test_adminlevel(self, config): adminone = AdminLevel(config) @@ -65,15 +69,27 @@ def test_adminlevel(self, config): True, ) assert adminone.get_pcode("ABC", "NE004", logname="test") == ( + None, + True, + ) + assert adminone.get_pcode("ABC", "BLAH", logname="test") == ( None, False, ) config["countries_fuzzy_try"].append("ABC") assert adminone.get_pcode("ABC", "NE004", logname="test") == ( + None, + True, + ) + assert adminone.get_pcode("ABC", "BLAH", logname="test") == ( None, False, ) assert adminone.get_pcode("XYZ", "XYZ123", logname="test") == ( + None, + True, + ) + assert adminone.get_pcode("XYZ", "BLAH", logname="test") == ( None, False, ) @@ -103,6 +119,15 @@ def test_adminlevel(self, config): "UA74", False, ) + assert adminone.get_pcode( + "UKR", + "Chernihiv Oblast", + fuzzy_match=False, + logname="test", + ) == ( + None, + True, + ) assert adminone.get_pcode("ZWE", "ABCDEFGH", logname="test") == ( None, False, @@ -160,7 +185,7 @@ def test_adminlevel_with_url(self, config, url): adminone = AdminLevel(config) adminone.setup_from_url() assert adminone.get_admin_level("YEM") == 1 - assert len(adminone.get_pcode_list()) == 2553 + assert len(adminone.get_pcode_list()) 
def test_adminlevel_pcode_formats(self, config, url, formats_url):
    """Check p-code length conversion at admin 1 and admin 2, first with
    p-code formats loaded and then also with parent admins set.

    NOTE(review): the url and formats_url fixtures are remote URLs, so this
    test presumably needs network access - confirm.
    """
    adminone = AdminLevel(config)
    adminone.setup_from_url(admin_url=url)
    adminone.load_pcode_formats(formats_url=formats_url)
    assert adminone.convert_admin_pcode_length("YEM", "YEME123") is None
    assert adminone.get_pcode("YEM", "YE30", logname="test") == (
        "YE30",
        True,
    )
    assert adminone.get_pcode("YEM", "YEM30", logname="test") == (
        "YE30",
        True,
    )
    assert adminone.get_pcode("YEM", "YEM030", logname="test") == (
        "YE30",
        True,
    )
    assert adminone.get_pcode("NGA", "NG015", logname="test") == (
        "NG015",
        True,
    )
    assert adminone.get_pcode("NGA", "NG15", logname="test") == (
        "NG015",
        True,
    )
    assert adminone.get_pcode("NGA", "NGA015", logname="test") == (
        "NG015",
        True,
    )
    assert adminone.get_pcode("NER", "NER004", logname="test") == (
        "NER004",
        True,
    )
    assert adminone.get_pcode("NER", "NE04", logname="test") == (
        "NER004",
        True,
    )
    assert adminone.get_pcode("NER", "NE004", logname="test") == (
        "NER004",
        True,
    )
    assert adminone.get_pcode("ABC", "NE004", logname="test") == (
        None,
        True,
    )

    admintwo = AdminLevel(config, admin_level=2)
    admintwo.setup_from_url(admin_url=url)
    # Before formats are loaded, no admin 2 length conversion takes place
    assert admintwo.get_pcode(
        "YEM", "YE03001", fuzzy_match=False, logname="test"
    ) == (
        None,
        True,
    )

    admintwo.load_pcode_formats(formats_url=formats_url)
    assert admintwo.get_pcode("YEM", "YE3001", logname="test") == (
        "YE3001",
        True,
    )
    assert admintwo.get_pcode("YEM", "YEM3001", logname="test") == (
        "YE3001",
        True,
    )
    assert admintwo.get_pcode("YEM", "YEM03001", logname="test") == (
        "YE3001",
        True,
    )
    assert admintwo.get_pcode("YEM", "YE301", logname="test") == (
        "YE3001",
        True,
    )
    assert admintwo.get_pcode("YEM", "YEM30001", logname="test") == (
        "YE3001",
        True,
    )
    assert admintwo.get_pcode("YEM", "YEM030001", logname="test") == (
        "YE3001",
        True,
    )
    assert admintwo.get_pcode("NGA", "NG015001", logname="test") == (
        "NG015001",
        True,
    )
    assert admintwo.get_pcode("NGA", "NG15001", logname="test") == (
        "NG015001",
        True,
    )
    assert admintwo.get_pcode("NGA", "NGA015001", logname="test") == (
        "NG015001",
        True,
    )
    assert admintwo.get_pcode("NGA", "NG1501", logname="test") == (
        "NG015001",
        True,
    )
    # Algorithm inserts 0 to make NG001501 and hence fails (NG001 is in any
    # case a valid admin 1)
    assert admintwo.get_pcode(
        "NGA", "NG01501", fuzzy_match=False, logname="test"
    ) == (
        None,
        True,
    )
    # Algorithm can only insert one zero per admin level right now
    assert admintwo.get_pcode(
        "NGA", "NG0151", fuzzy_match=False, logname="test"
    ) == (
        None,
        True,
    )
    assert admintwo.get_pcode(
        "NGA", "NG151", fuzzy_match=False, logname="test"
    ) == (
        None,
        True,
    )
    assert admintwo.get_pcode("NER", "NER004009", logname="test") == (
        "NER004009",
        True,
    )
    assert admintwo.get_pcode("NER", "NE04009", logname="test") == (
        "NER004009",
        True,
    )
    # Algorithm inserts 0 to make NER000409 and hence fails (it has no
    # knowledge that NER000 is an invalid admin 1)
    assert admintwo.get_pcode(
        "NER", "NE00409", fuzzy_match=False, logname="test"
    ) == (
        None,
        True,
    )

    assert admintwo.get_pcode(
        "DZA", "DZ009009", fuzzy_match=False, logname="test"
    ) == (
        "DZ009009",
        True,
    )
    assert admintwo.get_pcode(
        "DZA", "DZ0090009", fuzzy_match=False, logname="test"
    ) == (
        "DZ009009",
        True,
    )

    assert admintwo.get_pcode(
        "COL", "CO08849", fuzzy_match=False, logname="test"
    ) == (
        "CO08849",
        True,
    )
    # Algorithm removes 0 to make CO80849 and hence fails (it has no
    # knowledge that CO80 is an invalid admin 1)
    assert admintwo.get_pcode(
        "COL", "CO080849", fuzzy_match=False, logname="test"
    ) == (
        None,
        True,
    )

    admintwo.set_parent_admins_from_adminlevels([adminone])
    # The lookup in admin1 reveals that adding a 0 prefix to the admin1
    # is not a valid admin1 (NER000) so the algorithm tries adding
    # the 0 prefix at the admin2 level instead and hence succeeds
    assert admintwo.get_pcode("NER", "NE00409", logname="test") == (
        "NER004009",
        True,
    )
    # The lookup in admin1 reveals that removing the 0 prefix from the
    # admin1 does not give a valid admin1 (CO80) so the algorithm tries
    # removing the 0 prefix at the admin2 level instead and hence succeeds
    assert admintwo.get_pcode(
        "COL", "CO080849", fuzzy_match=False, logname="test"
    ) == (
        "CO08849",
        True,
    )

    # Supplying the parent p-code lists directly gives the same behaviour
    admintwo.set_parent_admins([adminone.pcodes])
    assert admintwo.get_pcode("YEM", "YEM03001", logname="test") == (
        "YE3001",
        True,
    )
    assert admintwo.get_pcode("NGA", "NG1501", logname="test") == (
        "NG015001",
        True,
    )