diff --git a/census/core.py b/census/core.py index fcd441c..552d6e5 100644 --- a/census/core.py +++ b/census/core.py @@ -36,7 +36,42 @@ def list_or_str(v): return v return [v] +def cast_nulls(func): + """ + Decorator to format null values in API result casting functions. + """ + def null_wrapper(v, cast_nulls): + # This value indicates that there were too few observations to compute + # an estimate. See: + # https://www.census.gov/data/developers/data-sets/acs-1year/notes-on-acs-estimate-and-annotation-values.html + if str(v) == '-666666666': + if cast_nulls is True: + return None + else: + raise NullValueException('Unhandled coded value: ', str(v)) + else: + return func(v) + return null_wrapper + +@cast_nulls +def to_str(v): + """ + Cast an API result to a string. + """ + return str(v) + +@cast_nulls +def to_float(v): + """ + Cast an API result to a float. + """ + return float(v) + +@cast_nulls def float_or_str(v): + """ + Try casting an API result to a float, and fall back to a string. + """ try: return float(v) except ValueError: @@ -92,6 +127,10 @@ class UnsupportedYearException(CensusException): pass +class NullValueException(Exception): + pass + + class Client(object): endpoint_url = 'https://api.census.gov/data/%s/%s' definitions_url = 'https://api.census.gov/data/%s/%s/variables.json' @@ -158,6 +197,10 @@ def get(self, fields, geo, year=None, **kwargs): @retry_on_transient_error def query(self, fields, geo, year=None, **kwargs): + cast_nulls = kwargs.get('cast_nulls', True) + if cast_nulls not in [True, False]: + raise CensusException('cast_nulls argument must be True or False') + if year is None: year = self.default_year @@ -187,10 +230,30 @@ def query(self, fields, geo, year=None, **kwargs): headers = data.pop(0) types = [self._field_type(header, year) for header in headers] - results = [{header : (cast(item) if item is not None else None) - for header, cast, item - in zip(headers, types, d)} - for d in data] + results = [] + error = False + for d in data: + result = {} + for header, cast, item in zip(headers, types, d): + if item is not None: + try: + var_value = cast(item, cast_nulls) + except NullValueException: + # This value needs to raise an error, but we need the + # rest of the result values for context, so flag the + # error and continue the iteration + error = True + var_value = item + else: + var_value = None + result[header] = var_value + if error: + msg = 'Null estimate code found: ' + str(result) + msg += '\nSee the Census documentation for more information:' + msg += '\nhttps://www.census.gov/data/developers/data-sets/acs-1year/notes-on-acs-estimate-and-annotation-values.html' + raise CensusException(msg) + else: + results.append(result) return results elif resp.status_code == 204: @@ -204,17 +267,17 @@ def _field_type(self, field, year): url = self.definition_url % (year, self.dataset, field) resp = self.session.get(url) - types = {"fips-for" : str, - "fips-in" : str, + types = {"fips-for" : to_str, + "fips-in" : to_str, "int" : float_or_str, - "float": float, - "string": str} + "float": to_float, + "string": to_str} if resp.status_code == 200: predicate_type = resp.json().get("predicateType", "string") return types[predicate_type] else: - return str + return to_str @supported_years() def us(self, fields, **kwargs): diff --git a/census/tests/test_census.py b/census/tests/test_census.py index ed09652..23228b6 100644 --- a/census/tests/test_census.py +++ b/census/tests/test_census.py @@ -8,7 +8,7 @@ import requests from census.core import ( - Census, UnsupportedYearException) + Census, UnsupportedYearException, CensusException) KEY = os.environ.get('CENSUS_KEY', '') @@ -121,6 +121,67 @@ def test_la_canada_2015(self): ) +class TestCodedValues(CensusTestCase): + """ + Tests for handling coded values, like -666666666 and -999999999. + """ + def test_handle_666666666(self): + """ + Test the default behavior of handling -666666666 values, which is to + cast them to null. + """ + # This call should return a value of -666666666 + return_val = self._client.acs5.state_county_tract('B19081_001E', + 42, + 101, + '989100', + year=2016) + self.assertEqual(return_val[0]['B19081_001E'], None) + + def test_handle_666666666_as_error(self): + """ + Test raising an error for -666666666 values. + """ + with self.assertRaises(CensusException): + return_val = self._client.acs5.state_county_tract('B19081_001E', + 42, + 101, + '989100', + year=2016, + cast_nulls=False) + + def test_handle_666666666_as_null(self): + """ + Test casting -666666666 values to null. + """ + return_val = self._client.acs5.state_county_tract('B19081_001E', + 42, + 101, + '989100', + year=2016, + cast_nulls=True) + self.assertEqual(return_val[0]['B19081_001E'], None) + + def test_bad_cast_nulls_argument(self): + """ + Test that an error gets raised for poorly-formated cast_nulls argument. + """ + with self.assertRaises(CensusException): + return_val = self._client.acs5.state('NAME', + Census.ALL, + cast_nulls='foobar') + + with self.assertRaises(CensusException): + return_val = self._client.acs5.state('NAME', + Census.ALL, + cast_nulls=None) + + with self.assertRaises(CensusException): + return_val = self._client.acs5.state('NAME', + Census.ALL, + cast_nulls=10) + + class TestEndpoints(CensusTestCase): def check_endpoints(self, client_name, tests, **kwargs):