diff --git a/README.md b/README.md
index 664be3e4c..d0770f871 100644
--- a/README.md
+++ b/README.md
@@ -59,12 +59,15 @@ Contains the JSON schemas used to validate the feeds in the integration tests.
 | - country_code | Text |Required | ISO 3166-1 alpha-2 code designating the country where the feed service is located. For a list of valid codes [see here](https://en.wikipedia.org/wiki/List_of_ISO_3166_country_codes). |
 | - subdivision_name | Text |Optional | ISO 3166-2 subdivision name designating the subdivision (e.g province, state, region) where the feed service is located. For a list of valid names [see here](https://unece.org/trade/uncefact/unlocode-country-subdivisions-iso-3166-2).|
 | - municipality | Text |Optional | Primary municipality in which the feed service is located.|
-| - bounding_box | Object|System generated | Bounding box of the feed when it was first added to the catalog. Contains `minimum_latitude`, `maximum_latitude`, `minimum_longitude`, `maximum_longitude` and `extracted_on` fields. If the bounding box information displays as "null", you can check any potential feed errors with [the GTFS validator](https://github.com/MobilityData/gtfs-validator). |
+| - bounding_box | Object|System generated | The bounding box of the feed and metadata about the GTFS archive when it was first added or last updated in the catalog. Contains `minimum_latitude`, `maximum_latitude`, `minimum_longitude`, `maximum_longitude`, `extracted_on`, `extracted_filesize`, `extracted_calendar_start`, and `extracted_calendar_end` fields. If the bounding box information displays as "null", you can check any potential feed errors with [the GTFS validator](https://github.com/MobilityData/gtfs-validator). |
 | --minimum_latitude | Latitude | System generated | The minimum latitude for the feed's bounding box. |
 | --maximum_latitude | Latitude | System generated | The maximum latitude for the feed's bounding box. |
 | --minimum_longitude | Longitude | System generated | The minimum longitude for the feed's bounding box. |
 | --maximum_longitude | Longitude | System generated | The maximum longitude for the feed's bounding box. 
-| --extracted_on | Date and Time | System generated | The date and timestamp the bounding box was extracted on in UTC.
+| --extracted_on | Date and Time | System generated | The date and timestamp, in UTC, when the bounding box was extracted. |
+| --extracted_filesize | Integer | System generated | The filesize, in bytes, of the GTFS archive when the bounding box was extracted. |
+| --extracted_calendar_start | Date | System generated | The earliest date covered by calendar/calendar_dates when the bounding box was extracted. |
+| --extracted_calendar_end | Date | System generated | The latest date covered by calendar/calendar_dates when the bounding box was extracted. |
 | provider | Text | Required | A commonly used name for the transit provider included in the feed. |
 | feed_contact_email | Text | Optional | The contact information for the data producer of the feed, discovered via feed_info.feed_contact_email in the feed, the provider's website, or the Mobility Database contributor form. |
 | name | Text |Optional | An optional description of the feed, e.g to specify if the feed is an aggregate of multiple providers, or which network is represented by the feed. |
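For reference, a populated bounding_box object in the catalog could look like the sketch below. All values are illustrative only, not taken from a real feed.

# Illustrative values only; extracted_on is an ISO 8601 UTC timestamp and the
# calendar fields are YYYY-MM-DD strings derived from calendar/calendar_dates.
bounding_box = {
    "minimum_latitude": 45.40,
    "maximum_latitude": 45.71,
    "minimum_longitude": -74.03,
    "maximum_longitude": -73.47,
    "extracted_on": "2024-01-15T12:00:00+00:00",
    "extracted_filesize": 1048576,
    "extracted_calendar_start": "2024-01-02",
    "extracted_calendar_end": "2024-06-30",
}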
diff --git a/schemas/gtfs_schedule_source_schema.json b/schemas/gtfs_schedule_source_schema.json
index fb6395d66..4167251c7 100644
--- a/schemas/gtfs_schedule_source_schema.json
+++ b/schemas/gtfs_schedule_source_schema.json
@@ -42,58 +42,45 @@
     },
     "bounding_box": {
       "type": "object",
-      "description": "This is the bounding box of the data source when it was first added to the catalog. It includes the date and timestamp the bounding box was extracted in UTC.",
-      "oneOf": [
-        {
-          "properties": {
-            "minimum_latitude": {
-              "type": "number",
-              "minimum": -90,
-              "maximum": 90
-            },
-            "maximum_latitude": {
-              "type": "number",
-              "minimum": -90,
-              "maximum": 90
-            },
-            "minimum_longitude": {
-              "type": "number",
-              "minimum": -180,
-              "maximum": 180
-            },
-            "maximum_longitude": {
-              "type": "number",
-              "minimum": -180,
-              "maximum": 180
-            },
-            "extracted_on": {
-              "type": "string",
-              "format": "date-time"
-            }
-          }
+      "description": "This is the bounding box of the data source and metadata about the GTFS archive when it was first added or last updated in the catalog. It includes the date and timestamp the bounding box was extracted in UTC, the filesize in bytes, and the calendar date range.",
+      "properties": {
+        "minimum_latitude": {
+          "type": ["number", "null"],
+          "minimum": -90,
+          "maximum": 90
         },
-        {
-          "properties": {
-            "minimum_latitude": {
-              "type": "null"
-            },
-            "maximum_latitude": {
-              "type": "null"
-            },
-            "minimum_longitude": {
-              "type": "null"
-            },
-            "maximum_longitude": {
-              "type": "null"
-            },
-            "extracted_on": {
-              "type": "string",
-              "format": "date-time"
-            }
-          }
+        "maximum_latitude": {
+          "type": ["number", "null"],
+          "minimum": -90,
+          "maximum": 90
+        },
+        "minimum_longitude": {
+          "type": ["number", "null"],
+          "minimum": -180,
+          "maximum": 180
+        },
+        "maximum_longitude": {
+          "type": ["number", "null"],
+          "minimum": -180,
+          "maximum": 180
+        },
+        "extracted_on": {
+          "type": "string",
+          "format": "date-time"
+        },
+        "extracted_filesize": {
+          "type": ["number", "null"]
+        },
+        "extracted_calendar_start": {
+          "type": ["string", "null"],
+          "format": "date"
+        },
+        "extracted_calendar_end": {
+          "type": ["string", "null"],
+          "format": "date"
         }
-      ],
-      "required": ["minimum_latitude", "maximum_latitude", "minimum_longitude", "maximum_longitude", "extracted_on"]
+      },
+      "required": ["minimum_latitude", "maximum_latitude", "minimum_longitude", "maximum_longitude", "extracted_on", "extracted_filesize", "extracted_calendar_start", "extracted_calendar_end"]
     }
   },
   "required": ["country_code", "bounding_box"]
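A quick validation sketch of the flattened schema. This assumes the jsonschema package is available and that bounding_box sits under the schema's location property; adjust the lookup path if the schema nests it differently.

import json
from jsonschema import validate

with open("schemas/gtfs_schedule_source_schema.json") as schema_file:
    schema = json.load(schema_file)

# Assumed path to the bounding_box subschema; verify against the actual schema layout.
bbox_schema = schema["properties"]["location"]["properties"]["bounding_box"]

# With the oneOf branches removed, null coordinates and null metadata validate
# alongside a non-null extraction timestamp in a single flat object.
validate(
    instance={
        "minimum_latitude": None,
        "maximum_latitude": None,
        "minimum_longitude": None,
        "maximum_longitude": None,
        "extracted_on": "2024-01-15T12:00:00+00:00",
        "extracted_filesize": None,
        "extracted_calendar_start": None,
        "extracted_calendar_end": None,
    },
    schema=bbox_schema,
)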
diff --git a/scripts/export_to_csv.py b/scripts/export_to_csv.py
index 9d5b3b7c2..26129f6d1 100644
--- a/scripts/export_to_csv.py
+++ b/scripts/export_to_csv.py
@@ -28,6 +28,9 @@
     'location.bounding_box.minimum_longitude',
     'location.bounding_box.maximum_longitude',
     'location.bounding_box.extracted_on',
+    'location.bounding_box.extracted_filesize',
+    'location.bounding_box.extracted_calendar_start',
+    'location.bounding_box.extracted_calendar_end',
     'status',
     'features',
     'redirect.id',
diff --git a/tools/constants.py b/tools/constants.py
index d1b7ffabd..e1565a334 100644
--- a/tools/constants.py
+++ b/tools/constants.py
@@ -7,7 +7,9 @@
 STOP_LON = "stop_lon"
 START_SERVICE_AREA_ID = "start_service_area_id"
 START_SERVICE_AREA_RADIUS = "start_service_area_radius"
+START_DATE = "start_date"
 END_DATE = "end_date"
+DATE = "date"
 GTFS_DATE_FORMAT = "%Y%m%d"
 PATHWAYS_TXT = "pathways.txt"
 FARES_ATTRIBUTES_TXT = "fares_attributes.txt"
@@ -75,6 +77,9 @@
 MINIMUM_LONGITUDE = "minimum_longitude"
 MAXIMUM_LONGITUDE = "maximum_longitude"
 EXTRACTED_ON = "extracted_on"
+EXTRACTED_FILESIZE = "extracted_filesize"
+EXTRACTED_CALENDAR_START = "extracted_calendar_start"
+EXTRACTED_CALENDAR_END = "extracted_calendar_end"
 URLS = "urls"
 DIRECT_DOWNLOAD = "direct_download"
 LICENSE = "license"
diff --git a/tools/helpers.py b/tools/helpers.py
index 1a0791175..24028055e 100644
--- a/tools/helpers.py
+++ b/tools/helpers.py
@@ -9,6 +9,10 @@
 from unidecode import unidecode
 import uuid
 from tools.constants import (
+    START_DATE,
+    END_DATE,
+    DATE,
+    GTFS_DATE_FORMAT,
     STOP_LAT,
     STOP_LON,
     MDB_ARCHIVES_LATEST_URL_TEMPLATE,
@@ -292,6 +296,21 @@ def create_filename(
         extension=extension,
     )
 
+def is_gtfs_yyyymmdd_format(string):
+    """
+    Determines if the given string is in the standard GTFS YYYYMMDD date format.
+
+    Args:
+        string (str): Date string to test.
+
+    Returns:
+        bool: True if the string can be parsed as a standard GTFS YYYYMMDD date string.
+    """
+    try:
+        datetime.datetime.strptime(string, GTFS_DATE_FORMAT)
+        return True
+    except ValueError:
+        return False
 
 def normalize(string):
     """
@@ -335,6 +354,15 @@ def get_iso_time():
         .isoformat()
     )
 
+def get_filesize(path):
+    """
+    Gets the filesize of the file at the given path.
+
+    Returns:
+        int: Filesize in bytes of the file at the given path.
+    """
+    return os.stat(path).st_size
+
 
 #########################
 # GTFS SPECIFIC FUNCTIONS
@@ -405,3 +433,41 @@
     maximum_longitude = stops[STOP_LON].dropna().max() if stops_are_present else None
 
     return minimum_latitude, maximum_latitude, minimum_longitude, maximum_longitude
+
+def extract_gtfs_calendar_range(file_path):
+    """
+    Extracts the minimum and maximum service dates of a GTFS source from the `calendar` and `calendar_dates` files of the GTFS dataset.
+
+    This function loads a GTFS dataset and determines the earliest (min) and latest (max) date referenced
+    in the calendar and calendar_dates files of the dataset.
+
+    Args:
+        file_path (str): The file path to the GTFS dataset.
+
+    Returns:
+        tuple: A tuple with the minimum and maximum calendar dates, formatted as YYYY-MM-DD.
+
+    Notes:
+        If both the calendar and calendar_dates files are missing, or no valid dates are found, the returned value is a tuple of two None values.
+ """ + dataset = load_gtfs(file_path) + dates = [] + + if dataset.calendar is not None: + dates.append(dataset.calendar[START_DATE]) + dates.append(dataset.calendar[END_DATE]) + if dataset.calendar_dates is not None: + dates.append(dataset.calendar_dates[DATE]) + if len(dates) == 0: + return None, None + + all_dates = pd.concat(dates).dropna() + filtered_dates = all_dates[all_dates.apply(is_gtfs_yyyymmdd_format)] + if len(filtered_dates) == 0: + return None, None + + min_date_yyyymmdd = filtered_dates.min() + max_date_yyyymmdd = filtered_dates.max() + min_date = datetime.datetime.strptime(min_date_yyyymmdd, GTFS_DATE_FORMAT).strftime('%Y-%m-%d') + max_date = datetime.datetime.strptime(max_date_yyyymmdd, GTFS_DATE_FORMAT).strftime('%Y-%m-%d') + return min_date, max_date diff --git a/tools/representations.py b/tools/representations.py index c7101fe0a..4ea06bab1 100644 --- a/tools/representations.py +++ b/tools/representations.py @@ -6,7 +6,9 @@ is_readable, load_gtfs, extract_gtfs_bounding_box, + extract_gtfs_calendar_range, get_iso_time, + get_filesize, create_latest_url, to_json, create_filename, @@ -30,6 +32,9 @@ MINIMUM_LONGITUDE, MAXIMUM_LONGITUDE, EXTRACTED_ON, + EXTRACTED_FILESIZE, + EXTRACTED_CALENDAR_START, + EXTRACTED_CALENDAR_END, URLS, DIRECT_DOWNLOAD, LICENSE, @@ -492,7 +497,10 @@ class GtfsScheduleSource(Source): bbox_max_lat (float): Maximum latitude of the bounding box. bbox_min_lon (float): Minimum longitude of the bounding box. bbox_max_lon (float): Maximum longitude of the bounding box. - bbox_extracted_on (str): Date when the bounding box was extracted. + bbox_extracted_on (str): Date-time when the bounding box was extracted. + bbox_extracted_filesize (int): Filesize in bytes when the bounding box was extracted. + bbox_extracted_calendar_start (str): Date earliest covered by calendar/calendar_dates when the bounding box was extracted. + bbox_extracted_calendar_end (str): Date latest covered by calendar/calendar_dates when the bounding box was extracted. latest_url (str): URL for the latest version of the GTFS data. feed_contact_email (str, optional): Contact email for the GTFS feed. redirects (list): List of redirect URLs, if any. 
diff --git a/tools/representations.py b/tools/representations.py
index c7101fe0a..4ea06bab1 100644
--- a/tools/representations.py
+++ b/tools/representations.py
@@ -6,7 +6,9 @@
     is_readable,
     load_gtfs,
     extract_gtfs_bounding_box,
+    extract_gtfs_calendar_range,
     get_iso_time,
+    get_filesize,
     create_latest_url,
     to_json,
     create_filename,
@@ -30,6 +32,9 @@
     MINIMUM_LONGITUDE,
     MAXIMUM_LONGITUDE,
     EXTRACTED_ON,
+    EXTRACTED_FILESIZE,
+    EXTRACTED_CALENDAR_START,
+    EXTRACTED_CALENDAR_END,
     URLS,
     DIRECT_DOWNLOAD,
     LICENSE,
@@ -492,7 +497,10 @@ class GtfsScheduleSource(Source):
         bbox_max_lat (float): Maximum latitude of the bounding box.
         bbox_min_lon (float): Minimum longitude of the bounding box.
         bbox_max_lon (float): Maximum longitude of the bounding box.
-        bbox_extracted_on (str): Date when the bounding box was extracted.
+        bbox_extracted_on (str): Date-time when the bounding box was extracted.
+        bbox_extracted_filesize (int): Filesize in bytes of the GTFS archive when the bounding box was extracted.
+        bbox_extracted_calendar_start (str): Earliest date covered by calendar/calendar_dates when the bounding box was extracted.
+        bbox_extracted_calendar_end (str): Latest date covered by calendar/calendar_dates when the bounding box was extracted.
         latest_url (str): URL for the latest version of the GTFS data.
         feed_contact_email (str, optional): Contact email for the GTFS feed.
         redirects (list): List of redirect URLs, if any.
@@ -529,6 +537,9 @@ def __init__(self, **kwargs):
         self.bbox_min_lon = bounding_box.pop(MINIMUM_LONGITUDE)
         self.bbox_max_lon = bounding_box.pop(MAXIMUM_LONGITUDE)
         self.bbox_extracted_on = bounding_box.pop(EXTRACTED_ON)
+        self.bbox_extracted_filesize = bounding_box.pop(EXTRACTED_FILESIZE, None)
+        self.bbox_extracted_calendar_start = bounding_box.pop(EXTRACTED_CALENDAR_START, None)
+        self.bbox_extracted_calendar_end = bounding_box.pop(EXTRACTED_CALENDAR_END, None)
         urls = kwargs.pop(URLS, {})
         self.latest_url = urls.pop(LATEST)
         self.feed_contact_email = kwargs.pop(FEED_CONTACT_EMAIL, None)
@@ -548,6 +559,9 @@ def __str__(self):
             MINIMUM_LONGITUDE: self.bbox_min_lon,
             MAXIMUM_LONGITUDE: self.bbox_max_lon,
             EXTRACTED_ON: self.bbox_extracted_on,
+            EXTRACTED_FILESIZE: self.bbox_extracted_filesize,
+            EXTRACTED_CALENDAR_START: self.bbox_extracted_calendar_start,
+            EXTRACTED_CALENDAR_END: self.bbox_extracted_calendar_end,
             DIRECT_DOWNLOAD: self.direct_download_url,
             AUTHENTICATION_TYPE: self.authentication_type,
             AUTHENTICATION_INFO: self.authentication_info_url,
@@ -624,6 +638,9 @@ def update(self, **kwargs):
             self.bbox_max_lon,
         ) = extract_gtfs_bounding_box(file_path=dataset_path)
         self.bbox_extracted_on = get_iso_time()
+        self.bbox_extracted_filesize = get_filesize(dataset_path)
+        self.bbox_extracted_calendar_start, self.bbox_extracted_calendar_end = extract_gtfs_calendar_range(dataset_path)
+
         # Delete the downloaded dataset because we don't need it anymore
         os.remove(dataset_path)
 
@@ -689,6 +706,8 @@ def build(cls, **kwargs):
             maximum_longitude,
         ) = extract_gtfs_bounding_box(file_path=dataset_path)
         extracted_on = get_iso_time()
+        extracted_filesize = get_filesize(dataset_path)
+        extracted_calendar_start, extracted_calendar_end = extract_gtfs_calendar_range(dataset_path)
 
         # Delete the downloaded dataset because we don't need it anymore
         os.remove(dataset_path)
@@ -719,6 +738,9 @@
             minimum_longitude=minimum_longitude,
             maximum_longitude=maximum_longitude,
             extracted_on=extracted_on,
+            extracted_calendar_start=extracted_calendar_start,
+            extracted_calendar_end=extracted_calendar_end,
+            extracted_filesize=extracted_filesize,
             latest=latest,
             **kwargs,
         )
@@ -745,6 +767,9 @@ def schematize(cls, **kwargs):
                 MINIMUM_LONGITUDE: kwargs.pop(MINIMUM_LONGITUDE),
                 MAXIMUM_LONGITUDE: kwargs.pop(MAXIMUM_LONGITUDE),
                 EXTRACTED_ON: kwargs.pop(EXTRACTED_ON),
+                EXTRACTED_FILESIZE: kwargs.pop(EXTRACTED_FILESIZE),
+                EXTRACTED_CALENDAR_START: kwargs.pop(EXTRACTED_CALENDAR_START),
+                EXTRACTED_CALENDAR_END: kwargs.pop(EXTRACTED_CALENDAR_END),
             },
         },
         URLS: {
diff --git a/tools/tests/test_helpers.py b/tools/tests/test_helpers.py
index 9d51010ab..050b5a971 100644
--- a/tools/tests/test_helpers.py
+++ b/tools/tests/test_helpers.py
@@ -11,8 +11,12 @@
     get_iso_time,
     load_gtfs,
     extract_gtfs_bounding_box,
+    extract_gtfs_calendar_range,
     STOP_LAT,
     STOP_LON,
+    START_DATE,
+    END_DATE,
+    DATE,
     to_json,
     from_json,
     normalize,
@@ -362,6 +366,83 @@ def test_extract_gtfs_bounding_box_stops_present(self, mock_load_gtfs):
         under_test = extract_gtfs_bounding_box(file_path=self.test_path)
         self.assertEqual(under_test, test_bounding_box)
 
+    @patch("tools.helpers.load_gtfs")
+    def test_extract_gtfs_calendar_range_no_calendar_or_calendar_dates(self, mock_load_gtfs):
+        test_return_min_max = (None, None)
+        test_calendar = None
+        type(mock_load_gtfs.return_value).calendar = test_calendar
+        test_calendar_dates = None
+        type(mock_load_gtfs.return_value).calendar_dates = test_calendar_dates
+        under_test = extract_gtfs_calendar_range(file_path=self.test_path)
+        self.assertEqual(under_test, test_return_min_max)
+
+    @patch("tools.helpers.load_gtfs")
+    def test_extract_gtfs_calendar_range_invalid_calendar(self, mock_load_gtfs):
+        test_return_min_max = (None, None)
+        test_calendar = pd.DataFrame(
+            {
+                # Note: only YYYYMMDD is valid per the GTFS spec; YYYY-MM-DD and NA values are dropped
+                START_DATE: ["2024-02-30", pd.NA],
+                END_DATE: ["2034-02-01", pd.NA]
+            }
+        )
+        type(mock_load_gtfs.return_value).calendar = test_calendar
+        test_calendar_dates = None
+        type(mock_load_gtfs.return_value).calendar_dates = test_calendar_dates
+        under_test = extract_gtfs_calendar_range(file_path=self.test_path)
+        self.assertEqual(under_test, test_return_min_max)
+
+    @patch("tools.helpers.load_gtfs")
+    def test_extract_gtfs_calendar_range_only_calendar(self, mock_load_gtfs):
+        test_return_min_max = ('2010-01-02', '2032-04-09')
+        test_calendar = pd.DataFrame(
+            {
+                # Note: only YYYYMMDD is valid per the GTFS spec; YYYY-MM-DD and NA values are dropped
+                START_DATE: ["20100102", "20230702", "20230402", "2024-02-30", pd.NA],
+                END_DATE: ["20140104", "20230709", "20320409", "2034-02-01", pd.NA]
+            }
+        )
+        type(mock_load_gtfs.return_value).calendar = test_calendar
+        test_calendar_dates = None
+        type(mock_load_gtfs.return_value).calendar_dates = test_calendar_dates
+        under_test = extract_gtfs_calendar_range(file_path=self.test_path)
+        self.assertEqual(under_test, test_return_min_max)
+
+    @patch("tools.helpers.load_gtfs")
+    def test_extract_gtfs_calendar_range_only_calendar_dates(self, mock_load_gtfs):
+        test_return_min_max = ('2021-07-02', '2029-04-02')
+        test_calendar = None
+        type(mock_load_gtfs.return_value).calendar = test_calendar
+        test_calendar_dates = pd.DataFrame(
+            {
+                # Note: only YYYYMMDD is valid per the GTFS spec; YYYY-MM-DD and NA values are dropped
+                DATE: ["20240102", "20210702", "20290402", "2027-02-30", pd.NA],
+            }
+        )
+        type(mock_load_gtfs.return_value).calendar_dates = test_calendar_dates
+        under_test = extract_gtfs_calendar_range(file_path=self.test_path)
+        self.assertEqual(under_test, test_return_min_max)
+
+    @patch("tools.helpers.load_gtfs")
+    def test_extract_gtfs_calendar_range_both_calendar_and_calendar_dates(self, mock_load_gtfs):
+        test_return_min_max = ('1999-01-02', '2031-07-02')
+        test_calendar = pd.DataFrame(
+            {
+                # Note: only YYYYMMDD is valid per the GTFS spec; YYYY-MM-DD and NA values are dropped
+                START_DATE: ["19990102", "20230702", "20230402", "2024-02-30", pd.NA],
+                END_DATE: ["20240104", "20230709", "20230409", "2034-02-01", pd.NA]
+            }
+        )
+        type(mock_load_gtfs.return_value).calendar = test_calendar
+        test_calendar_dates = pd.DataFrame(
+            {
+                # Note: only YYYYMMDD is valid per the GTFS spec; YYYY-MM-DD and NA values are dropped
+                DATE: ["20240102", "20310702", "20290402", "2027-02-30", pd.NA],
+            }
+        )
+        type(mock_load_gtfs.return_value).calendar_dates = test_calendar_dates
+        under_test = extract_gtfs_calendar_range(file_path=self.test_path)
+        self.assertEqual(under_test, test_return_min_max)
 
 class TestInOutFunctions(TestCase):
     def setUp(self):
diff --git a/tools/tests/test_representations.py b/tools/tests/test_representations.py
index 2de38f953..f3c9c05c8 100644
--- a/tools/tests/test_representations.py
+++ b/tools/tests/test_representations.py
@@ -24,6 +24,9 @@
     MINIMUM_LONGITUDE,
     MAXIMUM_LONGITUDE,
     EXTRACTED_ON,
+    EXTRACTED_FILESIZE,
+    EXTRACTED_CALENDAR_START,
+    EXTRACTED_CALENDAR_END,
     DIRECT_DOWNLOAD,
     LATEST,
     LICENSE,
@@ -303,6 +306,9 @@ def setUp(self):
         self.test_min_lon = "some_min_lon"
         self.test_max_lon = "some_max_lon"
         self.test_extracted_on = "some_extraction_time"
+        self.test_extracted_filesize = "some_extraction_filesize"
+        self.test_extracted_calendar_start = "some_extraction_calendar_start"
+        self.test_extracted_calendar_end = "some_extraction_calendar_end"
         self.test_direct_download_url = "some_direct_download_url"
         self.test_authentication_type = "some_authentication_type"
         self.test_authentication_info_url = "some_authentication_info_url"
@@ -323,6 +329,9 @@
             MINIMUM_LONGITUDE: self.test_min_lon,
             MAXIMUM_LONGITUDE: self.test_max_lon,
             EXTRACTED_ON: self.test_extracted_on,
+            EXTRACTED_FILESIZE: self.test_extracted_filesize,
+            EXTRACTED_CALENDAR_START: self.test_extracted_calendar_start,
+            EXTRACTED_CALENDAR_END: self.test_extracted_calendar_end,
             DIRECT_DOWNLOAD: self.test_direct_download_url,
             AUTHENTICATION_TYPE: self.test_authentication_type,
             AUTHENTICATION_INFO: self.test_authentication_info_url,
@@ -349,6 +358,9 @@
                 MINIMUM_LONGITUDE: self.test_min_lon,
                 MAXIMUM_LONGITUDE: self.test_max_lon,
                 EXTRACTED_ON: self.test_extracted_on,
+                EXTRACTED_FILESIZE: self.test_extracted_filesize,
+                EXTRACTED_CALENDAR_START: self.test_extracted_calendar_start,
+                EXTRACTED_CALENDAR_END: self.test_extracted_calendar_end,
             },
         },
         URLS: {
@@ -450,6 +462,8 @@ def test_has_status(self):
 
     @patch("tools.representations.os")
     @patch("tools.representations.get_iso_time")
+    @patch("tools.representations.get_filesize")
+    @patch("tools.representations.extract_gtfs_calendar_range")
     @patch("tools.representations.extract_gtfs_bounding_box")
     @patch("tools.representations.is_readable")
     @patch("tools.representations.download_dataset")
@@ -458,6 +472,8 @@ def test_update(
         mock_download_dataset,
         mock_read_func,
         mock_bounding_box,
+        mock_calendar,
+        mock_filesize,
         mock_time,
         mock_os,
     ):
@@ -476,6 +492,9 @@
         self.assertEqual(under_test.bbox_min_lon, self.test_min_lon)
         self.assertEqual(under_test.bbox_max_lon, self.test_max_lon)
         self.assertEqual(under_test.bbox_extracted_on, self.test_extracted_on)
+        self.assertEqual(under_test.bbox_extracted_filesize, self.test_extracted_filesize)
+        self.assertEqual(under_test.bbox_extracted_calendar_start, self.test_extracted_calendar_start)
+        self.assertEqual(under_test.bbox_extracted_calendar_end, self.test_extracted_calendar_end)
         self.assertEqual(under_test.provider, self.test_provider)
         self.assertEqual(under_test.name, self.test_name)
         self.assertEqual(under_test.country_code, self.test_country_code)
@@ -491,6 +510,9 @@
         test_min_lon = "another_min_lon"
         test_max_lon = "another_max_lon"
         test_extracted_on = "another_extraction_time"
+        test_extracted_filesize = "another_extraction_filesize"
+        test_extracted_calendar_start = "another_extraction_calendar_start"
+        test_extracted_calendar_end = "another_extraction_calendar_end"
         test_provider = "another_provider"
         test_name = "another_name"
         test_country_code = "another_country_code"
@@ -506,6 +528,11 @@
             test_max_lon,
         )
         mock_time.return_value = test_extracted_on
+        mock_filesize.return_value = test_extracted_filesize
+        mock_calendar.return_value = (
+            test_extracted_calendar_start,
+            test_extracted_calendar_end,
+        )
         under_test = instance.update(
             **{
                 PROVIDER: test_provider,
@@ -531,6 +558,9 @@
         self.assertEqual(under_test.bbox_min_lon, test_min_lon)
         self.assertEqual(under_test.bbox_max_lon, test_max_lon)
         self.assertEqual(under_test.bbox_extracted_on, test_extracted_on)
+        self.assertEqual(under_test.bbox_extracted_filesize, test_extracted_filesize)
+        self.assertEqual(under_test.bbox_extracted_calendar_start, test_extracted_calendar_start)
+        self.assertEqual(under_test.bbox_extracted_calendar_end, test_extracted_calendar_end)
         self.assertEqual(under_test.provider, test_provider)
         self.assertEqual(under_test.name, test_name)
         self.assertEqual(under_test.country_code, test_country_code)
@@ -544,6 +574,8 @@
     @patch("tools.representations.create_latest_url")
     @patch("tools.representations.create_filename")
     @patch("tools.representations.get_iso_time")
+    @patch("tools.representations.get_filesize")
+    @patch("tools.representations.extract_gtfs_calendar_range")
     @patch("tools.representations.extract_gtfs_bounding_box")
     @patch("tools.representations.is_readable")
     @patch("tools.representations.download_dataset")
@@ -552,6 +584,8 @@ def test_build(
         mock_download_dataset,
         mock_read_func,
         mock_bounding_box,
+        mock_calendar,
+        mock_filesize,
         mock_time,
         mock_filename,
         mock_latest_url,
@@ -571,6 +605,11 @@
             "some_max_lon",
         )
         mock_time.return_value = "some_time"
+        mock_filesize.return_value = "some_filesize"
+        mock_calendar.return_value = (
+            "some_calendar_start",
+            "some_calendar_end",
+        )
         mock_filename.return_value = "some_filename"
         mock_latest_url.return_value = "some_latest_url"
         mock_schema.return_value = deepcopy(self.test_schema)
@@ -580,6 +619,9 @@
         del self.test_kwargs[MINIMUM_LONGITUDE]
         del self.test_kwargs[MAXIMUM_LONGITUDE]
         del self.test_kwargs[EXTRACTED_ON]
+        del self.test_kwargs[EXTRACTED_FILESIZE]
+        del self.test_kwargs[EXTRACTED_CALENDAR_START]
+        del self.test_kwargs[EXTRACTED_CALENDAR_END]
         del self.test_kwargs[LATEST]
         under_test = GtfsScheduleSource.build(**self.test_kwargs)
         self.assertIsNotNone(under_test)