diff --git a/notebooks/gtfs/check_unmatched_id_warnings.py b/notebooks/gtfs/check_unmatched_id_warnings.py index 21c064c4..6e0c4f1d 100644 --- a/notebooks/gtfs/check_unmatched_id_warnings.py +++ b/notebooks/gtfs/check_unmatched_id_warnings.py @@ -46,8 +46,8 @@ feed.trips, pd.DataFrame( { - "service_id": [101], - "route_id": [20304], + "service_id": ["101023"], + "route_id": ["2030445"], "trip_id": ["VJbedb4cfd0673348e017d42435abbdff3ddacbf89"], "trip_headsign": ["Newport"], "block_id": [np.nan], @@ -70,8 +70,7 @@ feed.routes, pd.DataFrame( { - "service_id": [101], - "route_id": [20304], + "route_id": ["20304"], "agency_id": ["OL5060"], "route_short_name": ["X145"], "route_long_name": [np.nan], @@ -91,3 +90,5 @@ # the pipeline and therefore the user will be made aware. It is also flagged # as an error which means that 'the GTFS is violated' # (https://mrcagney.github.io/gtfs_kit_docs/). + +# %% diff --git a/tests/gtfs/test_unmatched_id_warnings.py b/tests/gtfs/test_unmatched_id_warnings.py deleted file mode 100644 index 68f05bd7..00000000 --- a/tests/gtfs/test_unmatched_id_warnings.py +++ /dev/null @@ -1,152 +0,0 @@ -"""Tests for inserting invalid data into various GTFS tables.""" -import pytest -import pandas as pd -import numpy as np - -from transport_performance.gtfs.validation import GtfsInstance - - -@pytest.fixture(scope="function") -def gtfs_fixture(): - """Test GtfsInstance() fixture.""" - gtfs = GtfsInstance() - return gtfs - - -@pytest.mark.runinteg -class TestUmatchedIDWarnings(object): - """Tests for unmatch ID warnings in GTFS data.""" - - def test_unmatched_id_warnings_calendar(self, gtfs_fixture): - """Tests for unmatched IDs in the calendar table.""" - # insert invalid data - gtfs_fixture.feed.calendar = pd.concat( - [ - gtfs_fixture.feed.calendar, - pd.DataFrame( - { - "service_id": [101], - "monday": [0], - "tuesday": [0], - "wednesday": [0], - "thursday": [0], - "friday": [0], - "saturday": [0], - "sunday": [0], - "start_date": ["20200104"], - 
"end_date": ["20230301"], - } - ), - ], - axis=0, - ) - - expected_message = { - "type": "error", - "message": "Invalid service_id; maybe has extra space characters", - "table": "calendar", - "rows": [0], - } - - # ensure raised errors match expected errors - assert expected_message == gtfs_fixture.is_valid().iloc[0].to_dict(), ( - "GTFS validation failed to identify an unmatched ID in " - "the calendar table" - ) - - @pytest.mark.runinteg - def test_unmatched_id_warnings_trips(self, gtfs_fixture): - """Tests for unmatched IDs in the trips table.""" - # insert invalid data - gtfs_fixture.feed.trips = pd.concat( - [ - gtfs_fixture.feed.trips, - pd.DataFrame( - { - "service_id": [101], - "route_id": [20304], - "trip_id": [ - "VJbedb4cfd0673348e017d42435abbdff3ddacbf89" - ], - "trip_headsign": ["Newport"], - "block_id": [np.nan], - "shape_id": [ - "RPSPc4c99ac6aff7e4648cbbef785f88427a48efa80f" - ], - "wheelchair_accessible": [0], - "trip_direction_name": [np.nan], - "vehicle_journey_code": ["VJ109"], - } - ), - ], - axis=0, - ) - - expected_errors = { - "type": {1: "error", 2: "error", 9: "warning"}, - "message": { - 1: "Undefined route_id", - 2: "Undefined service_id", - 9: "Trip has no stop times", - }, - "table": {1: "trips", 2: "trips", 9: "trips"}, - "rows": {1: [0], 2: [0], 9: [0]}, - } - - found_errors = ( - gtfs_fixture.is_valid() - .reset_index(drop=True) - .iloc[[1, 2, 9]] - .to_dict() - ) - - # ensure raised errors match expected errors - assert expected_errors == found_errors, ( - "GTFS validation failed to identify an unmatched IDs and " - "invalid data in the trips table" - ) - - @pytest.mark.runinteg - def test_unmatched_id_warnings_routes(self, gtfs_fixture): - """Tests for unmatched IDs in the routes table.""" - # insert invalid data - gtfs_fixture.feed.routes = pd.concat( - [ - gtfs_fixture.feed.routes, - pd.DataFrame( - { - "service_id": [101], - "route_id": [20304], - "agency_id": ["OL5060"], - "route_short_name": ["X145"], - "route_long_name": 
[np.nan], - "route_type": [200], - } - ), - ], - axis=0, - ) - - expected_errors = { - "type": {0: "error", 1: "error", 6: "warning"}, - "message": { - 0: "Invalid route_id; maybe has extra space characters", - 1: "Undefined agency_id", - 6: "Route has no trips", - }, - "table": {0: "routes", 1: "routes", 6: "routes"}, - "rows": {0: [0], 1: [0], 6: [0]}, - } - - found_errors = ( - gtfs_fixture.is_valid() - .reset_index(drop=True) - .iloc[[0, 1, 6]] - .to_dict() - ) - - # ensure raised errors match expected errors - assert expected_errors == found_errors, ( - "GTFS validation failed to identify an unmatched IDs and invalid " - "data in the routes table" - ) diff --git a/tests/gtfs/test_validation.py b/tests/gtfs/test_validation.py index 40b9c3e8..038d988f 100644 --- a/tests/gtfs/test_validation.py +++ b/tests/gtfs/test_validation.py @@ -117,6 +117,149 @@ def test_is_valid(self, gtfs_fixture): found_cols == exp_cols ).all(), f"Expected columns {exp_cols}. Found: {found_cols}" + @pytest.mark.runinteg + def test_trips_unmatched_ids(self, gtfs_fixture): + """Tests to evaluate gtfs-kit's reaction to invalid IDs in trips. 
+ + Parameters + ---------- + gtfs_fixture : GtfsInstance + a GtfsInstance test fixture + + """ + feed = gtfs_fixture.feed + + # add row to trips table with invalid trip_id, route_id, service_id + feed.trips = pd.concat( + [ + feed.trips, + pd.DataFrame( + { + "service_id": ["101023"], + "route_id": ["2030445"], + "trip_id": [ + "VJbedb4cfd0673348e017d42435abbdff3ddacbf89" + ], + "trip_headsign": ["Newport"], + "block_id": [np.nan], + "shape_id": [ + "RPSPc4c99ac6aff7e4648cbbef785f88427a48efa80f" + ], + "wheelchair_accessible": [0], + "trip_direction_name": [np.nan], + "vehicle_journey_code": ["VJ109"], + } + ), + ], + axis=0, + ) + + # assert different errors/warnings have been raised + new_valid = feed.validate() + assert ( + len(new_valid[new_valid.message == "Undefined route_id"]) == 1 + ), "gtfs-kit failed to recognise invalid route_id" + assert ( + len(new_valid[new_valid.message == "Undefined service_id"]) == 1 + ), "gtfs-kit failed to recognise invalid service_id" + assert ( + len(new_valid[new_valid.message == "Trip has no stop times"]) == 1 + ), "gtfs-kit failed to recognise invalid service_id" + assert len(new_valid) == 10, "Validation table not expected size" + + @pytest.mark.runinteg + def test_routes_unmatched_ids(self, gtfs_fixture): + """Tests to evaluate gtfs-kit's reaction to invalid IDs in routes. 
+ + Parameters + ---------- + gtfs_fixture : GtfsInstance + a GtfsInstance test fixture + + """ + feed = gtfs_fixture.feed + + # add row to routes table with unmatched route_id and agency_id + feed.routes = pd.concat( + [ + feed.routes, + pd.DataFrame( + { + "route_id": ["20304"], + "agency_id": ["OL5060"], + "route_short_name": ["X145"], + "route_long_name": [np.nan], + "route_type": [200], + } + ), + ], + axis=0, + ) + + # assert different errors/warnings have been raised + new_valid = feed.validate() + assert ( + len(new_valid[new_valid.message == "Undefined agency_id"]) == 1 + ), "gtfs-kit failed to recognise invalid agency_id" + assert ( + len(new_valid[new_valid.message == "Route has no trips"]) == 1 + ), "gtfs-kit failed to recognise that there are routes with no trips" + assert len(new_valid) == 9, "Validation table not expected size" + + @pytest.mark.runinteg + def test_unmatched_service_id_behaviour(self, gtfs_fixture): + """Tests to evaluate gtfs-kit's reaction to invalid IDs in calendar. + + Parameters + ---------- + gtfs_fixture : GtfsInstance + a GtfsInstance test fixture + + Notes + ----- + 'gtfs-kit' does not care about invalid service IDs in the calendar + table. The Calendar table can have data with any service_id as long as + the datatypes are correct. + This can be seen in this unit test, which exercises this + functionality. 
+ + """ + feed = gtfs_fixture.feed + original_error_count = len(feed.validate()) + + # introduce a dummy row with a non matching service_id + feed.calendar = pd.concat( + [ + feed.calendar, + pd.DataFrame( + { + "service_id": ["1018872"], + "monday": [0], + "tuesday": [0], + "wednesday": [0], + "thursday": [0], + "friday": [0], + "saturday": [0], + "sunday": [0], + "start_date": ["20200104"], + "end_date": ["20230301"], + } + ), + ], + axis=0, + ) + new_error_count = len(feed.validate()) + assert ( + new_error_count == original_error_count + ), "Unrecognised error in validaation table" + + # drop a row from the calendar table + feed.calendar.drop(3, inplace=True) + new_valid = feed.validate() + assert ( + len(new_valid[new_valid.message == "Undefined service_id"]) == 1 + ), "gtfs-kit failed to identify missing service_id" + @patch("builtins.print") def test_print_alerts_defence(self, mocked_print, gtfs_fixture): """Check defensive behaviour of print_alerts()."""