Skip to content

Commit

Permalink
Having no accepted requests no longer crashes analytics (#257)
Browse files Browse the repository at this point in the history
* Having no accepted requests no longer crashes analytics

* Modified groupby calls according to pandas FutureWarning

* Having no accepted requests no longer crashes analytics, part II
  • Loading branch information
fxjung authored Apr 10, 2024
1 parent 2a8e733 commit 47762de
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 20 deletions.
2 changes: 1 addition & 1 deletion src/ridepy/util/analytics/requests.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def _create_transportation_requests_dataframe(
# If transportation requests were submitted, calculate more properties.
# NOTE that these properties might equally well be computed using the
# inferred requests, but in case of differences between the requests
# the resulting change in behavior might not intended. Therefore so far
# the resulting change in behavior might not be intended. Therefore, so far
# we only compute these quantities if transportation_requests are submitted.

# - direct travel time
Expand Down
10 changes: 9 additions & 1 deletion src/ridepy/util/analytics/stops.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,15 @@ def _add_locations_to_stoplist_dataframe(*, reqs, stops, space) -> pd.DataFrame:

# use the requests' locations and reshape them into a DataFrame indexed by
# `request_id` and `delta_occupancy`
locations = reqs.loc[:, ("accepted", ["origin", "destination"])]
if "accepted" in reqs.columns:
locations = reqs.loc[:, ("accepted", ["origin", "destination"])]
else:
locations = pd.DataFrame(
index=reqs.index,
columns=pd.MultiIndex.from_product(
[["accepted"], ["origin", "destination"]]
),
)
locations.columns = locations.columns.droplevel(0).rename("delta_occupancy")
locations = locations.stack().rename("location")

Expand Down
45 changes: 27 additions & 18 deletions src/ridepy/util/analytics/vehicle.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import numpy as np
import pandas as pd


Expand Down Expand Up @@ -51,24 +52,32 @@ def get_vehicle_quantities(stops: pd.DataFrame, requests: pd.DataFrame) -> pd.Da
total_dist_driven = stops.groupby("vehicle_id")["dist_to_next"].sum()
total_time_driven = stops.groupby("vehicle_id")["time_to_next"].sum()

avg_direct_dist = serviced_requests.groupby(("serviced", "vehicle_id")).apply(
lambda gdf: gdf.submitted.direct_travel_distance.mean()
)

avg_direct_time = serviced_requests.groupby(("serviced", "vehicle_id")).apply(
lambda gdf: gdf.submitted.direct_travel_time.mean()
)

total_direct_dist = serviced_requests.groupby(("serviced", "vehicle_id")).apply(
lambda gdf: gdf.submitted.direct_travel_distance.sum()
)

total_direct_time = serviced_requests.groupby(("serviced", "vehicle_id")).apply(
lambda gdf: gdf.submitted.direct_travel_time.sum()
)

efficiency_dist = total_direct_dist / total_dist_driven
efficiency_time = total_direct_time / total_time_driven
if not serviced_requests.empty:
avg_direct_dist = serviced_requests.groupby(
("serviced", "vehicle_id"), group_keys=False
).apply(lambda gdf: gdf.submitted.direct_travel_distance.mean())

avg_direct_time = serviced_requests.groupby(
("serviced", "vehicle_id"), group_keys=False
).apply(lambda gdf: gdf.submitted.direct_travel_time.mean())

total_direct_dist = serviced_requests.groupby(
("serviced", "vehicle_id"), group_keys=False
).apply(lambda gdf: gdf.submitted.direct_travel_distance.sum())

total_direct_time = serviced_requests.groupby(
("serviced", "vehicle_id"), group_keys=False
).apply(lambda gdf: gdf.submitted.direct_travel_time.sum())

efficiency_dist = total_direct_dist / total_dist_driven
efficiency_time = total_direct_time / total_time_driven
else:
avg_direct_dist = pd.Series(np.nan, index=avg_occupancy.index, dtype=float)
avg_direct_time = pd.Series(np.nan, index=avg_occupancy.index, dtype=float)
total_direct_dist = pd.Series(np.nan, index=avg_occupancy.index, dtype=float)
total_direct_time = pd.Series(np.nan, index=avg_occupancy.index, dtype=float)
efficiency_dist = pd.Series(np.nan, index=avg_occupancy.index, dtype=float)
efficiency_time = pd.Series(np.nan, index=avg_occupancy.index, dtype=float)

res = pd.DataFrame(
dict(
Expand Down
23 changes: 23 additions & 0 deletions test/test_analytics.py
Original file line number Diff line number Diff line change
Expand Up @@ -939,3 +939,26 @@ def test_get_stops_and_requests_with_actual_simulation():

assert len(stops) == 2020
assert len(requests) == 1000


def test_get_stops_and_requests_with_actual_simulation_none_accepted():
    """Smoke-test the analytics on a simulation run with zero-seat vehicles.

    Judging by the test name and the stop-count check below, no request is
    expected to be accepted in this setup (presumably because every vehicle
    has a seat capacity of zero). The analytics functions must nevertheless
    run through without raising.
    """
    n_vehicles = 10
    n_requests = 1000

    space = Euclidean1D()
    request_generator = RandomRequestGenerator(rate=10, space=space)
    transportation_requests = list(it.islice(request_generator, n_requests))

    fleet_state = SlowSimpleFleetState(
        initial_locations=dict.fromkeys(range(n_vehicles), 0),
        seat_capacities=0,
        space=space,
        dispatcher=BruteForceTotalTravelTimeMinimizingDispatcher(),
        vehicle_state_class=VehicleState,
    )
    events = list(fleet_state.simulate(transportation_requests))

    stops, requests = get_stops_and_requests(events=events, space=space)

    # only initial and final stops
    assert len(stops) == 2 * n_vehicles
    assert len(requests) == n_requests

    # The call only has to complete without raising; its result is not inspected.
    vehicle_quantities = get_vehicle_quantities(stops, requests)

0 comments on commit 47762de

Please sign in to comment.