Skip to content

Commit

Permalink
Use compact numeric IDs for GTFS objects, shrinking a London-wide fil…
Browse files Browse the repository at this point in the history
…e from 2GB down to 1.3GB
  • Loading branch information
dabreegster committed May 24, 2024
1 parent d1b95a2 commit b0b7097
Show file tree
Hide file tree
Showing 8 changed files with 117 additions and 50 deletions.
2 changes: 1 addition & 1 deletion backend/src/graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ impl Graph {
for a in &self.amenities {
features.push(a.to_gj(&self.mercator));
}
for s in self.gtfs.stops.values() {
for s in &self.gtfs.stops {
features.push(s.to_gj(&self.mercator));
}

Expand Down
62 changes: 62 additions & 0 deletions backend/src/gtfs/ids.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
use std::collections::BTreeMap;

use anyhow::Result;
use serde::{Deserialize, Serialize};

/// The full string IDs used in GTFS
///
/// These live in their own module so they can share the names `StopID` and `TripID` with the
/// compact numeric IDs defined alongside this module, without the two kinds being mixed up.
pub mod orig_ids {
use serde::{Deserialize, Serialize};

/// The original string identifier of a stop. The wrapped string is private to this module.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct StopID(String);

/// The original string identifier of a trip. The wrapped string is private to this module.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct TripID(String);
}

/// Compact numeric ID for a stop. The wrapped `usize` is public and `Copy`, so it is cheap to
/// pass by value and can be used directly as a vector index.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct StopID(pub usize);

/// Compact numeric ID for a trip. The wrapped `usize` is public and `Copy`, so it is cheap to
/// pass by value and can be used directly as a vector index.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct TripID(pub usize);

impl CheapID for StopID {
/// Wraps the raw number `x` as a `StopID`.
fn new(x: usize) -> Self {
Self(x)
}
}
impl CheapID for TripID {
/// Wraps the raw number `x` as a `TripID`.
fn new(x: usize) -> Self {
Self(x)
}
}

/// A compact, `Copy` identifier that can be constructed from a plain `usize`.
///
/// `IDMapping::insert_new` relies on this to mint a fresh ID from its current entry count.
pub trait CheapID: Copy {
/// Builds the ID from the raw number `x`.
fn new(x: usize) -> Self;
}

/// Maps original keys of type `K` (ordered, since they are stored in a `BTreeMap`) to compact
/// IDs of type `V`.
#[derive(Serialize, Deserialize)]
pub struct IDMapping<K: Ord, V> {
// Forward lookup only: original key -> compact ID.
orig_to_cheap: BTreeMap<K, V>,
// We don't need to store the inverse. It's more convenient for each object to own that.
}

impl<K: Clone + std::fmt::Debug + Ord, V: CheapID> IDMapping<K, V> {
    /// Creates an empty mapping.
    pub fn new() -> Self {
        Self {
            orig_to_cheap: BTreeMap::new(),
        }
    }

    /// Registers `orig` and returns the compact ID assigned to it.
    ///
    /// The new ID is built from the number of entries already present, so IDs are handed out
    /// sequentially starting at 0.
    ///
    /// # Errors
    ///
    /// Fails if `orig` has already been inserted. The mapping is left untouched in that case:
    /// the existing entry keeps its ID, so the sequential numbering stays free of collisions
    /// even if the caller decides to carry on after the error.
    pub fn insert_new(&mut self, orig: K) -> Result<V> {
        let cheap = V::new(self.orig_to_cheap.len());
        // Use the entry API rather than `insert`, which would overwrite the existing value
        // before we get the chance to notice the duplicate.
        match self.orig_to_cheap.entry(orig) {
            std::collections::btree_map::Entry::Vacant(slot) => {
                slot.insert(cheap);
                Ok(cheap)
            }
            std::collections::btree_map::Entry::Occupied(slot) => bail!(
                "IDMapping::insert_new has duplicate input for {:?}",
                slot.key()
            ),
        }
    }

    /// Looks up the compact ID previously assigned to `orig`, or `None` if it was never
    /// inserted.
    pub fn get(&self, orig: &K) -> Option<V> {
        // `V` is `Copy` (required by `CheapID`), so copy the stored ID out of the map.
        self.orig_to_cheap.get(orig).copied()
    }
}
27 changes: 12 additions & 15 deletions backend/src/gtfs/mod.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
use std::collections::BTreeMap;
use std::time::Duration;

use chrono::NaiveTime;
Expand All @@ -7,22 +6,26 @@ use geojson::{Feature, Geometry};
use serde::{Deserialize, Serialize};
use utils::Mercator;

use self::ids::orig_ids;
pub use self::ids::{StopID, TripID};
use crate::graph::RoadID;

mod ids;
mod scrape;

// TODO cheap numeric IDs, later
// TODO days of the week, exceptions, etc. a daily model for now.

#[derive(Serialize, Deserialize)]
pub struct GtfsModel {
pub stops: BTreeMap<StopID, Stop>,
pub trips: BTreeMap<TripID, Trip>,
// Indexed by StopID and TripID
pub stops: Vec<Stop>,
pub trips: Vec<Trip>,
}

#[derive(Serialize, Deserialize)]
pub struct Stop {
pub name: String,
pub orig_id: orig_ids::StopID,
pub point: Point,
pub road: RoadID,
// Sorted by time1
Expand All @@ -44,26 +47,20 @@ pub struct Trip {
pub stop_sequence: Vec<(StopID, NaiveTime)>,
}

#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct StopID(String);

#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Serialize, Deserialize)]
pub struct TripID(String);

impl GtfsModel {
pub fn empty() -> Self {
Self {
stops: BTreeMap::new(),
trips: BTreeMap::new(),
stops: Vec::new(),
trips: Vec::new(),
}
}

/// Starting from a stop at some time, find all the next trips going somewhere, waiting up to
/// max_wait.
pub fn trips_from(&self, stop1: &StopID, time: NaiveTime, max_wait: Duration) -> Vec<NextStep> {
// TODO Improve with compact IDs, binary search, etc
pub fn trips_from(&self, stop1: StopID, time: NaiveTime, max_wait: Duration) -> Vec<NextStep> {
// TODO Binary search
let mut results = Vec::new();
for next_step in &self.stops[stop1].next_steps {
for next_step in &self.stops[stop1.0].next_steps {
// These are sorted by time, so give up after we've seen enough
if next_step.time1 > time + max_wait {
break;
Expand Down
53 changes: 29 additions & 24 deletions backend/src/gtfs/scrape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,16 @@ use geo::{Contains, Point};
use serde::Deserialize;
use utils::Mercator;

use super::ids::{orig_ids, IDMapping};
use super::{GtfsModel, NextStep, Stop, StopID, Trip, TripID};
use crate::graph::RoadID;

impl GtfsModel {
/// Takes a path to a GTFS directory
pub fn parse(dir_path: &str, mercator: &Mercator) -> Result<GtfsModel> {
println!("Scraping stops.txt");
let mut stops: BTreeMap<StopID, Stop> = BTreeMap::new();
let mut stop_ids: IDMapping<orig_ids::StopID, StopID> = IDMapping::new();
let mut stops: Vec<Stop> = Vec::new();
for rec in
csv::Reader::from_reader(File::open(format!("{dir_path}/stops.txt"))?).deserialize()
{
Expand All @@ -26,20 +28,19 @@ impl GtfsModel {
continue;
}

stops.insert(
rec.stop_id,
Stop {
name: rec.stop_name,
point: mercator.to_mercator(&point),
next_steps: Vec::new(),
// Dummy value, fill out later
road: RoadID(0),
},
);
stop_ids.insert_new(rec.stop_id.clone())?;
stops.push(Stop {
name: rec.stop_name,
orig_id: rec.stop_id,
point: mercator.to_mercator(&point),
next_steps: Vec::new(),
// Dummy value, fill out later
road: RoadID(0),
});
}

let mut trips: BTreeMap<TripID, Trip> = BTreeMap::new();
println!("Scraping stop_times.txt");
let mut trips_table: BTreeMap<orig_ids::TripID, Trip> = BTreeMap::new();
for rec in csv::Reader::from_reader(File::open(format!("{dir_path}/stop_times.txt"))?)
.deserialize()
{
Expand All @@ -50,34 +51,38 @@ impl GtfsModel {
};

// Skip out-of-bounds stops
if !stops.contains_key(&rec.stop_id) {
let Some(stop_id) = stop_ids.get(&rec.stop_id) else {
continue;
}
};

trips
trips_table
.entry(rec.trip_id)
.or_insert_with(|| Trip {
stop_sequence: Vec::new(),
})
.stop_sequence
.push((rec.stop_id, arrival_time));
.push((stop_id, arrival_time));
}

// Produce a compact Trips vec
let trips: Vec<Trip> = trips_table.into_values().collect();

// Precompute the next steps from each stop
for (trip_id, trip) in &trips {
for (idx, trip) in trips.iter().enumerate() {
let trip_id = TripID(idx);
for pair in trip.stop_sequence.windows(2) {
let (stop1, time1) = &pair[0];
let (stop2, time2) = &pair[1];
stops.get_mut(&stop1).unwrap().next_steps.push(NextStep {
stops[stop1.0].next_steps.push(NextStep {
time1: *time1,
trip: trip_id.clone(),
stop2: stop2.clone(),
trip: trip_id,
stop2: *stop2,
time2: *time2,
});
}
}

for stop in stops.values_mut() {
for stop in &mut stops {
stop.next_steps.sort_by_key(|x| x.time1);
}

Expand All @@ -87,15 +92,15 @@ impl GtfsModel {

#[derive(Deserialize)]
struct StopRow {
stop_id: StopID,
stop_id: orig_ids::StopID,
stop_name: String,
stop_lon: f64,
stop_lat: f64,
}

#[derive(Deserialize)]
struct StopTimeRow {
trip_id: TripID,
stop_id: StopID,
trip_id: orig_ids::TripID,
stop_id: orig_ids::StopID,
arrival_time: String,
}
2 changes: 2 additions & 0 deletions backend/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#[macro_use]
extern crate anyhow;
#[macro_use]
extern crate log;

use std::sync::Once;
Expand Down
1 change: 0 additions & 1 deletion backend/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ fn main() -> Result<()> {
println!("Usage: osm.pbf [gtfs directory]");
std::process::exit(1);
}
// TODO Enable a simple logger backend

let timer = Timer::new("build graph", None);
let osm_bytes = std::fs::read(&args[1])?;
Expand Down
8 changes: 5 additions & 3 deletions backend/src/scrape.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ use crate::graph::{
AmenityID, Direction, Graph, Intersection, IntersectionID, IntersectionLocation, Mode, Road,
RoadID,
};
use crate::gtfs::GtfsModel;
use crate::gtfs::{GtfsModel, StopID};
use crate::route::Router;
use crate::timer::Timer;

Expand Down Expand Up @@ -284,10 +284,12 @@ fn snap_stops(roads: &mut Vec<Road>, gtfs: &mut GtfsModel, timer: &mut Timer) {
);

timer.step("find closest roads per stop");
for (stop_id, stop) in &mut gtfs.stops {
// TODO Make an iterator method that returns the IDs too
for (idx, stop) in gtfs.stops.iter_mut().enumerate() {
let stop_id = StopID(idx);
if let Some(r) = closest_road.nearest_neighbor(&stop.point.into()) {
// TODO Limit how far away we snap, or use the boundary polygon
roads[r.data.0].stops.push(stop_id.clone());
roads[r.data.0].stops.push(stop_id);
stop.road = r.data;
} else {
// TODO Need to get rid of the stop
Expand Down
12 changes: 6 additions & 6 deletions backend/src/transit_route.rs
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ pub fn route(graph: &Graph, start: IntersectionID, end: IntersectionID) -> Resul
PathStep::Transit { stop1, stop2, .. } => {
let mut f = Feature::from(Geometry::from(&graph.mercator.to_wgs84(
&LineString::new(vec![
graph.gtfs.stops[stop1].point.into(),
graph.gtfs.stops[stop2].point.into(),
graph.gtfs.stops[stop1.0].point.into(),
graph.gtfs.stops[stop2.0].point.into(),
]),
)));
f.set_property("kind", "transit");
Expand Down Expand Up @@ -89,19 +89,19 @@ pub fn route(graph: &Graph, start: IntersectionID, end: IntersectionID) -> Resul
for next_step in
graph
.gtfs
.trips_from(stop1, current.cost, Duration::from_secs(30 * 60))
.trips_from(*stop1, current.cost, Duration::from_secs(30 * 60))
{
// TODO Here's the awkwardness -- arrive at both the intersections for that
// road
let stop2_road = &graph.roads[graph.gtfs.stops[&next_step.stop2].road.0];
let stop2_road = &graph.roads[graph.gtfs.stops[next_step.stop2.0].road.0];
for i in [stop2_road.src_i, stop2_road.dst_i] {
if let Entry::Vacant(entry) = backrefs.entry(i) {
entry.insert((
current.value,
PathStep::Transit {
stop1: stop1.clone(),
stop1: *stop1,
trip: next_step.trip.clone(),
stop2: next_step.stop2.clone(),
stop2: next_step.stop2,
},
));
queue.push(PriorityQueueItem::new(next_step.time2, i));
Expand Down

0 comments on commit b0b7097

Please sign in to comment.