forked from toddwschneider/nyc-taxi-data
-
Notifications
You must be signed in to change notification settings - Fork 0
/
import_fhv_trip_data.sh
executable file
·29 lines (23 loc) · 1.11 KB
/
import_fhv_trip_data.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
#!/bin/bash
#
# Loads every data/fhv_tripdata*.csv file into the nyc-taxi-data database.
#
# For each file: COPY the CSV into fhv_trips_staging using the column list
# that matches the file's year/month, then run
# setup_files/populate_fhv_trips.sql (presumably moves the staged rows into
# fhv_trips -- confirm against that file). After all files have been
# attempted, a BRIN index on fhv_trips.pickup_datetime is created.
#
# Paths are relative, so run this from the directory that contains data/ and
# setup_files/.
#
# Exit status: non-zero if the index creation fails or if any file could not
# be imported.

# Captures the year (group 1) and the zero-padded month (group 2).
readonly year_month_regex="tripdata_([0-9]{4})-([0-9]{2})"

# Column lists for the three CSV layouts handled by this script.
readonly fhv_schema_pre_2017="(dispatching_base_num,pickup_datetime,pickup_location_id)"
readonly fhv_schema_2017_h1="(dispatching_base_num,pickup_datetime,dropoff_datetime,pickup_location_id,dropoff_location_id)"
readonly fhv_schema_2017_h2="(dispatching_base_num,pickup_datetime,dropoff_datetime,pickup_location_id,dropoff_location_id,shared_ride)"

#######################################
# Prints the COPY column list for a given year and month.
# Globals:   fhv_schema_pre_2017, fhv_schema_2017_h1, fhv_schema_2017_h2 (read)
# Arguments: $1 - year (digits), $2 - month (digits, may be zero-padded)
# Outputs:   the column list on stdout
#######################################
fhv_schema_for() {
  # Force base 10 so zero-padded values such as "08" are not read as octal.
  local year=$((10#$1))
  local month=$((10#$2))

  if (( year < 2017 )); then
    printf '%s\n' "$fhv_schema_pre_2017"
  elif (( year == 2017 && month < 7 )); then
    printf '%s\n' "$fhv_schema_2017_h1"
  else
    printf '%s\n' "$fhv_schema_2017_h2"
  fi
}

#######################################
# Imports a single FHV trip CSV file.
# Globals:   year_month_regex (read)
# Arguments: $1 - path to the CSV file
# Outputs:   progress messages on stdout, failures on stderr
# Returns:   0 on success; 1 if the file name has no year-month or a psql
#            step exits non-zero
#######################################
import_fhv_file() {
  local filename=$1
  local schema

  # Match against the base name only so directory names cannot interfere,
  # and refuse to guess a schema when the name carries no year-month.
  if [[ ! "${filename##*/}" =~ $year_month_regex ]]; then
    echo "$(date): skipping ${filename}: no tripdata_YYYY-MM in file name" >&2
    return 1
  fi
  schema=$(fhv_schema_for "${BASH_REMATCH[1]}" "${BASH_REMATCH[2]}")

  echo "$(date): beginning load for ${filename}"
  if ! psql nyc-taxi-data -c "COPY fhv_trips_staging ${schema} FROM stdin CSV HEADER;" < "$filename"; then
    echo "$(date): raw load failed for ${filename}" >&2
    return 1
  fi
  echo "$(date): finished raw load for ${filename}"

  # NOTE: without ON_ERROR_STOP, psql -f reports only connection-level
  # failures through its exit status; SQL errors inside the file are not
  # detected here.
  if ! psql nyc-taxi-data -f setup_files/populate_fhv_trips.sql; then
    echo "$(date): populating trips failed for ${filename}" >&2
    return 1
  fi
  echo "$(date): loaded trips for ${filename}"
}

#######################################
# Imports every matching CSV file, then builds the pickup_datetime index.
# Returns: status of the index creation if it fails, 1 if any file failed,
#          0 otherwise
#######################################
main() {
  local filename
  local failures=0

  for filename in data/fhv_tripdata*.csv; do
    # An unmatched glob is left as the literal pattern; skip it.
    [[ -e "$filename" ]] || continue
    import_fhv_file "$filename" || failures=$((failures + 1))
  done

  psql nyc-taxi-data -c "CREATE INDEX ON fhv_trips USING BRIN (pickup_datetime) WITH (pages_per_range = 32);" || return

  if (( failures > 0 )); then
    echo "$(date): ${failures} file(s) failed to import" >&2
    return 1
  fi
}

main "$@"