-
Notifications
You must be signed in to change notification settings - Fork 6
/
create_nyc_taxi_schema.sql
149 lines (137 loc) · 4.01 KB
/
create_nyc_taxi_schema.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
CREATE EXTENSION postgis;
CREATE TABLE green_tripdata_staging (
id serial primary key,
vendor_id varchar,
lpep_pickup_datetime varchar,
lpep_dropoff_datetime varchar,
store_and_fwd_flag varchar,
rate_code_id varchar,
pickup_longitude numeric,
pickup_latitude numeric,
dropoff_longitude numeric,
dropoff_latitude numeric,
passenger_count varchar,
trip_distance varchar,
fare_amount varchar,
extra varchar,
mta_tax varchar,
tip_amount varchar,
tolls_amount varchar,
ehail_fee varchar,
improvement_surcharge varchar,
total_amount varchar,
payment_type varchar,
trip_type varchar,
junk1 varchar,
junk2 varchar
);
/*
N.B. junk columns are there because green_tripdata file headers are
inconsistent with the actual data, e.g. header says 20 or 21 columns per row,
but data actually has 22 or 23 columns per row, which COPY doesn't like.
junk1 and junk2 should always be null
*/
CREATE TABLE yellow_tripdata_staging (
id serial primary key,
vendor_id varchar,
tpep_pickup_datetime varchar,
tpep_dropoff_datetime varchar,
passenger_count varchar,
trip_distance varchar,
pickup_longitude numeric,
pickup_latitude numeric,
rate_code_id varchar,
store_and_fwd_flag varchar,
dropoff_longitude numeric,
dropoff_latitude numeric,
payment_type varchar,
fare_amount varchar,
extra varchar,
mta_tax varchar,
tip_amount varchar,
tolls_amount varchar,
improvement_surcharge varchar,
total_amount varchar
);
CREATE TABLE uber_trips_staging (
id serial primary key,
pickup_datetime timestamp without time zone,
pickup_latitude numeric,
pickup_longitude numeric,
base_code varchar
);
CREATE TABLE uber_trips_2015 (
id serial primary key,
dispatching_base_num varchar,
pickup_datetime timestamp without time zone,
affiliated_base_num varchar,
location_id integer,
nyct2010_ntacode varchar
);
CREATE TABLE uber_taxi_zone_lookups (
location_id integer primary key,
borough varchar,
zone varchar,
nyct2010_ntacode varchar
);
CREATE TABLE cab_types (
id serial primary key,
type varchar
);
INSERT INTO cab_types (type) SELECT 'yellow';
INSERT INTO cab_types (type) SELECT 'green';
INSERT INTO cab_types (type) SELECT 'uber';
CREATE TABLE trips (
id serial primary key,
cab_type_id integer,
vendor_id varchar,
pickup_datetime timestamp without time zone,
dropoff_datetime timestamp without time zone,
store_and_fwd_flag char(1),
rate_code_id integer,
pickup_longitude numeric,
pickup_latitude numeric,
dropoff_longitude numeric,
dropoff_latitude numeric,
passenger_count integer,
trip_distance numeric,
fare_amount numeric,
extra numeric,
mta_tax numeric,
tip_amount numeric,
tolls_amount numeric,
ehail_fee numeric,
improvement_surcharge numeric,
total_amount numeric,
payment_type varchar,
trip_type integer,
pickup_nyct2010_gid integer,
dropoff_nyct2010_gid integer
);
SELECT AddGeometryColumn('trips', 'pickup', 4326, 'POINT', 2);
SELECT AddGeometryColumn('trips', 'dropoff', 4326, 'POINT', 2);
CREATE TABLE central_park_weather_observations_raw (
station_id varchar,
station_name varchar,
date date,
precipitation_tenths_of_mm numeric,
snow_depth_mm numeric,
snowfall_mm numeric,
max_temperature_tenths_degrees_celsius numeric,
min_temperature_tenths_degrees_celsius numeric,
average_wind_speed_tenths_of_meters_per_second numeric
);
CREATE INDEX index_weather_observations ON central_park_weather_observations_raw (date);
CREATE VIEW central_park_weather_observations AS
SELECT
date,
precipitation_tenths_of_mm / (100 * 2.54) AS precipitation,
snow_depth_mm / (10 * 2.54) AS snow_depth,
snowfall_mm / (10 * 2.54) AS snowfall,
max_temperature_tenths_degrees_celsius * 9 / 50 + 32 AS max_temperature,
min_temperature_tenths_degrees_celsius * 9 / 50 + 32 AS min_temperature,
CASE
WHEN average_wind_speed_tenths_of_meters_per_second >= 0
THEN average_wind_speed_tenths_of_meters_per_second / 10 * (100 * 60 * 60) / (2.54 * 12 * 5280)
END AS average_wind_speed
FROM central_park_weather_observations_raw;