-
Notifications
You must be signed in to change notification settings - Fork 0
/
legacy_datasources.py
114 lines (78 loc) · 2.57 KB
/
legacy_datasources.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
import os
import orca
import pandas as pd
import yaml
from urbansim.utils import misc
#####################
# INJECTABLES
#####################
@orca.injectable('settings', cache=True)
def settings():
    """Load ``legacy_settings.yaml`` from the configs directory.

    The parsed object is also attached to the ``orca`` module as
    ``orca.settings`` before it is returned, so it is reachable both as an
    injectable and as a module attribute.

    Returns
    -------
    The object parsed from the YAML file.
    """
    settings_path = os.path.join(misc.configs_dir(), "legacy_settings.yaml")
    with open(settings_path) as f:
        # yaml.load() without an explicit Loader is deprecated since
        # PyYAML 5.1 and raises TypeError from PyYAML 6.0 on; safe_load
        # parses plain YAML without constructing arbitrary Python objects.
        # NOTE(review): if the settings file relies on python-specific YAML
        # tags, safe_load will reject them -- confirm it is plain YAML.
        config = yaml.safe_load(f)
    orca.settings = config
    return config
@orca.injectable('store', cache=True)
def hdfstore(settings):
    """Open the HDF5 file named by ``settings["store"]`` in read-only mode.

    The file name is resolved relative to ``misc.data_dir()``.
    """
    store_path = os.path.join(misc.data_dir(), settings["store"])
    return pd.HDFStore(store_path, mode='r')
@orca.injectable('net_store', cache=True)
def net_store(settings):
    """Open the HDF5 file named by ``settings["net_store"]`` read-only.

    The file name is resolved relative to ``misc.data_dir()``.
    """
    net_store_path = os.path.join(misc.data_dir(), settings["net_store"])
    return pd.HDFStore(net_store_path, mode='r')
#####################
# TABLES
#####################
@orca.table('households', cache=True)
def households(store):
    """Build the ``households`` table from the HDF store.

    Only rows with ``building_id > 0`` are kept. Two columns are added:

    * ``base_luz`` -- looked up with ``misc.reindex`` from the parcels'
      ``luz_id`` through each building's ``parcel_id`` and then each
      household's ``building_id``.
    * ``segmentation_col`` -- the constant ``1``.
    """
    hh = store['households']
    hh = hh[hh.building_id > 0]

    parcel_tbl = store['parcels']
    building_tbl = store['buildings']

    # First hop: parcel luz_id -> buildings (kept as a column on the
    # freshly read buildings frame, which is local to this function).
    building_tbl['luz'] = misc.reindex(
        parcel_tbl.luz_id, building_tbl.parcel_id)

    # Second hop: building luz -> households.
    hh['base_luz'] = misc.reindex(building_tbl.luz, hh.building_id)
    hh['segmentation_col'] = 1
    return hh
@orca.table('buildings', cache=True)
def buildings(store):
    """Build the ``buildings`` table from ``store['assessor_transactions']``.

    Rows that share an index value are collapsed to the last occurrence,
    and the index is named ``building_id``.

    Returns
    -------
    pandas.DataFrame
        The de-duplicated table; its columns are left untouched.
    """
    df = store['assessor_transactions']
    # Index.duplicated(keep='last') flags every occurrence of an index value
    # except the last one. Filtering on the index directly avoids the
    # temporary "index" column, which would overwrite (and then delete) a
    # real column of that name if the table ever had one.
    df = df[~df.index.duplicated(keep='last')]
    df.index.name = 'building_id'
    return df
@orca.table('parcels', cache=True)
def parcels(store):
    """Build the ``parcels`` table from ``store['parcels']``.

    Adds an ``acres`` column copied from ``parcel_acres`` and collapses rows
    that share an index value (parcel_id) to the last occurrence.

    Returns
    -------
    pandas.DataFrame
        The de-duplicated parcels table.
    """
    df = store['parcels']
    df['acres'] = df.parcel_acres
    # Drop duplicate index values (parcel_id), keeping the last row of each.
    # Filtering on the index directly avoids the temporary "rownum" column,
    # which would overwrite (and then delete) a real column of that name.
    df = df[~df.index.duplicated(keep='last')]
    return df
@orca.table('jobs', cache=True)
def jobs(store):
    """Return the rows of ``store['jobs']`` whose ``building_id`` is > 0."""
    all_jobs = store['jobs']
    has_building = all_jobs.building_id > 0
    return all_jobs[has_building]
#####################
# BROADCASTS
#####################
# Register the table relationships with orca. In every call the first
# table is cast onto the second using the first table's index
# (cast_index=True) and the column of the second table named by onto_on.

# parcels -> buildings, via buildings.parcel_id
orca.broadcast(
    'parcels', 'buildings',
    onto_on='parcel_id', cast_index=True)

# buildings -> households, via households.building_id
orca.broadcast(
    'buildings', 'households',
    onto_on='building_id', cast_index=True)

# buildings -> jobs, via jobs.building_id
orca.broadcast(
    'buildings', 'jobs',
    onto_on='building_id', cast_index=True)

# nodes -> buildings, via buildings.node_id
# NOTE(review): no 'nodes' table is registered in the visible part of this
# file; presumably it is registered elsewhere -- confirm.
orca.broadcast(
    'nodes', 'buildings',
    onto_on='node_id', cast_index=True)
#####################
# VIRTUAL COLUMNS
#####################
@orca.column('households', 'node_id', cache=True)
def node_id(households, buildings):
    """Computed ``node_id`` column for the ``households`` table.

    Looks up ``buildings.node_id`` by each household's ``building_id``
    using ``misc.reindex``.
    """
    building_nodes = buildings.node_id
    household_buildings = households.building_id
    return misc.reindex(building_nodes, household_buildings)
@orca.column('buildings', 'node_id', cache=True)
def node_id(buildings, parcels):
    """Computed ``node_id`` column for the ``buildings`` table.

    Looks up ``parcels.node_id`` by each building's ``parcel_id`` using
    ``misc.reindex``.
    """
    parcel_nodes = parcels.node_id
    building_parcels = buildings.parcel_id
    return misc.reindex(parcel_nodes, building_parcels)
@orca.column('jobs', 'node_id', cache=True)
def node_id(jobs, buildings):
    """Computed ``node_id`` column for the ``jobs`` table.

    Looks up ``buildings.node_id`` by each job's ``building_id`` using
    ``misc.reindex``.
    """
    building_nodes = buildings.node_id
    job_buildings = jobs.building_id
    return misc.reindex(building_nodes, job_buildings)