Skip to content

Commit

Permalink
Merge pull request #62 from SEL-Columbia/vr2262-issue-57-float-index-…
Browse files Browse the repository at this point in the history
…with-guard

Redoing PR for fixes #57 and #60 (and maybe #59)
  • Loading branch information
chrisnatali committed Apr 29, 2016
2 parents f319db2 + a064a38 commit c18d663
Show file tree
Hide file tree
Showing 6 changed files with 162 additions and 38 deletions.
8 changes: 4 additions & 4 deletions demo_sequencer.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
"""Demo: sequence the bundled sumaila sample network, prioritized by population."""
from sequencer import NetworkPlan
from sequencer.Models import EnergyMaximizeReturn

# Sample input data shipped with the repository
csv = 'data/sumaila/input/metrics-local.csv'
shp = 'data/sumaila/input/networks-proposed.shp'

nwp = NetworkPlan.from_files(shp, csv, prioritize='Population')
model = EnergyMaximizeReturn(nwp)

results = model.sequence()
model.output('output')
2 changes: 2 additions & 0 deletions sequencer/Models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,3 +12,5 @@ def _strip_cols(self):
def sequence(self):
    """Run the parent class's sequencing, then drop the helper columns.

    Returns the output frame so the method stays consistent with the
    parent's sequence() contract.
    """
    super(EnergyMaximizeReturn, self).sequence()
    self._strip_cols()
    return self.output_frame
79 changes: 54 additions & 25 deletions sequencer/NetworkPlan.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import fiona
import numpy as np
import networkx as nx
from scipy.sparse import csr_matrix
import scipy.sparse.csgraph as graph
import pandas as pd
import logging
import copy
Expand All @@ -19,31 +17,32 @@ class NetworkPlan(object):
NetworkPlan containing NetworkPlanner proposed network and
accompanying nodal metrics
Parameters
----------
shp : file or string (File, directory, or filename to read).
csv : string or file handle / StringIO.
Example
----------
NetworkPlan('/Users/blogle/Downloads/1643/networks-proposed.shp',
'/Users/blogle/Downloads/1643/metrics-local.csv')
"""
TOL = .5 # meters at the equator, tolerance is stricter towards the poles

def __init__(self, network, metrics, **kwargs):
    """
    Parameters
    ----------
    network : networkx graph of the proposed network.
    metrics : pandas DataFrame of nodal metrics.

    Keyword Arguments
    -----------------
    prioritize : metric column used to prioritize nodes
        (default 'population').
    proj : projection name of the input coordinates (default 'utm').
    """
    self.priority_metric = kwargs.get('prioritize', 'population')
    self.proj = kwargs.get('proj', 'utm')

    # FIXME:
    # Remove the dependency that sequencer has on the
    # original metrics file (this is terrible coupling)
    # see sequencer:_clean_results()
    self._original_metrics = metrics

    self._init_helper(network, metrics)


def _init_helper(self, network, metrics):
"""
All initialization (cleaning up metrics, network, etc)
"""

# Load in and align input data
logger.info('Aligning Network Nodes With Input Metrics')
self._network, self._metrics = prep_data( nx.read_shp(shp),
pd.read_csv(csv, header=1),
loc_tol = self.TOL
)
self._network, self._metrics = prep_data(network,
metrics,
loc_tol = self.TOL)

self.coord_values = self.coords.values()

Expand All @@ -63,6 +62,33 @@ def __init__(self, shp, csv, **kwargs):
#Fillna values with Zero
self._metrics = self.metrics.fillna(0)


@classmethod
def from_files(cls, shp, csv, **kwargs):
    """
    Construct a NetworkPlan from a shapefile and a metrics csv.

    Parameters
    ----------
    shp : file or string (File, directory, or filename to read).
    csv : string or file handle / StringIO.

    Example
    ----------
    NetworkPlan.from_files('networks-proposed.shp',
                           'metrics-local.csv')
    """
    logger.info('Asserting Input Projections Match')
    cls._assert_proj_match(shp, csv)

    # Use fiona to open the shapefile as this includes the projection
    # type; close the handle once the projection has been read so the
    # file descriptor is not leaked.
    shapefile = fiona.open(shp)
    try:
        # Pass along the projection
        kwargs['proj'] = shapefile.crs['proj']
    finally:
        shapefile.close()

    return cls(nx.read_shp(shp), pd.read_csv(csv, header=1), **kwargs)

@classmethod
def _assert_proj_match(self, shp, csv):
"""Ensure that the projections match before continuing"""
# Use fiona to open the shapefile as this includes the projection type
Expand All @@ -83,9 +109,8 @@ def _assert_proj_match(self, shp, csv):
logger.error("csv and shp Projections Don't Match")
raise AssertionError("csv and shapefile Projections Don't Match")

# Save the state of the projection
self.proj = shapefile.crs['proj']



def assert_is_tree(self):

in_degree = self.network.in_degree()
Expand Down Expand Up @@ -254,12 +279,16 @@ def network(self):
"""returns the DiGraph Object representation of the graph"""
return self._network

@property
def original_metrics(self):
    """The raw, unprocessed metrics DataFrame passed to the constructor."""
    return self._original_metrics

@property
def metrics(self):
    """The cleaned-up nodal metrics as a pandas DataFrame."""
    return self._metrics


def download_scenario(scenario_number, directory_name=None, username=None, password=None,
np_url='http://networkplanner.modilabs.org/'):

Expand Down Expand Up @@ -322,4 +351,4 @@ def write_file(name):
csv = os.path.join(directory_name, 'metrics-local.csv')
shp = os.path.join(directory_name, 'network-proposed.shp')

return NetworkPlan(shp, csv)
return NetworkPlan.from_files(shp, csv)
15 changes: 10 additions & 5 deletions sequencer/Sequencer.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,13 +127,18 @@ def _sequence(self):
def upstream_distance(self, node):
    """Return the edge distance from a node to its parent.

    Root nodes (no parent) get a distance of 0.0.
    """
    parent = self.parent(node)
    if parent is not None:
        return self.networkplan._distance(parent, node)
    return 0.0

def sequence(self):
self.results = pd.DataFrame(self._sequence()).set_index('Sequence..Far.sighted.sequence')
"""
Compute the sequence (aka rank) of nodes and edges
This modifies the NetworkPlan member (so make a deep copy if you
need the original)
"""
self.results = pd.DataFrame(self._sequence(), dtype=object).set_index('Sequence..Far.sighted.sequence')
# Post process for output
self._build_node_wkt()
self._build_edge_wkt()
Expand Down Expand Up @@ -235,7 +240,7 @@ def _build_edge_wkt(self):
r = self.results
# Iterate through the nodes and their parent
for rank, fnode, tnode in zip(r.index, r['Sequence..Upstream.id'], r['Sequence..Vertex.id']):
if not np.isnan(fnode):
if fnode is not None:
# Set the edge attributes with those found in sequencing
self.networkplan.network.edge[fnode][tnode]['rank'] = int(rank)
self.networkplan.network.edge[fnode][tnode]['distance'] = float(self.networkplan._distance(fnode, tnode))
Expand Down Expand Up @@ -289,8 +294,8 @@ def _clean_results(self):
"""This joins the sequenced results on the metrics dataframe and reappends the dropped rows"""

logger.info('Joining Sequencer Results on Input Metrics')

orig = pd.read_csv(self.networkplan.csv_p, header=1)
# FIXME: Remove this dependency on original_metrics
orig = self.networkplan.original_metrics
orig.columns = parse_cols(orig)
self.networkplan.metrics.index.name = 'Sequence..Vertex.id'
sequenced_metrics = pd.merge(self.networkplan.metrics.reset_index(), self.results.reset_index(), on='Sequence..Vertex.id')
Expand Down
86 changes: 85 additions & 1 deletion sequencer/Tests/Test_Suite.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,90 @@ def gen_data():

return metrics, network.to_directed()


def gen_data_with_fakes():
    """
    Generate a network and metrics where some network nodes have no
    corresponding metrics records ("fake" nodes).

    This should be sufficient for tests requiring fake nodes.
    The network looks like (fake node starred, demand in parens):

        6*
        |
        |
        0(100)        3(12)
       /  \          /  \
      /    \        /    \
    1(50)  2(25)  4(6)   5(3)

    Returns
    -------
    metrics : DataFrame of per-node metrics (fake node omitted)
    network : undirected networkx graph including the fake node
    node_rank : dict of node -> expected sequence rank
    edge_rank : dict of edge -> expected sequence rank
    """
    # Disjoint graph with 2 trees; the second (rooted at 3) will get a
    # fake root attached below.
    network = nx.graph.Graph()
    network.add_edges_from(((0, 1), (0, 2), (3, 4), (3, 5)))

    # Attach the fake root (node 6) above node 3
    network.add_edge(6, 3)

    # Coordinates roughly matching the diagram above
    left_root = np.array([10, 10])
    fake_root = np.array([20, 9])
    coords = {
        0: left_root,
        1: left_root + [-1, 1],
        2: left_root + [1, 1],
        3: fake_root + [0, 1],
        4: fake_root + [-1, 2],
        5: fake_root + [1, 2],
        6: fake_root,
    }
    nx.set_node_attributes(network, 'coords', coords)

    # Metrics deliberately omit the fake node (node 6)
    demand = [100, 50, 25, 12, 6, 3]
    metrics = DataFrame({
        'Demand...Projected.nodal.demand.per.year': demand,
        'Population': demand,
    })
    metrics['X'] = [coords[i][0] for i in range(6)]
    metrics['Y'] = [coords[i][1] for i in range(6)]

    # Expected ranks (the sequence). Note:
    # - ranks are 1-based and originally assigned to nodes
    # - edges are assigned rank based on the "to" node
    # - fake nodes are skipped when assigning rank
    #   (See Sequencer.sequencer._sequence for details)
    node_rank = {0: 1, 1: 2, 2: 3, 3: 4, 4: 5, 5: 6}
    edge_rank = {(0, 1): 2, (0, 2): 3, (6, 3): 4, (3, 4): 5, (3, 5): 6}
    return metrics, network, node_rank, edge_rank


def test_sequencer_with_fakes():
    """
    Sequence a network containing a fake (metrics-less) node and
    verify the resulting node and edge ranks.
    """
    metrics, network, node_rank, edge_rank = gen_data_with_fakes()
    nwp = NetworkPlan(network, metrics, prioritize='Population', proj='wgs4')
    model = EnergyMaximizeReturn(nwp)
    results = model.sequence()

    node_ids = results['Sequence..Vertex.id']
    sequence_ids = results['Sequence..Far.sighted.sequence']
    actual_node_rank = dict(zip(node_ids, sequence_ids))
    # network.edge maps node -> neighbor -> attrs, so iterating its items
    # yields nodes, not edge tuples; use edges(data=True) to collect the
    # 'rank' attribute keyed by (from, to). Guard on 'rank' since edges
    # that were not sequenced carry no rank attribute.
    actual_edge_rank = {(fnode, tnode): attrs['rank']
                        for fnode, tnode, attrs in
                        model.networkplan.network.edges(data=True)
                        if 'rank' in attrs}
    assert node_rank == actual_node_rank,\
        "Node sequencing is not what was expected"
    assert edge_rank == actual_edge_rank,\
        "Edge sequencing is not what was expected"


class TestNetworkPlan(NetworkPlan):

def __init__(self):
Expand Down Expand Up @@ -144,7 +228,7 @@ def test_sequencer_compare():
input_dir = "data/sumaila/input"
csv_file = os.path.join(input_dir, "metrics-local.csv")
shp_file = os.path.join(input_dir, "networks-proposed.shp")
nwp = NetworkPlan(shp_file, csv_file, prioritize='Population')
nwp = NetworkPlan.from_files(shp_file, csv_file, prioritize='Population')
model = EnergyMaximizeReturn(nwp)

model.sequence()
Expand Down
10 changes: 7 additions & 3 deletions sequencer/Utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import numpy as np
from numpy import sin, cos, pi, arcsin, sqrt
import string
import collections

def prep_data(network, metrics, loc_tol=.5):
"""
Expand All @@ -15,7 +16,12 @@ def prep_data(network, metrics, loc_tol=.5):
"""

# convert the node names from coords to integers, cache the coords as attrs
network = nx.convert_node_labels_to_integers(network, label_attribute='coords')
# but ONLY if the nodes are themselves collections (which is the default for
# networkx shapefile import)
# otherwise, assume the coords attribute exists
if(len(network.nodes()) > 0 and
isinstance(network.nodes()[0], collections.Iterable)):
network = nx.convert_node_labels_to_integers(network, label_attribute='coords')

# convert special characters to dot notation
metrics.columns = parse_cols(metrics)
Expand All @@ -28,7 +34,6 @@ def prep_data(network, metrics, loc_tol=.5):

# cast coords to tuples (hashable)
node_df['coords'] = node_df['coords'].apply(tuple)
metrics['m_coords'] = metrics['m_coords'].apply(tuple)

# build a vector of all the coordinates in the metrics dataframe
coords_vec = np.vstack(metrics['m_coords'].values)
Expand All @@ -46,7 +51,6 @@ def fuzzy_match(coord):

# cast the coordinates back to tuples (hashable)
node_df['m_coords'] = node_df['m_coords'].apply(tuple)
metrics['m_coords'] = metrics['m_coords'].apply(tuple)

# now that we have identical metric coords in both node_df and metrics join on that column
metrics = pd.merge(metrics, node_df, on='m_coords', left_index=True).sort()
Expand Down

0 comments on commit c18d663

Please sign in to comment.