forked from rkistner/chinese-postman
-
Notifications
You must be signed in to change notification settings - Fork 0
/
postman.py
executable file
·402 lines (319 loc) · 13.9 KB
/
postman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
#!/usr/bin/env python2.7
from __future__ import print_function
import os
import sys
import tempfile
import subprocess
__author__ = 'Ralf Kistner'
# Use the bundled networkx egg
if __name__ == '__main__':
nxegg = 'lib/networkx-1.7-py2.7.egg'
else:
nxegg = os.path.join(os.path.dirname(__file__), 'lib/networkx-1.7-py2.7.egg')
sys.path.append(nxegg)
import csv
import networkx as nx
import xml.dom.minidom as minidom
from distutils.version import LooseVersion
if LooseVersion(nx.__version__) < LooseVersion('1.11'):
write_dot = nx.write_dot
else:
try:
import pygraphviz
from networkx.drawing.nx_agraph import write_dot
except ImportError:
try:
if LooseVersion(nx.__version__) == LooseVersion('1.11'):
import pydotplus
else:
import pydot
from networkx.drawing.nx_pydot import write_dot
except ImportError:
from networkx.drawing.nx_agraph import write_dot
# Below constant is for changes introduced in NetworkX 2.1rc1, but
# LooseVersion thinks '2.1rc1' is higher than '2.1'. Change to '2.1rc1'
# when switching to pkg_resources.parse_version/packaging.version.parse.
_NX_BELOW_2_DOT_1 = (LooseVersion(nx.__version__) < LooseVersion('2.1'))
def pairs(lst, circular=False):
"""
Loop through all pairs of successive items in a list.
>>> list(pairs([1, 2, 3, 4]))
[(1, 2), (2, 3), (3, 4)]
>>> list(pairs([1, 2, 3, 4], circular=True))
[(1, 2), (2, 3), (3, 4), (4, 1)]
"""
i = iter(lst)
first = prev = item = next(i)
for item in i:
yield prev, item
prev = item
if circular:
yield item, first
def import_csv_graph(file):
"""
Example:
import_csv_graph(open("test.csv", "rb"))
Does not handle multigraphs yet (more than 1 edge between the same 2 nodes).
Each row must have the following (in this order):
* Start node ID
* End node ID
* Length in meters
* Edge name or ID
* Start longitude, for example 18.4167
* Start latitude, for example -33.9167
* End longitude
* End latitude
"""
reader = csv.reader(file)
graph = nx.Graph()
for row_number, row in enumerate(reader):
try:
start_node = row[0]
end_node = row[1]
length = float(row[2])
id = row[3]
start_lon, start_lat, end_lon, end_lat = map(float, row[4:8])
graph.add_edge(start_node, end_node, weight=length, id=id, label=id)
# We keep the GPS coordinates as strings
graph.node[start_node]['longitude'] = start_lon
graph.node[start_node]['latitude'] = start_lat
graph.node[end_node]['longitude'] = end_lon
graph.node[end_node]['latitude'] = end_lat
except ValueError:
print("Skipping input row %d" % (row_number+1))
return graph
def specify_positions(graph):
lat_min = min([data['latitude'] for n, data in graph.nodes(data=True)])
lat_max = max([data['latitude'] for n, data in graph.nodes(data=True)])
lon_min = min([data['longitude'] for n, data in graph.nodes(data=True)])
lon_max = max([data['longitude'] for n, data in graph.nodes(data=True)])
for node, data in graph.nodes(data=True):
latitude = data['latitude']
longitude = data['longitude']
y = (latitude - lat_min) / (lat_max - lat_min) * 1000
x = (longitude - lon_min) / (lon_max - lon_min) * 1000
graph.node[node]['pos'] = "%d,%d" % (int(x), int(y))
def make_png(graph, path):
fd, dotfile = tempfile.mkstemp(prefix="graph", suffix=".dot")
try:
dfile = dotfile
write_dot(graph, dfile)
subprocess.call(['neato', '-n2', '-Tpng', '-o', path, dfile])
finally:
os.remove(dotfile)
def graph_components(graph):
# The graph may contain multiple components, but we can only handle one connected component. If the graph contains
# more than one connected component, we only use the largest one.
components = list(graph.subgraph(c) for c in nx.connected_components(graph))
components.sort(key=lambda c: c.size(), reverse=True)
return components
def odd_graph(graph):
"""
Given a graph G, construct a graph containing only the vertices with odd degree from G. The resulting graph is
fully connected, with each weight being the shortest path between the nodes in G.
Complexity: O(V'*(E + V log(V)) )
"""
result = nx.Graph()
odd_nodes = [n for n in graph.nodes() if graph.degree(n) % 2 == 1]
for u in odd_nodes:
# We calculate the shortest paths twice here, but the overall performance hit is low
paths = nx.shortest_path(graph, source=u, weight='weight')
lengths = nx.shortest_path_length(graph, source=u, weight='weight')
for v in odd_nodes:
if u <= v:
# We only add each edge once
continue
# The edge weights are negative for the purpose of max_weight_matching (we want the minimum weight)
result.add_edge(u, v, weight=-lengths[v], path=paths[v])
return result
def as_gpx(graph, track_list, name=None):
"""
Convert a list of tracks to GPX format
Example:
>>> g = nx.Graph()
>>> g.add_node(1, latitude="31.1", longitude="-18.1")
>>> g.add_node(2, latitude="31.2", longitude="-18.2")
>>> g.add_node(3, latitude="31.3", longitude="-18.3")
>>> print(as_gpx(g, [{'points': [1,2,3]}]))
<?xml version="1.0" ?><gpx version="1.0"><trk><name>Track 1</name><number>1</number><trkseg><trkpt lat="31.1" lon="-18.1"><ele>1</ele></trkpt><trkpt lat="31.2" lon="-18.2"><ele>2</ele></trkpt><trkpt lat="31.3" lon="-18.3"><ele>3</ele></trkpt></trkseg></trk></gpx>
"""
doc = minidom.Document()
root = doc.createElement("gpx")
root.setAttribute("version", "1.0")
doc.appendChild(root)
if name:
gpx_name = doc.createElement("name")
gpx_name.appendChild(doc.createTextNode(name))
root.appendChild(gpx_name)
for i, track in enumerate(track_list):
nr = i+1
track_name = track.get('name') or ("Track %d" % nr)
trk = doc.createElement("trk")
trk_name = doc.createElement("name")
trk_name.appendChild(doc.createTextNode(track_name))
trk.appendChild(trk_name)
trk_number = doc.createElement("number")
trk_number.appendChild(doc.createTextNode(str(nr)))
trk.appendChild(trk_number)
trkseg = doc.createElement("trkseg")
for u in track['points']:
longitude = graph.node[u].get('longitude')
latitude = graph.node[u].get('latitude')
trkpt = doc.createElement("trkpt")
trkpt.setAttribute("lat", str(latitude))
trkpt.setAttribute("lon", str(longitude))
ele = doc.createElement("ele")
ele.appendChild(doc.createTextNode(str(u)))
trkpt.appendChild(ele)
trkseg.appendChild(trkpt)
trk.appendChild(trkseg)
root.appendChild(trk)
return doc.toxml()
def write_csv(graph, nodes, out):
writer = csv.writer(out)
writer.writerow(["Start Node", "End Node", "Segment Length", "Segment ID", "Start Longitude", "Start Latitude", "End Longitude", "End Latitude"])
for u, v in pairs(nodes, False):
length = graph[u][v]['weight']
id = graph[u][v]['id']
start_latitude = graph.node[u].get('latitude')
start_longitude = graph.node[u].get('longitude')
end_latitude = graph.node[v].get('latitude')
end_longitude = graph.node[v].get('longitude')
writer.writerow([u, v, length, id, start_longitude, start_latitude, end_longitude, end_latitude])
def edge_sum(graph):
total = 0
for u, v, data in graph.edges(data=True):
total += data['weight']
return total
def matching_cost(graph, matching):
# Calculate the cost of the additional edges
cost = 0
for u, v in (matching.items() if _NX_BELOW_2_DOT_1 else matching):
if _NX_BELOW_2_DOT_1 and (v <= u):
continue
data = graph[u][v]
cost += abs(data['weight'])
return cost
def find_matchings(graph, n=5):
"""
Find the n best matchings for a graph. The best matching is guaranteed to be the best, but the others are only
estimates.
A matching is a subset of edges in which no node occurs more than once.
The result may contain less than n matchings.
See https://networkx.github.io/documentation/stable/reference/algorithms/generated/networkx.algorithms.matching.max_weight_matching.html
"""
best_matching = nx.max_weight_matching(graph, True)
matchings = [best_matching]
for u, v in (best_matching.items() if _NX_BELOW_2_DOT_1 else best_matching):
if _NX_BELOW_2_DOT_1 and (v <= u):
continue
# Remove the matching
smaller_graph = nx.Graph(graph)
smaller_graph.remove_edge(u, v)
matching = nx.max_weight_matching(smaller_graph, True)
if len(matching) > 0:
# We may get an empty matching if there is only one edge (that we removed).
matchings.append(matching)
matching_costs = [(matching_cost(graph, matching), matching) for matching in matchings]
matching_costs.sort(key=lambda k: k[0])
# HACK: The above code end up giving duplicates of the same path, even though the matching is different. To prevent
# this, we remove matchings with the same cost.
final_matchings = []
last_cost = None
for cost, matching in matching_costs:
if cost == last_cost:
continue
last_cost = cost
final_matchings.append((cost, matching))
return final_matchings
def build_eulerian_graph(graph, odd, matching):
"""
Build an Eulerian graph from a matching. The result is a MultiGraph.
"""
# Copy the original graph to a multigraph (so we can add more edges between the same nodes)
eulerian_graph = nx.MultiGraph(graph)
# For each matched pair of odd vertices, connect them with the shortest path between them
for u, v in (matching.items() if _NX_BELOW_2_DOT_1 else matching):
if _NX_BELOW_2_DOT_1 and (v <= u):
# With max_weight_matching of NetworkX <2.1 each matching occurs twice in the matchings: (u => v) and (v => u). We only count those where v > u
continue
edge = odd[u][v]
path = edge['path'] # The shortest path between the two nodes, calculated in odd_graph()
# Add each segment in this path to the graph again
for p, q in pairs(path):
eulerian_graph.add_edge(p, q, weight=graph[p][q]['weight'])
return eulerian_graph
def eulerian_circuit(graph):
"""
Given an Eulerian graph, find one eulerian circuit. Returns the circuit as a list of nodes, with the first and
last node being the same.
"""
circuit = list(nx.eulerian_circuit(graph))
nodes = []
for u, v in circuit:
nodes.append(u)
# Close the loop
nodes.append(circuit[0][0])
return nodes
def chinese_postman_paths(graph, n=5):
"""
Given a graph, return a list of node id's forming the shortest chinese postman path.
"""
# Find all the nodes with an odd degree, and create a graph containing only them
odd = odd_graph(graph)
# Find the best matching of pairs of odd nodes
matchings = find_matchings(odd, n)
paths = []
for cost, matching in matchings[:n]:
# Copy the original graph to a multigraph (so we can add more edges between the same nodes)
eulerian_graph = build_eulerian_graph(graph, odd, matching)
# Now that we have an eulerian graph, we can calculate the eulerian circuit
nodes = eulerian_circuit(eulerian_graph)
paths.append((eulerian_graph, nodes))
return paths
def single_chinese_postman_path(graph):
"""
Given a graph, return a list of node id's forming the shortest chinese postman path.
If we assume V' (number of nodes with odd degree) is at least some constant fraction of V (total number of nodes),
say 10%, the overall complexity is O(V^3).
"""
# Build a fully-connected graph containing only the odd edges. Complexity: O(V'*(E + V log(V)) )
odd = odd_graph(graph)
# Find the best matching of pairs of odd nodes. Complexity: O(V'^3)
matching = nx.max_weight_matching(odd, True)
# Complexity of the remainder is less approximately O(E)
eulerian_graph = build_eulerian_graph(graph, odd, matching)
nodes = eulerian_circuit(eulerian_graph)
return eulerian_graph, nodes
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser()
parser.add_argument("input", help="input CSV file", type=argparse.FileType('r'))
parser.add_argument("--gpx", help="GPX output file", type=argparse.FileType('w'))
parser.add_argument("--csv", help="CSV output file", type=argparse.FileType('w'))
parser.add_argument("--png", help="PNG output file", type=argparse.FileType('wb'))
args = parser.parse_args()
graph = import_csv_graph(args.input)
components = graph_components(graph)
if len(components) == 0:
raise ValueError("No graph components found; check input file")
# Only use the largest component
component = components[0]
paths = chinese_postman_paths(component, n=5)
for eulerian_graph, nodes in paths:
in_length = edge_sum(graph)/1000.0
path_length = edge_sum(eulerian_graph)/1000.0
duplicate_length = path_length - in_length
print("Total length of roads: %.3f km" % in_length)
print("Total length of path: %.3f km" % path_length)
print("Length of sections visited twice: %.3f km" % duplicate_length)
print("Node sequence:", nodes)
print()
eulerian_graph, nodes = paths[0] # The best one
if args.gpx:
args.gpx.write(as_gpx(graph, [{'points': nodes}]))
if args.csv:
write_csv(graph, nodes, args.csv)
if args.png:
specify_positions(eulerian_graph)
make_png(eulerian_graph, args.png.name)