-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcut_timeseries.py
120 lines (101 loc) · 4.07 KB
/
cut_timeseries.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import pandas as pd
from datetime import timedelta
from sys import argv
import networkx as nx
import argparse
from choice_functions import *
from decay_functions import *
from repair_ml import reset_graph, make_graph
import pylab
def estimate_explore(df, start=0, G=None, strategy='rank', decay_type='exp', decay=0.01, ghost=False):
    """Replay the choices in ``df`` and count how many were exploratory.

    The frame is sorted by its ``dt`` column *in place* (the caller's frame
    is reordered), then its ``source``/``dest``/``dt`` columns are replayed
    in order against a weighted graph.

    Parameters:
        df: DataFrame with ``source``, ``dest`` and ``dt`` columns.  ``dt``
            values must support subtraction yielding an object with
            ``total_seconds()``.
        start: only rows with index ``1 <= i <= start`` (after sorting) are
            classified with ``is_explore``; later rows only update the graph.
        G: graph to replay on.  When ``None`` a graph is built with
            ``make_graph(sources, dests, ghost)``; otherwise the given graph
            is passed to ``reset_graph`` first.
        strategy: forwarded unchanged to ``is_explore``.
        decay_type: forwarded to ``decay_graph``; the value ``'linear'`` also
            makes the first edge record a unit in its ``'units'`` list.
        decay: forwarded unchanged to ``decay_graph``.
        ghost: forwarded unchanged to ``make_graph``.

    Returns:
        ``(explore_steps, steps)`` where ``steps`` counts the classified rows
        for which ``is_explore`` returned something other than ``None`` and
        ``explore_steps`` counts those for which it returned ``True``.
        ``(0, 0)`` is returned for an empty frame.
    """
    # DataFrame.sort() was removed in pandas 0.20; sort_values() is its
    # replacement.  The sort stays in place, exactly as before.
    df.sort_values('dt', inplace=True)
    sources = list(df['source'])
    dests = list(df['dest'])
    dts = list(df['dt'])

    assert len(sources) == len(dests)
    assert start <= len(sources)

    if G is None:
        G = make_graph(sources, dests, ghost)
    else:
        reset_graph(G)

    # Nothing to replay: the original code crashed on sources[0] here.
    if not sources:
        return 0, 0

    steps = 0
    explore_steps = 0

    # G is the graph that choices are judged against; G2 accumulates the
    # choices of the current timestamp and replaces G when the timestamp
    # changes.
    # NOTE(review): G2 is copied *before* the first choice is added to G, so
    # whether that first increment survives the first `G = G2` depends on how
    # G.copy() treats edge attributes -- confirm this is intended.
    G2 = G.copy()
    G[sources[0]][dests[0]]['weight'] += 1
    if decay_type == 'linear':
        G[sources[0]][dests[0]]['units'].append(1)

    for i in range(1, len(sources)):
        curr = dts[i]
        prev = dts[i - 1]  # i starts at 1, so a previous row always exists
        source = sources[i]
        dest = dests[i]

        if i <= start:
            explore_step = is_explore(G, source, dest, strategy)
            if explore_step is not None:
                steps += 1
                # Kept as an equality test: is_explore is defined elsewhere
                # and may return a non-bool truthy value.
                if explore_step == True:
                    explore_steps += 1

        if curr != prev:
            # A new timestamp begins: promote the accumulated graph, decay it
            # by the elapsed time, and start a fresh accumulator.
            seconds = (curr - prev).total_seconds()
            G = G2
            decay_graph(G, decay_type, decay, seconds)
            G2 = G.copy()

        G2[source][dest]['weight'] += 1

    return explore_steps, steps
def time_series_explore(strategies, decay_types, sheets, decay=0.01, window=-1, ghost=False):
    """Plot exploration probabilities over time for each parameter combination.

    For every (strategy, decay_type, sheet) combination the headerless CSV
    ``reformated_counts<sheet>.csv`` is read as ``source, dest, dt`` rows,
    ``dt`` is parsed to datetimes and the frame is sorted by it.  For the
    first row of each distinct timestamp, ``estimate_explore`` is called with
    that row index as ``start``; whenever it reports at least one step, the
    ratio ``explore_steps / steps`` is appended to the series that is plotted.

    Parameters:
        strategies: iterable of strategy names forwarded to
            ``estimate_explore``.
        decay_types: iterable of decay type names forwarded to
            ``estimate_explore``.
        sheets: iterable of sheet identifiers used to build the CSV file name.
        decay: decay value forwarded to ``estimate_explore``.
        window: ``-1`` disables the check; any other value must be positive.
            The value is only validated -- no windowing is applied to the
            data in this function.
        ghost: forwarded to ``estimate_explore``.

    Raises:
        AssertionError: if ``window`` is neither ``-1`` nor positive.
    """
    for strategy in strategies:
        print(strategy)
        for decay_type in decay_types:
            print(decay_type)
            for sheet in sheets:
                print(sheet)
                csv_path = 'reformated_counts%s.csv' % sheet
                choices = pd.read_csv(csv_path, header=None, names=['source', 'dest', 'dt'], skipinitialspace=True)
                choices['dt'] = pd.to_datetime(choices['dt'])
                # DataFrame.sort() was removed in pandas 0.20.
                choices.sort_values('dt', inplace=True)

                if window != -1:
                    assert window > 0

                timestamps = list(choices['dt'])
                explore_probs = []
                for i, stamp in enumerate(timestamps):
                    # Only the first row of each distinct timestamp yields a point.
                    if i > 0 and stamp == timestamps[i - 1]:
                        continue

                    explore_steps, steps = estimate_explore(choices, i, None, strategy, decay_type, decay, ghost)
                    if steps > 0:
                        explore_probs.append(float(explore_steps) / steps)

                # NOTE(review): the source's indentation was lost; plotting and
                # showing once per sheet is the assumed placement -- confirm.
                pylab.plot(list(range(len(explore_probs))), explore_probs)
                print("show")
                pylab.show()
def main():
    """Read the command-line options and hand them to ``time_series_explore``.

    Positional ``sheets`` (one or more) plus the required ``-s/--strategies``
    and ``-dt/--decay_types`` lists; ``-d/--decay`` (float, default 0.01),
    ``-w/--window`` (int, default -1) and the ``-g/--ghost`` flag are optional.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('sheets', nargs='+')
    cli.add_argument('-s', '--strategies', choices=STRATEGY_CHOICES, nargs='+', required=True)
    cli.add_argument('-dt', '--decay_types', choices=DECAY_CHOICES, nargs='+', required=True)
    cli.add_argument('-d', '--decay', type=float, default=0.01)
    cli.add_argument('-w', '--window', type=int, default=-1)
    cli.add_argument('-g', '--ghost', action='store_true')

    opts = cli.parse_args()
    time_series_explore(
        strategies=opts.strategies,
        decay_types=opts.decay_types,
        sheets=opts.sheets,
        decay=opts.decay,
        window=opts.window,
        ghost=opts.ghost,
    )
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()