-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathbitcoin.py
167 lines (131 loc) · 5.01 KB
/
bitcoin.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
import statsmodels.formula.api as smf
import sklearn.metrics as sm
import pandas as pd
import numpy as np
import math
import sys
# Exactly one command-line argument is expected: the path to the data folder.
# Anything else prints the usage line and stops with exit status 1.
if not len(sys.argv) == 2:
    print('bitcoin.py <path to data folder>')
    sys.exit(1)

data_path = sys.argv[1]
# Load the nine CSV vector files from the data folder. The files are read in
# the same order as they are listed, three per group (train1, train2, test).
train1_90, train1_180, train1_360 = (
    pd.read_csv(data_path + csv_name)
    for csv_name in ('/train1_90.csv', '/train1_180.csv', '/train1_360.csv')
)
train2_90, train2_180, train2_360 = (
    pd.read_csv(data_path + csv_name)
    for csv_name in ('/train2_90.csv', '/train2_180.csv', '/train2_360.csv')
)
test_90, test_180, test_360 = (
    pd.read_csv(data_path + csv_name)
    for csv_name in ('/test_90.csv', '/test_180.csv', '/test_360.csv')
)
def computeDelta(wt, X, Xi):
    """
    Predict the average price change for one query vector.

    This function computes equation 6 of the paper, but with the euclidean
    distance replaced by the similarity function given in Equation 9. The
    similarity between x and each xi is the sum of the products of their
    mean-centred entries divided by (length * std(x) * std(xi)), using the
    population standard deviation. The result is the average of the yi
    values weighted by exp(wt * similarity).

    Parameters
    ----------
    wt : int or float
        This is the constant c at the top of the right column on page 4.
    X : A row of Panda Dataframe (any 1-D sequence of numbers also works)
        Corresponds to (x, y) in Equation 6. The last entry is y and is
        ignored here.
    Xi : Panda Dataframe (any 2-D array-like of numbers also works)
        Corresponds to a dataframe of (xi, yi) in Equation 6. Each row is
        one pattern; the last column is yi.

    Returns
    -------
    float
        The output of equation 6, a prediction of the average price change.
        A constant x or xi has zero standard deviation, which makes the
        similarity undefined; the result is then nan.

    Raises
    ------
    ZeroDivisionError
        If Xi has no rows, because the weighted average has no terms.
    """
    # Work on plain arrays so every access is positional; the row labels of
    # Xi and the column labels of X play no part in the computation.
    query = np.asarray(X, dtype=float)[:-1]
    patterns = np.asarray(Xi, dtype=float)
    if patterns.shape[0] == 0:
        raise ZeroDivisionError('computeDelta: Xi has no rows to average over')

    features = patterns[:, :-1]
    targets = patterns[:, -1]

    # Similarity of the query to every pattern row, computed in one pass.
    query_centered = query - query.mean()
    features_centered = features - features.mean(axis=1, keepdims=True)
    covariance = np.dot(features_centered, query_centered)
    similarity = covariance / (len(query) * query.std() * features.std(axis=1))

    # Shifting every exponent by the same amount cancels between numerator
    # and denominator, and keeps exp() from overflowing for large wt.
    exponent = wt * similarity
    weights = np.exp(exponent - exponent.max())

    return float(np.dot(weights, targets) / weights.sum())
# Perform the Bayesian Regression to predict the average price change for each
# dataset of train2 using train1 as input.
# These will be used to estimate the coefficients (w0, w1, w2, and w3) in equation 8.
weight = 2  # This constant was not specified in the paper, but we will use 2.

# One prediction per train2 row. The loop bound is taken from the frame that
# is actually indexed (train2_*), so a train1/train2 length mismatch can
# neither run past the end of train2 nor silently drop rows.
trainDeltaP90 = np.array([
    computeDelta(weight, train2_90.iloc[i], train1_90)
    for i in range(len(train2_90.index))
], dtype=float)
trainDeltaP180 = np.array([
    computeDelta(weight, train2_180.iloc[i], train1_180)
    for i in range(len(train2_180.index))
], dtype=float)
trainDeltaP360 = np.array([
    computeDelta(weight, train2_360.iloc[i], train1_360)
    for i in range(len(train2_360.index))
], dtype=float)

# Actual deltaP values for the train2 data, flattened to a 1-D array.
trainDeltaP = np.asarray(train2_360[['Yi']])
trainDeltaP = np.reshape(trainDeltaP, -1)

# Combine all the training data
d = {
    'deltaP': trainDeltaP,
    'deltaP90': trainDeltaP90,
    'deltaP180': trainDeltaP180,
    'deltaP360': trainDeltaP360,
}
trainData = pd.DataFrame(d)

# Feed the data: [deltaP, deltaP90, deltaP180, deltaP360] to train the linear model.
# Use the statsmodels ols function.
# Use the variable name model for your fitted model
model = smf.ols('deltaP ~ deltaP90 + deltaP180 + deltaP360', trainData)
model = model.fit()

# Print the weights from the model (single-argument form prints the same
# text under Python 2 and Python 3).
print(model.params)
# Perform the Bayesian Regression to predict the average price change for each
# dataset of test using train1 as input.
# This should be similar to above where it was computed for train2.
weight = 2

# One prediction per test row. The loop bound is taken from the frame that is
# actually indexed (test_*), so a train1/test length mismatch can neither run
# past the end of the test data nor silently drop rows.
testDeltaP90 = np.array([
    computeDelta(weight, test_90.iloc[i], train1_90)
    for i in range(len(test_90.index))
], dtype=float)
testDeltaP180 = np.array([
    computeDelta(weight, test_180.iloc[i], train1_180)
    for i in range(len(test_180.index))
], dtype=float)
testDeltaP360 = np.array([
    computeDelta(weight, test_360.iloc[i], train1_360)
    for i in range(len(test_360.index))
], dtype=float)

# Actual deltaP values for test data, flattened to a 1-D array.
testDeltaP = np.asarray(test_360[['Yi']])
testDeltaP = np.reshape(testDeltaP, -1)

# Combine all the test data
d = {
    'deltaP': testDeltaP,
    'deltaP90': testDeltaP90,
    'deltaP180': testDeltaP180,
    'deltaP360': testDeltaP360,
}
testData = pd.DataFrame(d)

# Predict price variation on the test data set.
result = model.predict(testData)
compare = {
    'Actual': testDeltaP,
    'Predicted': result,
}
compareDF = pd.DataFrame(compare)

# Compute the MSE and print the result (single-argument form prints the same
# text under Python 2 and Python 3).
MSE = sm.mean_squared_error(testDeltaP, result)
print("The MSE is %f" % (MSE))