ML_NN_2.py
# # Nonlinear curve fitting by stochastic gradient descent
# This notebook shows how stochastic gradient descent can fit an arbitrary parametrized function to a target (neural networks do essentially the same thing, but in much higher dimensions and with many more parameters)
import numpy as np
import matplotlib.pyplot as plt # for plotting
import matplotlib
matplotlib.rcParams['figure.dpi']=300 # highres display
# Define the parametrized nonlinear function
def f(theta,x):
    """
    theta are the parameters
    x are the input values (can be an array)
    """
    return(theta[0]/((x-theta[1])**2+1.0))
# We define the gradient of the parametrized function with respect to its parameters
def f_grad(theta,x):
    """
    Return the gradient of f with respect to theta,
    with shape [n_theta,n_samples],
    where n_theta=len(theta)
    and n_samples=len(x)
    """
    return(np.array([
        1./((x-theta[1])**2+1.0),
        2*(x-theta[1])*theta[0]/((x-theta[1])**2+1.0)**2
    ]))
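# A minimal sanity check (added sketch, not part of the original script):
# compare the analytic gradient above against a central finite-difference
# estimate at one arbitrary test point. 'finite_difference_grad' and the
# test values below are hypothetical helpers introduced only for this check.
def finite_difference_grad(theta,x,eps=1e-6):
    # numerical gradient of f with respect to theta, shape [n_theta,n_samples]
    grad=np.zeros([len(theta),len(x)])
    for k in range(len(theta)):
        dtheta=np.zeros(len(theta))
        dtheta[k]=eps
        grad[k,:]=(f(theta+dtheta,x)-f(theta-dtheta,x))/(2*eps)
    return(grad)
x_test=np.linspace(-2,2,5)
theta_test=np.array([1.5,-0.3])
# should print a number close to zero if the analytic gradient is correct:
print(np.max(np.abs(f_grad(theta_test,x_test)-finite_difference_grad(theta_test,x_test))))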
# We define the actual function (the target, to be fitted)
def true_f(x):
    return( 3.0/((x-0.5)**2+1.0) )
# Get randomly sampled x values (Gaussian, with standard deviation 'width')
def samples(nsamples,width=2.0):
    return(width*np.random.randn(nsamples))
# Get the average cost function on a grid of 2 parameters
def get_avg_cost(theta0s,theta1s,nsamples):
    n0=len(theta0s)
    n1=len(theta1s)
    C=np.zeros([n0,n1])
    for j0 in range(n0):
        for j1 in range(n1):
            theta=np.array([theta0s[j0],theta1s[j1]])
            x=samples(nsamples)
            C[j0,j1]=0.5*np.average((f(theta,x)-true_f(x))**2)
    return(C)
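# An optional, faster variant (added sketch, not part of the original script):
# the grid scan above calls f() once per grid point, while broadcasting can
# evaluate the whole [n0,n1] grid at once. Unlike get_avg_cost, this version
# reuses one shared batch of samples for every grid point.
def get_avg_cost_vectorized(theta0s,theta1s,nsamples):
    x=samples(nsamples)                              # shape [nsamples]
    T0=theta0s[:,None,None]                          # shape [n0,1,1]
    T1=theta1s[None,:,None]                          # shape [1,n1,1]
    pred=T0/((x[None,None,:]-T1)**2+1.0)             # shape [n0,n1,nsamples]
    return 0.5*np.average((pred-true_f(x)[None,None,:])**2,axis=-1)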
# cell 3
# take arbitrary parameters as the starting point
theta=np.array([1.5,-2.3])
x=samples(100)
# Plot the parametrized function at the sampled points and
# compare it against the true function
plt.scatter(x,f(theta,x),color="orange")
plt.scatter(x,true_f(x),color="blue")
plt.show()
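# (Added sketch, not in the original script) For reference, print the average
# cost at these starting parameters on the same sample batch:
print("initial average cost: %.3f" % (0.5*np.average((f(theta,x)-true_f(x))**2)))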
# cell 4
theta0s=np.linspace(-3,6,40)
theta1s=np.linspace(-2,3,40)
C=get_avg_cost(theta0s,theta1s,10000)
nlevels=20
X,Y=np.meshgrid(theta0s,theta1s,indexing='ij')
plt.contourf(X,Y,C,nlevels)
plt.contour(X,Y,C,nlevels,colors="white")
plt.xlabel("theta_0")
plt.ylabel("theta_1")
plt.show()
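# (Added check, not part of the original script) The minimum of the cost
# landscape should sit near the true parameters theta=(3.0, 0.5) used in true_f:
j0,j1=np.unravel_index(np.argmin(C),C.shape)
print("grid minimum at theta0=%.2f, theta1=%.2f" % (theta0s[j0],theta1s[j1]))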
# cell 6
# Now we do stochastic gradient descent; at each step we plot the (sampled)
# true function vs. the parametrized function, and we also plot the current
# location of the parameters theta on top of the average cost landscape
# import functions for updating display
# (simple animation)
from IPython.display import clear_output
from time import sleep
# take arbitrary parameters as starting point
theta=np.array([-1.0,2.0])
# do many steps of stochastic gradient descent,
# continue showing the comparison!
eta=.2 # "learning rate" (gradient descent step size)
nsamples=10 # stochastic x samples used per step
nsteps=100 # how many steps we take
x_sweep=np.linspace(-4,4,300)
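# The update inside the loop below is one stochastic gradient descent step on
# the quadratic cost C(theta) = 1/2 * <(f(theta,x) - true_f(x))^2>, whose gradient
#   dC/dtheta_k = <(f(theta,x) - true_f(x)) * df/dtheta_k>
# is estimated by averaging over the nsamples random points drawn in that step.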
for n in range(nsteps):
    #############################
    # THE CRUCIAL THREE LINES: #
    #############################
    x=samples(nsamples) # get random samples
    # deviation from true function (vector):
    deviation=f(theta,x)-true_f(x)
    # do one gradient descent step:
    theta-=eta*np.average(deviation[None,:]*f_grad(theta,x),axis=1)

    # Now: Plotting
    # compare true function (blue) against
    # parametrized function (orange);
    # blue dots indicate random points where
    # the true function was sampled in this step
    clear_output(wait=True)
    fig,ax=plt.subplots(ncols=2,nrows=1,figsize=(8,2))
    nlevels=20
    ax[0].contourf(X,Y,C,nlevels)
    ax[0].contour(X,Y,C,nlevels,colors="white")
    ax[0].scatter([theta[0]],[theta[1]],color="orange")
    ax[0].set_xlim(theta0s[0],theta0s[-1])
    ax[0].set_ylim(theta1s[0],theta1s[-1])
    ax[0].set_xlabel("theta_0")
    ax[0].set_ylabel("theta_1")
    ax[1].plot(x_sweep,true_f(x_sweep),color="blue")
    ax[1].scatter(x,true_f(x),color="blue")
    ax[1].plot(x_sweep,f(theta,x_sweep),color="orange")
    ax[1].set_xlim(-4,4)
    ax[1].set_ylim(0.0,4.0)
    ax[1].set_xlabel("x")
    ax[1].set_ylabel("f")
    plt.show()
    sleep(0.2)
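# Optional follow-up (added sketch, not part of the original script): rerun the
# same descent while recording the minibatch cost at every step, to check that
# the optimization actually converges.
theta=np.array([-1.0,2.0])
costs=np.zeros(nsteps)
for n in range(nsteps):
    x=samples(nsamples)
    deviation=f(theta,x)-true_f(x)
    costs[n]=0.5*np.average(deviation**2)
    theta-=eta*np.average(deviation[None,:]*f_grad(theta,x),axis=1)
plt.plot(costs)
plt.xlabel("step")
plt.ylabel("minibatch cost")
plt.show()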