ML1
February 13, 2019
1 Question no. 1 Synthetic data generation and simple curve fitting
1.1 a) dataset size=10
In [113]: import numpy as np
In [168]: x=np.random.uniform(0,1,10); x.sort()
%store x
x
Stored 'x' (ndarray)
Out[168]: array([4.56572618e-05, 3.20138948e-02, 7.33332470e-02, 8.22840269e-02,
1.00608680e-01, 1.41989948e-01, 3.02205900e-01, 5.09208602e-01,
5.48487424e-01, 9.16432859e-01])
In [169]: y=np.sin(2*np.pi*x) + np.random.normal(0,0.3,10)
%store y
y
Stored 'y' (ndarray)
Out[169]: array([ 0.03583561, 0.08863262, 0.26436301, 0.73229327, 0.6498295 ,
0.76267081, 1.08759352, 0.10366157, -0.41853198, -0.38551607])
In [170]: %store -r
In [171]: (x,y)
Out[171]: (array([4.56572618e-05, 3.20138948e-02, 7.33332470e-02, 8.22840269e-02,
1.00608680e-01, 1.41989948e-01, 3.02205900e-01, 5.09208602e-01,
5.48487424e-01, 9.16432859e-01]),
array([ 0.03583561, 0.08863262, 0.26436301, 0.73229327, 0.6498295 ,
0.76267081, 1.08759352, 0.10366157, -0.41853198, -0.38551607]))
1
1.2 b)
In [172]: from random import sample
a=sample(range(0,10),8);
%store a
Stored 'a' (list)
In [173]: %store -r
In [174]: index=[i for i in list(range(0,10)) if i not in list(a)]
In [175]: X_test=x[index]; y_test=y[index];
y_test=y_test.reshape(y_test.shape[0],1)
In [176]: X_train=x[a];
X_train
Out[176]: array([0.03201389, 0.5092086 , 0.3022059 , 0.54848742, 0.07333325,
0.10060868, 0.14198995, 0.91643286])
In [177]: y_train=y[a];y_train=y_train.reshape(y_train.shape[0],1)
y_train
Out[177]: array([[ 0.08863262],
[ 0.10366157],
[ 1.08759352],
[-0.41853198],
[ 0.26436301],
[ 0.6498295 ],
[ 0.76267081],
[-0.38551607]])
1.3 c)
In [178]: m=np.ones((X_train.shape[0],10));
for i in range(0,10):
    m[:,i]=X_train**i
M=np.ones((X_test.shape[0],10));
for i in range(0,10):
    M[:,i]=X_test**i
In [179]: X_train_1=m[:,[0,1]]; X_test_1=M[:,[0,1]]; theta1=np.zeros((2,1));
def model(X, theta):
    """Linear-in-parameters model: predictions X @ theta.

    X     : (m, k) design matrix (powers of the input).
    theta : (k, 1) parameter column vector.
    Returns an (m, 1) column of predictions.
    """
    return np.dot(X, theta)

def cost(X, theta, y):
    """Half mean-squared-error cost J(theta) = sum((X.theta - y)^2) / (2m).

    y is an (m, 1) column; the built-in sum over rows yields a (1,)
    array, hence the [0] to extract the scalar.
    """
    return sum((np.dot(X, theta) - y) ** 2)[0] / (2 * X.shape[0])

def gradient_descent(X, theta, y, alpha):
    """Batch gradient descent with learning rate alpha.

    Iterates up to 100000 steps; stops early as soon as a step no
    longer decreases the cost (J <= J_opt), keeping the last theta
    that improved. Prints the number of iterations performed and
    returns the optimized (k, 1) theta.
    """
    for i in range(0, 100000):
        J = cost(X, theta, y)
        # standard least-squares gradient: (1/m) * X^T (X.theta - y)
        theta1 = theta - (alpha / X.shape[0]) * (np.dot(X.T, (model(X, theta) - y)))
        J_opt = cost(X, theta1, y)
        if J <= J_opt:
            break  # no further improvement; keep current theta
        else:
            theta = theta1
    print("Number of iterations: ", i)
    return theta
In [181]: theta_opt1=gradient_descent(X_train_1,theta1,y_train,0.05)
theta_opt1
Number of iterations: 4028
Out[181]: array([[ 0.61901605],
[-1.06674016]])
In [182]: test_error1=cost(X_test_1,theta_opt1,y_test); train_error1=cost(X_train_1,theta_opt1,y_train)
test_error1
Out[182]: 0.09511622271301672
In [183]: X_train_2=m[:,[0,1,2]]; X_test_2=M[:,[0,1,2]]; theta2=np.zeros((3,1))
In [184]: theta_opt2=gradient_descent(X_train_2,theta2,y_train,0.05)
theta_opt2
Number of iterations: 77664
Out[184]: array([[ 0.45100356],
[ 0.39766551],
[-1.63541815]])
In [185]: test_error2=cost(X_test_2,theta_opt2,y_test); train_error2=cost(X_train_2,theta_opt2,y_train)
test_error2
Out[185]: 0.059948244666874276
In [186]: X_train_3=m[:,[0,1,2,3]]; X_test_3=M[:,[0,1,2,3]]; theta3=np.zeros((4,1))
In [187]: theta_opt3=gradient_descent(X_train_3,theta3,y_train,0.05)
theta_opt3
3
Number of iterations: 99999
Out[187]: array([[ 0.30054854],
[ 3.03422839],
[-9.49857845],
[ 5.73426157]])
In [188]: test_error3=cost(X_test_3,theta_opt3,y_test); train_error3=cost(X_train_3,theta_opt3,y_train)
test_error3
Out[188]: 0.03232230527392706
In [189]: X_train_4=m[:,[0,1,2,3,4]]; X_test_4=M[:,[0,1,2,3,4]]; theta4=np.zeros((5,1))
In [190]: theta_opt4=gradient_descent(X_train_4,theta4,y_train,0.05)
theta_opt4
Number of iterations: 99999
Out[190]: array([[ 0.13043307],
[ 5.26976736],
[-11.74893731],
[ -2.00083726],
[ 8.52197539]])
In [191]: test_error4=cost(X_test_4,theta_opt4,y_test); train_error4=cost(X_train_4,theta_opt4,y_train)
test_error4
Out[191]: 0.017688444243537934
In [192]: X_train_5=m[:,[0,1,2,3,4,5]]; X_test_5=M[:,[0,1,2,3,4,5]]; theta5=np.zeros((6,1))
In [193]: theta_opt5=gradient_descent(X_train_5,theta5,y_train,0.05)
theta_opt5
Number of iterations: 99999
Out[193]: array([[ 0.02671602],
[ 6.41175294],
[-11.73377243],
[ -5.25211259],
[ 3.01348511],
[ 8.43915221]])
In [194]: test_error5=cost(X_test_5,theta_opt5,y_test); train_error5=cost(X_train_5,theta_opt5,y_train)
test_error5
4
Out[194]: 0.016944689569137104
In [195]: X_train_6=m[:,[0,1,2,3,4,5,6]]; X_test_6=M[:,[0,1,2,3,4,5,6]]; theta6=np.zeros((7,1))
In [196]: theta_opt6=gradient_descent(X_train_6,theta6,y_train,0.05)
theta_opt6
Number of iterations: 99999
Out[196]: array([[ -0.02774013],
[ 6.91790094],
[-11.19503816],
[ -6.58032958],
[ 0.24918351],
[ 4.90843665],
[ 7.43361559]])
In [197]: test_error6=cost(X_test_6,theta_opt6,y_test); train_error6=cost(X_train_6,theta_opt6,y_train)
test_error6
Out[197]: 0.019256817795943935
In [198]: X_train_7=m[:,[0,1,2,3,4,5,6,7]]; X_test_7=M[:,[0,1,2,3,4,5,6,7]]; theta7=np.zeros((8,1))
In [199]: theta_opt7=gradient_descent(X_train_7,theta7,y_train,0.05)
theta_opt7
Number of iterations: 99999
Out[199]: array([[ -0.05590717],
[ 7.13328798],
[-10.65608892],
[ -7.13811592],
[ -1.22017668],
[ 2.92885583],
[ 5.24233003],
[ 6.31298633]])
In [200]: test_error7=cost(X_test_7,theta_opt7,y_test); train_error7=cost(X_train_7,theta_opt7,y_train)
test_error7
Out[200]: 0.021328822695781458
In [201]: X_train_8=m[:,[0,1,2,3,4,5,6,7,8]]; X_test_8=M[:,[0,1,2,3,4,5,6,7,8]]; theta8=np.zeros((9,1))
In [202]: theta_opt8=gradient_descent(X_train_8,theta8,y_train,0.05)
theta_opt8
5
Number of iterations: 99999
Out[202]: array([[ -0.07098847],
[ 7.2235418 ],
[-10.22581104],
[ -7.38324589],
[ -2.05894679],
[ 1.74728375],
[ 3.91118967],
[ 4.95068973],
[ 5.31900064]])
In [203]: test_error8=cost(X_test_8,theta_opt8,y_test); train_error8=cost(X_train_8,theta_opt8,y_train)
test_error8
Out[203]: 0.022763509050243616
In [204]: X_train_9=m[:,[0,1,2,3,4,5,6,7,8,9]]; X_test_9=M[:,[0,1,2,3,4,5,6,7,8,9]]; theta9=np.zeros((10,1))
In [205]: theta_opt9=gradient_descent(X_train_9,theta9,y_train,0.05)
theta_opt9
Number of iterations: 99999
Out[205]: array([[-0.07949424],
[ 7.2603034 ],
[-9.90234645],
[-7.49645706],
[-2.57160618],
[ 0.99907177],
[ 3.05693338],
[ 4.07070013],
[ 4.45792049],
[ 4.4952223 ]])
In [206]: test_error9=cost(X_test_9,theta_opt9,y_test); train_error9=cost(X_train_9,theta_opt9,y_train)
test_error9
Out[206]: 0.023714480158400306
1.4 Question no. 2 Visualization of the dataset and the fitted curves
1.5 a)
In [207]: import matplotlib.pyplot as plt
In [208]: plt.plot(x,y,'o')
Out[208]: [<matplotlib.lines.Line2D at 0x5c6272d978>]
6
In [209]: g=np.ones((x.shape[0],10));
for i in range(0,10):
    g[:,i]=x**i
In [210]: g1=g[:,[0,1]];
y1=model(g1,theta_opt1)
plt.plot(x,y,'o')
plt.plot(x,y1,marker='o')
Out[210]: [<matplotlib.lines.Line2D at 0x5c626ddb00>]
7
In [211]: g2=g[:,[0,1,2]];
y2=model(g2,theta_opt2)
plt.plot(x,y,'o')
plt.plot(x,y2,marker='o')
Out[211]: [<matplotlib.lines.Line2D at 0x5c627a9ac8>]
8
In [212]: g3=g[:,[0,1,2,3]];
y3=model(g3,theta_opt3)
plt.plot(x,y,'o')
plt.plot(x,y3,marker='o')
Out[212]: [<matplotlib.lines.Line2D at 0x5c637df6d8>]
In [213]: g4=g[:,[0,1,2,3,4]];
y4=model(g4,theta_opt4)
plt.plot(x,y,'o')
plt.plot(x,y4,marker='o')
Out[213]: [<matplotlib.lines.Line2D at 0x5c627af9b0>]
9
In [214]: g5=g[:,[0,1,2,3,4,5]];
y5=model(g5,theta_opt5)
plt.plot(x,y,'o')
plt.plot(x,y5,marker='o')
Out[214]: [<matplotlib.lines.Line2D at 0x5c63804b00>]
10
In [215]: g6=g[:,[0,1,2,3,4,5,6]];
y6=model(g6,theta_opt6)
plt.plot(x,y,'o')
plt.plot(x,y6,marker='o')
Out[215]: [<matplotlib.lines.Line2D at 0x5c638ee550>]
In [216]: g7=g[:,[0,1,2,3,4,5,6,7]];
y7=model(g7,theta_opt7)
plt.plot(x,y,'o')
plt.plot(x,y7,marker='o')
Out[216]: [<matplotlib.lines.Line2D at 0x5c63959eb8>]
11
In [217]: g8=g[:,[0,1,2,3,4,5,6,7,8]];
y8=model(g8,theta_opt8)
plt.plot(x,y,'o')
plt.plot(x,y8,marker='o')
Out[217]: [<matplotlib.lines.Line2D at 0x5c639c17b8>]
12
In [218]: g9=g[:,[0,1,2,3,4,5,6,7,8,9]];
y9=model(g9,theta_opt9)
plt.plot(x,y,'o')
plt.plot(x,y9,marker='o')
Out[218]: [<matplotlib.lines.Line2D at 0x5c63a06cc0>]
1.6 b)
In [219]: trainerror=[train_error1,train_error2,train_error3,train_error4,train_error5,train_error6,train_error7,train_error8,train_error9]
testerror=[test_error1,test_error2,test_error3,test_error4,test_error5,test_error6,test_error7,test_error8,test_error9]
In [220]: n=list(range(1,10))
In [221]: plt.plot(n,trainerror,marker='o')
plt.plot(n,testerror,color='r',marker='o')
plt.xlabel('n'); plt.ylabel('error')
plt.legend(['Training Error','Test Error'])
Out[221]: <matplotlib.legend.Legend at 0x5c63aaac88>
13
In [225]: test_error4
Out[225]: 0.017688444243537934
In [226]: test_error5
Out[226]: 0.016944689569137104
In [227]: abs(train_error4-test_error4)
Out[227]: 0.007501399085289059
In [228]: abs(train_error5-test_error5)
Out[228]: 0.003355065146649662
1.7 So, from the above plot we can see that at n=5 the test error is minimum, and the
difference between the training and test errors is also minimum, so n=5 is the most suitable model order here.
14