DATA MINING PROJECT PAVITHRAA GOVINDARAJAN 24 OCT 2021 Jupyter Notebook PDF
DATA MINING PROJECT PAVITHRAA GOVINDARAJAN 24 OCT 2021 Jupyter Notebook PDF
In [2]: df = pd.read_csv('bank_marketing_part1_Data-1.csv')
In [3]: df.head()
Out[3]:
spending advance_payments probability_of_full_payment current_balance credit_limit min_paym
In [4]: df.shape
Out[4]: (210, 7)
In [5]: df.info()
<class 'pandas.core.frame.DataFrame'>
dtypes: float64(7)
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 1/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [6]: df.describe().T
Out[6]:
count mean std min 25% 50% 75%
In [7]: df.isna().sum()
Out[7]: spending 0
advance_payments 0
probability_of_full_payment 0
current_balance 0
credit_limit 0
min_payment_amt 0
max_spent_in_single_shopping 0
dtype: int64
In [8]: df.duplicated().sum()
Out[8]: 0
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 2/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [9]: sns.pairplot(df)
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 3/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [10]: plt.figure(figsize=(7, 6))
# Annotated heat map of the pairwise correlations between df's columns (two decimals per cell).
sns.heatmap(data=df.corr(), fmt=".2f", annot=True);
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 4/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 5/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
fig.show()
fig.show()
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 6/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
fig.show()
fig.show()
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 7/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
fig.show()
fig.show()
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 8/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
fig.show()
-0.29880602, 2.3289982 ],
-0.24280501, -0.53858174],
-0.22147129, 1.50910692],
...,
-1.3221578 , -0.83023461],
-0.95348449, 0.07123789],
-0.70681338, 0.96047321]])
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 9/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
Out[20]:
spending advance_payments probability_of_full_payment current_balance credit_limit min_pay
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 10/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
Out[26]: array([1, 3, 1, 2, 1, 2, 2, 3, 1, 2, 1, 3, 2, 1, 3, 2, 3, 2, 3, 2, 2, 2,
1, 2, 3, 1, 3, 2, 2, 2, 3, 2, 2, 3, 2, 2, 2, 2, 2, 1, 1, 3, 1, 1,
2, 2, 3, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 1, 3, 2, 2, 3, 3, 1,
1, 3, 1, 2, 3, 2, 1, 1, 2, 1, 3, 2, 1, 3, 3, 3, 3, 1, 2, 3, 3, 1,
1, 2, 3, 1, 3, 2, 2, 1, 1, 1, 2, 1, 2, 1, 3, 1, 3, 1, 1, 2, 2, 1,
3, 3, 1, 2, 2, 1, 3, 3, 2, 1, 3, 2, 2, 2, 3, 3, 1, 2, 3, 3, 2, 3,
3, 1, 2, 1, 1, 2, 1, 3, 3, 3, 2, 2, 3, 2, 1, 2, 3, 2, 3, 2, 3, 3,
3, 3, 3, 2, 3, 1, 1, 2, 1, 1, 1, 2, 1, 3, 3, 3, 3, 2, 3, 1, 1, 1,
3, 3, 1, 2, 3, 3, 3, 3, 1, 1, 3, 3, 3, 2, 3, 3, 2, 1, 3, 1, 1, 2,
1, 2, 3, 1, 3, 2, 1, 3, 1, 3, 1, 3], dtype=int32)
In [28]: df.head()
Out[28]:
spending advance_payments probability_of_full_payment current_balance credit_limit min_paym
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 11/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [29]: df.H_clusters.value_counts().sort_index()
Out[29]: 1 70
2 67
3 73
Out[30]:
spending advance_payments probability_of_full_payment current_balance credit_limit
H_clusters
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 12/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
Out[34]: array([1, 3, 1, 2, 1, 3, 2, 2, 1, 2, 1, 1, 2, 1, 3, 3, 3, 2, 2, 2, 2, 2,
1, 2, 3, 1, 3, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 1, 1, 3, 1, 1,
2, 2, 3, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 2, 2, 1, 3, 2, 2, 1, 3, 1,
1, 3, 1, 2, 3, 2, 1, 1, 2, 1, 3, 2, 1, 3, 3, 3, 3, 1, 2, 1, 1, 1,
1, 3, 3, 1, 3, 2, 2, 1, 1, 1, 2, 1, 3, 1, 3, 1, 3, 1, 1, 2, 3, 1,
1, 3, 1, 2, 2, 1, 3, 3, 2, 1, 3, 2, 2, 2, 3, 3, 1, 2, 3, 3, 2, 3,
3, 1, 2, 1, 1, 2, 1, 3, 3, 3, 2, 2, 2, 2, 1, 2, 3, 2, 3, 2, 3, 1,
3, 3, 2, 2, 3, 1, 1, 2, 1, 1, 1, 2, 1, 3, 3, 2, 3, 2, 3, 1, 1, 1,
3, 2, 3, 2, 3, 2, 3, 3, 1, 1, 3, 1, 3, 2, 3, 3, 2, 1, 3, 1, 1, 2,
1, 2, 3, 3, 3, 2, 1, 3, 1, 3, 3, 1], dtype=int32)
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 13/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [36]: df.head()
Out[36]:
spending advance_payments probability_of_full_payment current_balance credit_limit min_paym
In [37]: df.Avg_clusters.value_counts().sort_index()
Out[37]: 1 75
2 70
3 65
Out[38]:
spending advance_payments probability_of_full_payment current_balance credit_lim
Avg_clusters
In [ ]:
In [41]: K_Means.fit(scaled_df)
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 14/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [43]: K_Means.labels_
Out[43]: array([2, 0, 2, 1, 2, 1, 1, 0, 2, 1, 2, 0, 1, 2, 0, 1, 0, 1, 1, 1, 1, 1,
2, 1, 0, 2, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 2, 2, 0, 2, 2,
1, 1, 0, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 1, 1, 2, 0, 1, 1, 0, 0, 2,
2, 0, 2, 1, 0, 1, 2, 2, 1, 2, 0, 1, 2, 0, 0, 0, 0, 2, 1, 0, 2, 0,
2, 1, 0, 2, 0, 1, 1, 2, 2, 2, 1, 2, 0, 2, 0, 2, 0, 2, 2, 1, 1, 2,
0, 0, 2, 1, 1, 2, 0, 0, 1, 2, 0, 1, 1, 1, 0, 0, 2, 1, 0, 0, 1, 0,
0, 2, 1, 2, 2, 1, 2, 0, 0, 0, 1, 1, 0, 1, 2, 1, 0, 1, 0, 1, 0, 0,
1, 0, 0, 1, 0, 2, 2, 1, 2, 2, 2, 1, 0, 0, 0, 1, 0, 1, 0, 2, 2, 2,
0, 1, 0, 1, 0, 0, 0, 0, 2, 2, 1, 0, 0, 1, 1, 0, 1, 2, 0, 2, 2, 1,
2, 1, 0, 2, 0, 1, 2, 0, 2, 0, 0, 0])
In [44]: wss = []
In [46]: wss
Out[46]: [1469.9999999999995,
659.1717544870411,
430.65897315130064,
371.38509060801107,
327.2127816566134,
289.315995389595,
262.98186570162267,
241.8189465608603,
223.91254221002728,
206.3961218478669]
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 15/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [47]: plt.plot(range(1, len(wss) + 1), wss)
# Elbow plot: the x positions 1..len(wss) are derived from wss itself, so the plot
# stays valid if the number of cluster counts evaluated is changed.
plt.grid()
plt.xlabel('No. of clusters')
plt.ylabel('WCSS')
plt.show()
In [49]: silhouette_score(scaled_df,labels_3,random_state=1)
Out[49]: 0.40072705527512986
Out[50]: 0.46577247686580914
Out[51]: 0.3276547677266192
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 16/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
Out[52]:
spending advance_payments probability_of_full_payment current_balance credit_limit min_paym
Out[54]:
spending advance_payments probability_of_full_payment current_balance credit_limit min_paym
In [55]: df.Clus_kmeans.value_counts().sort_index()
Out[55]: 0 71
1 72
2 67
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 17/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
Out[56]:
spending advance_payments probability_of_full_payment current_balance credit_lim
Clus_kmeans
In [59]: df1.head()
Out[59]:
Product
Age Agency_Code Type Claimed Commision Channel Duration Sales Destin
Name
Customised
0 48 C2B Airlines No 0.70 Online 7 2.51
Plan
Travel Customised
1 36 EPX No 0.00 Online 34 20.00
Agency Plan
Travel Customised
2 39 CWT No 5.94 Online 3 9.90 Am
Agency Plan
Travel Cancellation
3 36 EPX No 0.00 Online 4 26.00
Agency Plan
In [60]: df1.shape
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 18/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [61]: df1.info()
<class 'pandas.core.frame.DataFrame'>
In [62]: df1.isna().sum()
Out[62]: Age 0
Agency_Code 0
Type 0
Claimed 0
Commision 0
Channel 0
Duration 0
Sales 0
Product Name 0
Destination 0
dtype: int64
In [63]: df1.describe().T
Out[63]:
count mean std min 25% 50% 75% max
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 19/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [64]: df1.describe(include='all').T
Out[64]:
count unique top freq mean std min 25% 50% 75%
Age 3000.0 NaN NaN NaN 38.091 10.463518 8.0 32.0 36.0 42.0
Agency_Code 3000 4 EPX 1365 NaN NaN NaN NaN NaN NaN
Travel
Type 3000 2 1837 NaN NaN NaN NaN NaN NaN
Agency
Commision 3000.0 NaN NaN NaN 14.529203 25.481455 0.0 0.0 4.63 17.235
Channel 3000 2 Online 2954 NaN NaN NaN NaN NaN NaN
Duration 3000.0 NaN NaN NaN 70.001333 134.053313 -1.0 11.0 26.5 63.0
Sales 3000.0 NaN NaN NaN 60.249913 70.733954 0.0 20.0 33.0 69.0
Product Customised
3000 5 1136 NaN NaN NaN NaN NaN NaN
Name Plan
Destination 3000 3 ASIA 2465 NaN NaN NaN NaN NaN NaN
In [65]: df1.duplicated().sum()
Out[65]: 139
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 20/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
AGENCY_CODE : 4
JZI 239
CWT 472
C2B 924
EPX 1365
TYPE : 2
Airlines 1163
CLAIMED : 2
Yes 924
No 2076
CHANNEL : 2
Offline 46
Online 2954
PRODUCT NAME : 5
DESTINATION : 3
EUROPE 215
Americas 320
ASIA 2465
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 21/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [67]: plt.figure(figsize=(10, 5))
# One box per listed column of df1, all on a shared axis.
df1.boxplot(column=['Age', 'Commision', 'Duration', 'Sales'])
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 22/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
fig.show()
fig.show()
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 23/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
fig.show()
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 24/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
fig.show()
fig.show()
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 25/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
fig.show()
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 26/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
fig.show()
fig.show()
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 27/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
fig.show()
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 28/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [77]: sns.pairplot(df1)
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 29/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [78]: plt.figure(figsize=(7,6))
# df1 still contains string-valued columns at this point (they are label-encoded in a
# later cell), and DataFrame.corr() raises on non-numeric data in pandas >= 2.0.
# Selecting the numeric columns explicitly works on every pandas version.
sns.heatmap(df1.select_dtypes(include='number').corr(),annot=True,fmt=".2f");
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 30/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
feature: Agency_Code
[0 2 1 3]
feature: Type
[0 1]
feature: Claimed
['No', 'Yes']
[0 1]
feature: Channel
['Online', 'Offline']
[1 0]
[2 1 0 4 3]
feature: Destination
[0 1 2]
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 31/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [80]: df1.head()
Out[80]:
Product
Age Agency_Code Type Claimed Commision Channel Duration Sales Destination
Name
0 48 0 0 0 0.70 1 7 2.51 2 0
1 36 2 1 0 0.00 1 34 20.00 2 0
2 39 1 1 0 5.94 1 3 9.90 2 1
3 36 2 1 0 0.00 1 4 26.00 1 0
4 33 3 0 0 6.30 1 53 18.00 0 0
In [81]: df1.info()
<class 'pandas.core.frame.DataFrame'>
In [82]: df1.Claimed.value_counts()
Out[82]: 0 2076
1 924
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 32/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
Out[83]:
Age Agency_Code Type Commision Channel Duration Sales Product Name Destination
0 48 0 0 0.70 1 7 2.51 2 0
1 36 2 1 0.00 1 34 20.00 2 0
2 39 1 1 5.94 1 3 9.90 2 1
3 36 2 1 0.00 1 4 26.00 1 0
4 33 3 0 6.30 1 53 18.00 0 0
In [84]: y.head()
Out[84]: 0 0
1 0
2 0
3 0
4 0
In [86]: _split_parts = {'X_train': X_train, 'X_test': X_test, 'train_labels': train_labels, 'test_labels': test_labels}
# Print each partition's name followed by its shape, one partition per line.
for _part_name, _part in _split_parts.items():
    print(_part_name, _part.shape)
X_train (2100, 9)
X_test (900, 9)
train_labels (2100,)
test_labels (900,)
Out[89]: DecisionTreeClassifier()
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 33/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [91]: param_grid_dtcl = dict(
    # Candidate decision-tree settings handed to the grid search below:
    # 1 criterion x 5 depths x 5 leaf sizes x 3 split sizes = 75 combinations.
    criterion=['gini'],
    max_depth=[3, 5, 7, 10, 12],
    min_samples_leaf=[20, 30, 40, 50, 60],
    min_samples_split=[150, 300, 450],
)
dtcl = DecisionTreeClassifier(random_state=1)
grid_search_dtcl = GridSearchCV(estimator = dtcl, param_grid = param_grid_dtcl, c
random_state=1)
In [93]: param_grid_dtcl = {
    # Narrower decision-tree grid used for the second grid search.
    'criterion': ['gini'],
    # max_depth is a number of tree levels and must be an integer: recent scikit-learn
    # releases reject values such as 3.5. The previous list (3.5, 4.0, 4.5, 5.0, 5.5)
    # spanned depths 3 to 5, so the whole-number depths in that range are searched.
    # NOTE(review): older releases presumably truncated the fractional values -- confirm
    # if exact parity with earlier results matters.
    'max_depth': [3, 4, 5],
    'min_samples_leaf': [40, 42, 44, 46, 48, 50, 52, 54],
    'min_samples_split': [250, 270, 280, 290, 300, 310],
}
dtcl = DecisionTreeClassifier(random_state=1)
grid_search_dtcl = GridSearchCV(estimator = dtcl, param_grid = param_grid_dtcl, c
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 34/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
min_samples_split=280, random_state=1)
In [96]: ytest_predict_prob_dtcl=best_grid_dtcl.predict_proba(X_test)
# Per-class probabilities from the tuned decision tree for the test rows; show the
# first five rows as a table. Only the last expression of a cell is displayed, so the
# bare-name statements that had no visible effect are not kept.
pd.DataFrame(ytest_predict_prob_dtcl).head()
Out[96]:
0 1
0 0.887805 0.112195
1 0.432432 0.567568
2 0.432432 0.567568
3 0.208163 0.791837
4 0.937143 0.062857
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 35/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
AUC: 0.825
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 36/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
AUC: 0.792
In [102]: param_grid_rfcl = {
# Every hyperparameter lists a single value, so this grid contains exactly one combination.
# The numbers in the trailing comments are the author's notes; presumably other candidate
# values that were considered -- TODO confirm.
'max_depth': [6],# noted values: 20,30,40
'max_features': [4],# noted values: 7,8,9
'min_samples_leaf': [8],# noted values: 50,100
'min_samples_split': [45], # noted values: 60,70
'n_estimators': [100] # noted values: 100,200
}
rfcl = RandomForestClassifier(random_state=1)
grid_search_rfcl = GridSearchCV(estimator = rfcl, param_grid = param_grid_rfcl, c
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 37/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
min_samples_split=45, random_state=1)
In [104]: rf_train_acc=best_grid_rfcl.score(X_train,train_labels)
rf_train_acc
Out[104]: 0.81
In [105]: cart_train_acc=best_grid_dtcl.score(X_train,train_labels)
cart_train_acc
Out[105]: 0.79
In [107]: param_grid_rfcl = {
# Same single-combination grid as the one defined in cell In [102]; re-assigning it
# here does not change any value.
# The numbers in the trailing comments are the author's notes; presumably other candidate
# values that were considered -- TODO confirm.
'max_depth': [6],# noted values: 20,30,40
'max_features': [4],# noted values: 7,8,9
'min_samples_leaf': [8],# noted values: 50,100
'min_samples_split': [45], # noted values: 60,70
'n_estimators': [100] # noted values: 100,200
}
rfcl = RandomForestClassifier(random_state=1)
grid_search_rfcl = GridSearchCV(estimator = rfcl, param_grid = param_grid_rfcl, c
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 38/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
min_samples_split=45, random_state=1)
In [109]: grid_search_rfcl.best_params_
Out[109]: {'max_depth': 6,
'max_features': 4,
'min_samples_leaf': 8,
'min_samples_split': 45,
'n_estimators': 100}
In [111]: ytest_predict_prob_rfcl=best_grid_rfcl.predict_proba(X_test)
# Per-class probabilities from the tuned random forest for the test rows; show the
# first five rows as a table. Only the last expression of a cell is displayed, so the
# bare-name statements that had no visible effect are not kept.
pd.DataFrame(ytest_predict_prob_rfcl).head()
Out[111]:
0 1
0 0.732980 0.267020
1 0.493807 0.506193
2 0.448772 0.551228
3 0.258665 0.741335
4 0.926248 0.073752
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 39/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 40/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
rf_train_precision 0.72
rf_train_recall 0.6
rf_train_f1 0.65
In [115]: confusion_matrix(train_labels,ytrain_predict_rfcl)
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 41/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [116]: print(classification_report(train_labels,ytrain_predict_rfcl))
rf_train_precision 0.72
rf_train_recall 0.6
rf_train_f1 0.65
In [118]: confusion_matrix(test_labels,ytest_predict_rfcl)
In [119]: rf_test_acc=best_grid_rfcl.score(X_test,test_labels)
rf_test_acc
Out[119]: 0.7733333333333333
In [120]: print(classification_report(test_labels,ytest_predict_rfcl))
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 42/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
rf_test_precision 0.73
rf_test_recall 0.49
rf_test_f1 0.59
In [122]: param_grid_nncl = {
# Search space for the MLP classifier grid search:
# 3 layer sizes x 3 iteration caps x 1 solver x 1 tolerance = 9 combinations.
# Trailing comments are the author's notes; presumably alternative values -- TODO confirm.
'hidden_layer_sizes': [50,100,200], # noted values: 50, 200
'max_iter': [2500,3000,4000], # noted values: 5000,2500
'solver': ['adam'], # noted value: sgd
'tol': [0.01],
}
nncl = MLPClassifier(random_state=1)
grid_search_nncl = GridSearchCV(estimator = nncl, param_grid = param_grid_nncl, c
In [124]: grid_search_nncl.best_params_
In [126]: ytest_predict_prob_nncl=best_grid_nncl.predict_proba(X_test)
# Per-class probabilities from the tuned neural network for the test rows; show the
# first five rows as a table. Only the last expression of a cell is displayed, so the
# bare-name statements that had no visible effect are not kept.
pd.DataFrame(ytest_predict_prob_nncl).head()
Out[126]:
0 1
0 0.828364 0.171636
1 0.627123 0.372877
2 0.526596 0.473404
3 0.327278 0.672722
4 0.924043 0.075957
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 43/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
In [128]: cart_train_acc=best_grid_dtcl.score(X_train,train_labels)
cart_train_acc
Out[128]: 0.79
nn_train_precision 0.67
nn_train_recall 0.51
nn_train_f1 0.57
nn_test_precision 0.72
nn_test_recall 0.43
nn_test_f1 0.54
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 44/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
cart_train_precision 0.67
cart_train_recall 0.59
cart_train_f1 0.63
In [133]: nn_train_acc=best_grid_nncl.score(X_train,train_labels)
nn_train_acc
Out[133]: 0.7757142857142857
In [135]: cart_test_acc=best_grid_dtcl.score(X_test,test_labels)
cart_test_acc
Out[135]: 0.7677777777777778
In [136]: nn_test_acc=best_grid_nncl.score(X_test,test_labels)
nn_test_acc
Out[136]: 0.76
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 45/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
cart_test_precision 0.71
cart_test_recall 0.49
cart_test_f1 0.58
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 46/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 47/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
Out[141]:
CART CART Random Forest Random Forest Neural Network Neural Network
Train Test Train Test Train Test
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 48/49
10/24/21, 7:17 PM DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021 - Jupyter Notebook
localhost:8888/notebooks/Downloads/DATA_MINING_PROJECT___PAVITHRAA_GOVINDARAJAN_24_OCT_2021.ipynb 49/49