Mushroom Classification Using Machine Learning
Mushroom Classification Using Machine Learning
(8124, 23)
Out[ ]:
1. Class distribution
c:\Users\praty\AppData\Local\Programs\Python\Python310\lib\site-packages\seaborn\_decorators.
py:36: FutureWarning: Pass the following variable as a keyword arg: x. From version 0.12, the
only valid positional argument will be `data`, and passing other arguments without an explici
t keyword will result in an error or misinterpretation.
warnings.warn(
2.Feature distributions:
# Loop over all the features and plot a histogram of their values
for col in data.columns[1:]:
sns.histplot(data=data, x=col, hue='class', multiple='stack', bins=20)
plt.title(col)
plt.show()
3.Feature correlations:
No correlations found.
In [ ]: data.head()
Out[ ]: stalk- sta
cap- cap- cap- gill- gill- gill- gill- stalk- stalk- surface- surfa
class bruises odor
shape surface color attachment spacing size color shape root above- belo
ring r
0 p x s n t p f c n k e e s
1 e x s y t a f c b k e c s
2 e b s w t l f c b n e c s
3 p x y w t p f c n n e e s
4 e x s g f n f w b k t e s
# Load data
data = pd.read_csv('mushrooms.csv')
stalk-surface-above-ring stalk-surface-below-ring \
count 8124 8124
unique 4 4
top s s
freq 5176 4936
In [ ]: pd.set_option('display.max_columns',None)
0 p x s n t p f c n k e e s
1 e x s y t a f c b k e c s
2 e b s w t l f c b n e c s
3 p x y w t p f c n n e e s
4 e x s g f n f w b k t e s
# cap-surface: fibrous=f,grooves=g,scaly=y,smooth=s
# cap-color: brown=n,buff=b,cinnamon=c,gray=g,green=r,pink=p,purple=u,red=e,white=w,yello
# bruises: bruises=t,no=f
# odor: almond=a,anise=l,creosote=c,fishy=y,foul=f,musty=m,none=n,pungent=p,spicy=s
# gill-attachment: attached=a,descending=d,free=f,notched=n
# gill-spacing: close=c,crowded=w,distant=d
# gill-size: broad=b,narrow=n
# stalk-shape: enlarging=e,tapering=t
# stalk-root: bulbous=b,club=c,cup=u,equal=e,rhizomorphs=z,rooted=r,missing=?
# stalk-surface-above-ring: fibrous=f,scaly=y,silky=k,smooth=s
# stalk-surface-below-ring: fibrous=f,scaly=y,silky=k,smooth=s
# stalk-color-above-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,
# stalk-color-below-ring: brown=n,buff=b,cinnamon=c,gray=g,orange=o,pink=p,red=e,white=w,
# veil-type: partial=p,universal=u
# veil-color: brown=n,orange=o,white=w,yellow=y
# ring-number: none=n,one=o,two=t
# ring-type: cobwebby=c,evanescent=e,flaring=f,large=l,none=n,pendant=p,sheathing=s,zone=
# spore-print-color: black=k,brown=n,buff=b,chocolate=h,green=r,orange=o,purple=u,white=w
# population: abundant=a,clustered=c,numerous=n,scattered=s,several=v,solitary=y
# habitat: grasses=g,leaves=l,meadows=m,paths=p,urban=u,waste=w,woods=d
2. Check Last 5 Rows of The Dataset
In [ ]: data.tail()
Out[ ]: stalk-
cap- cap- cap- gill- gill- gill- gill- stalk- stalk- surface- s
class bruises odor
shape surface color attachment spacing size color shape root above-
ring
8119 e k s n f n a c b y e ? s
8120 e x s n f n a c b y e ? s
8121 e f s n f n a c b n e ? s
8122 p k y n f y f c n b t ? s
8123 e x s n f n a c b y e ? s
(8124, 23)
Out[ ]:
4. Get Information About Our Dataset Like Total Number Rows, Total
Number of Columns, Datatypes of Each Column And Memory
Requirement
In [ ]: data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8124 entries, 0 to 8123
Data columns (total 23 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 class 8124 non-null object
1 cap-shape 8124 non-null object
2 cap-surface 8124 non-null object
3 cap-color 8124 non-null object
4 bruises 8124 non-null object
5 odor 8124 non-null object
6 gill-attachment 8124 non-null object
7 gill-spacing 8124 non-null object
8 gill-size 8124 non-null object
9 gill-color 8124 non-null object
10 stalk-shape 8124 non-null object
11 stalk-root 8124 non-null object
12 stalk-surface-above-ring 8124 non-null object
13 stalk-surface-below-ring 8124 non-null object
14 stalk-color-above-ring 8124 non-null object
15 stalk-color-below-ring 8124 non-null object
16 veil-type 8124 non-null object
17 veil-color 8124 non-null object
18 ring-number 8124 non-null object
19 ring-type 8124 non-null object
20 spore-print-color 8124 non-null object
21 population 8124 non-null object
22 habitat 8124 non-null object
dtypes: object(23)
memory usage: 1.4+ MB
class 0
Out[ ]:
cap-shape 0
cap-surface 0
cap-color 0
bruises 0
odor 0
gill-attachment 0
gill-spacing 0
gill-size 0
gill-color 0
stalk-shape 0
stalk-root 0
stalk-surface-above-ring 0
stalk-surface-below-ring 0
stalk-color-above-ring 0
stalk-color-below-ring 0
veil-type 0
veil-color 0
ring-number 0
ring-type 0
spore-print-color 0
population 0
habitat 0
dtype: int64
Out[ ]: stalk-
cap- cap- cap- gill- gill- gill- gill- stalk- stalk- surface-
class bruises odor
shape surface color attachment spacing size color shape root above-
ring
count 8124 8124 8124 8124 8124 8124 8124 8124 8124 8124 8124 8124 8124
unique 2 6 4 10 2 9 2 2 2 12 2 5 4
top e x y n f n f c b b t b s
freq 4208 3656 3244 2284 4748 3528 7914 6812 5612 1728 4608 3776 5176
7. Data Manipulation
In [ ]: data.head()
0 p x s n t p f c n k e e s
1 e x s y t a f c b k e c s
2 e b s w t l f c b n e c s
3 p x y w t p f c n n e e s
4 e x s g f n f w b k t e s
In [ ]: data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8124 entries, 0 to 8123
Data columns (total 23 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 class 8124 non-null object
1 cap-shape 8124 non-null object
2 cap-surface 8124 non-null object
3 cap-color 8124 non-null object
4 bruises 8124 non-null object
5 odor 8124 non-null object
6 gill-attachment 8124 non-null object
7 gill-spacing 8124 non-null object
8 gill-size 8124 non-null object
9 gill-color 8124 non-null object
10 stalk-shape 8124 non-null object
11 stalk-root 8124 non-null object
12 stalk-surface-above-ring 8124 non-null object
13 stalk-surface-below-ring 8124 non-null object
14 stalk-color-above-ring 8124 non-null object
15 stalk-color-below-ring 8124 non-null object
16 veil-type 8124 non-null object
17 veil-color 8124 non-null object
18 ring-number 8124 non-null object
19 ring-type 8124 non-null object
20 spore-print-color 8124 non-null object
21 population 8124 non-null object
22 habitat 8124 non-null object
dtypes: object(23)
memory usage: 1.4+ MB
In [ ]: data.dtypes
class category
Out[ ]:
cap-shape category
cap-surface category
cap-color category
bruises category
odor category
gill-attachment category
gill-spacing category
gill-size category
gill-color category
stalk-shape category
stalk-root category
stalk-surface-above-ring category
stalk-surface-below-ring category
stalk-color-above-ring category
stalk-color-below-ring category
veil-type category
veil-color category
ring-number category
ring-type category
spore-print-color category
population category
habitat category
dtype: object
0 1 5 2 4 1 6 1 0 1 4 0 3 2
1 0 5 2 9 1 0 1 0 0 4 0 2 2
2 0 0 2 8 1 3 1 0 0 5 0 2 2
3 1 5 3 8 1 6 1 0 1 5 0 3 2
4 0 5 2 3 0 5 1 1 0 4 1 3 2
9. Applying PCA
In [ ]: from sklearn.decomposition import PCA
pca1 = PCA(n_components = 7)
pca_fit1 = pca1.fit_transform(X)
10. Splitting The Dataset Into The Training Set And Test Set
In [ ]: from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(pca_fit1,y,test_size=0.20,
random_state=42)
knn = KNeighborsClassifier()
knn.fit(X_train,y_train)
svc = SVC()
svc.fit(X_train,y_train)
dt = DecisionTreeClassifier()
dt.fit(X_train,y_train)
rm = RandomForestClassifier()
rm.fit(X_train,y_train)
gb = GradientBoostingClassifier()
gb.fit(X_train,y_train)
GradientBoostingClassifier()
Out[ ]:
cm = confusion_matrix(y_test,y_pred3)
#Plot the confusion matrix.
sns.heatmap(cm,
annot=True,
fmt='g',
xticklabels=['poisonous','eadible'],
yticklabels=['poisonous','eadible'])
plt.ylabel('Prediction',fontsize=13)
plt.xlabel('Actual',fontsize=13)
plt.title('Confusion Matrix',fontsize=17)
plt.show()
ACC LR 0.8344615384615385
ACC KNN 0.9833846153846154
ACC SVC 0.952
ACC DT 0.9784615384615385
ACC RM 0.9975384615384615
ACC GBC 0.9384615384615385
In [ ]:
In [ ]: final_data
0 LR 83.446154
1 KNN 98.338462
2 SVC 95.200000
3 DT 97.846154
4 RM 99.753846
5 GBC 93.846154
c:\Users\praty\AppData\Local\Programs\Python\Python310\lib\site-packages\seaborn\_decorators.
py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12,
the only valid positional argument will be `data`, and passing other arguments without an exp
licit keyword will result in an error or misinterpretation.
warnings.warn(
<AxesSubplot:xlabel='Models', ylabel='ACC'>
Out[ ]:
RandomForestClassifier()
Out[ ]:
In [ ]: joblib.dump(rf_model,"Mushroom_prediction")
['Mushroom_prediction']
Out[ ]:
In [ ]:
In [ ]: p =model.predict(pca1.transform([[5,2,4,1,6,1,0,1,4,0,3,2,2,7,7,0,2,1,4,2,3,5]]))
c:\Users\praty\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\base.py:450:
UserWarning: X does not have valid feature names, but PCA was fitted with feature names
warnings.warn(
In [ ]: if p[0]==1:
print('Poissonous')
else:
print('Edible')
Poissonous
GUI
In [ ]: from tkinter import *
import joblib
p12=int(e12.get())
p13=int(e13.get())
p14=int(e14.get())
p15=int(e15.get())
p16=int(e16.get())
p17=int(e17.get())
p18=int(e18.get())
p19=int(e19.get())
p20=int(e20.get())
p21=int(e21.get())
p22=int(e22.get())
model = joblib.load('Mushroom_prediction')
result=model.predict(pca1.transform([[p1,p2,p3,p4,p5,p6,
p7,p8,p9,p10,p11,p12,p13,p14,p15,
p16,p17,p18,p19,p20,p21,p22]]))
if result[0] == 0:
Label(master, text="Edible").grid(row=31)
else:
Label(master, text="Poisonous").grid(row=31)
master = Tk()
master.title("Mushroom Classification Using Machine Learning")
Label(master,text="stalk-surface-above-ring:(fibrous=0,scaly=3,silky=1,smooth=2)").grid(row=1
Label(master,text="stalk-surface-below-ring:(fibrous=0,scaly=3,silky=1,smooth=2 \
)").grid(row=13)
Label(master,text="stalk-color-above-ring:(brown=4,buff=0,cinnamon=1,gray=3, \
orange=5,pink=6,red=2,white=7,yellow=8)").grid(row=14)
Label(master,text="stalk-color-below-ring:(brown=4,buff=0,cinnamon=1,gray=3, \
orange=5,pink=6,red=2,white=7,yellow=8)").grid(row=15)
Label(master,text="veil-type:(partial=0,universal=1)").grid(row=16)
Label(master,text="veil-color:(brown=0,orange=1,white=2,yellow=3)").grid(row=17)
Label(master,text="ring-number:(none=0,one=1,two=2)").grid(row=18)
Label(master,text="ring-type:(cobwebby=0,evanescent=1,flaring=2,large=3,\
none=4,pendant=5,sheathing=6,zone=7)").grid(row=19)
Label(master,text="spore-print-color:(black=2,brown=3,buff=0,chocolate=1, \
green=5,orange=4,purple=6,white=7,yellow=8 \
)").grid(row=20)
Label(master,text="population:(abundant=0,clustered=1,numerous=2,scattered=3, \
# several=4,solitary=5)").grid(row=21)
Label(master,text="habitat:(grasses=1,leaves=2,meadows=3,paths=4,urban=5,\
# waste=6,woods=0)").grid(row=22)
e1 = Entry(master)
e2 = Entry(master)
e3 = Entry(master)
e4 = Entry(master)
e5 = Entry(master)
e6 = Entry(master)
e7 = Entry(master)
e8 = Entry(master)
e9 = Entry(master)
e10 = Entry(master)
e11 = Entry(master)
e12 = Entry(master)
e13 = Entry(master)
e14 = Entry(master)
e15 = Entry(master)
e16 = Entry(master)
e17 = Entry(master)
e18 = Entry(master)
e19 = Entry(master)
e20 = Entry(master)
e21 = Entry(master)
e22 = Entry(master)
e1.grid(row=1, column=1)
e2.grid(row=2, column=1)
e3.grid(row=3, column=1)
e4.grid(row=4, column=1)
e5.grid(row=5, column=1)
e6.grid(row=6, column=1)
e7.grid(row=7, column=1)
e8.grid(row=8, column=1)
e9.grid(row=9, column=1)
e10.grid(row=10,column=1)
e11.grid(row=11,column=1)
e12.grid(row=12,column=1)
e13.grid(row=13,column=1)
e14.grid(row=14,column=1)
e15.grid(row=15,column=1)
e16.grid(row=16,column=1)
e17.grid(row=17,column=1)
e18.grid(row=18,column=1)
e19.grid(row=19,column=1)
e20.grid(row=20,column=1)
e21.grid(row=21,column=1)
e22.grid(row=22,column=1)
Button(master, text='Predict', command=show_entry_fields).grid()
mainloop()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: