Classifier Performance on MNIST Dataset

1. Load the Fashion MNIST dataset (or the digit MNIST dataset if offline).

Train and compare the performance of the following classifiers:

• Logistic Regression
• K-Nearest Neighbors (KNN)
• Decision Tree
• Random Forest

Then:

• Report the accuracy of each model on a test set.
• Visualize a bar chart comparing the test accuracies.
• Display confusion matrices for each model side by side.
• Identify and visualize misclassified images for each model.

from tensorflow.keras.datasets import mnist

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix

# Load the MNIST digit dataset (28x28 grayscale images, labels 0-9).
(X_train_full, y_train_full), (X_test, y_test) = mnist.load_data()

# Flatten each image to a 784-dimensional vector and scale pixels to [0, 1].
X_train_full = X_train_full.reshape(-1, 784) / 255.0
X_test = X_test.reshape(-1, 784) / 255.0

# Use a stratified 10,000-sample subset to keep training times manageable.
X_train, _, y_train, _ = train_test_split(
    X_train_full, y_train_full, train_size=10000,
    stratify=y_train_full, random_state=42
)
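The task mentions Fashion MNIST, while the code above loads the digit MNIST dataset. A minimal sketch of the alternative loader, assuming the Keras-bundled Fashion MNIST dataset can be downloaded; the rest of the pipeline is unchanged, since Fashion MNIST images are also 28x28 grayscale with 10 classes:

# Assumption: network access for the first download of Fashion MNIST.
from tensorflow.keras.datasets import fashion_mnist

(X_train_full, y_train_full), (X_test, y_test) = fashion_mnist.load_data()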

models = {
"Logistic Regression": LogisticRegression(max_iter=1000),
"K-Nearest Neighbors": KNeighborsClassifier(),
"Decision Tree": DecisionTreeClassifier(),
"Random Forest": RandomForestClassifier()
}

accuracies = {}
conf_matrices = {}
misclassified = {}
y_preds = {}



for name, model in models.items():
    print(f"Training {name}...")
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    cm = confusion_matrix(y_test, y_pred)
    miscl = np.where(y_pred != y_test)[0]  # indices of misclassified test images

    accuracies[name] = acc
    conf_matrices[name] = cm
    misclassified[name] = miscl
    y_preds[name] = y_pred  # cache predictions for the plots below
    print(f"{name} test accuracy: {acc:.4f}")

# Bar chart comparing test accuracies.
plt.figure(figsize=(8, 5))
sns.barplot(x=list(accuracies.keys()), y=list(accuracies.values()))
plt.title("Test Accuracy of Classifiers on MNIST")
plt.ylabel("Accuracy")
plt.ylim(0.8, 1.0)
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

# Confusion matrices for all four models in a 2x2 grid.
fig, axes = plt.subplots(2, 2, figsize=(14, 10))

for ax, (name, cm) in zip(axes.ravel(), conf_matrices.items()):
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(f"{name} Confusion Matrix")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
plt.tight_layout()
plt.show()
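Raw counts can make per-class behavior hard to compare when some digits are misclassified more than others. A minimal optional sketch (not part of the original code) that row-normalizes one confusion matrix so each cell reads as per-class recall, assuming scikit-learn >= 0.22 for the normalize argument:

# Assumption: y_preds was populated in the training loop above.
cm_norm = confusion_matrix(y_test, y_preds["Random Forest"], normalize='true')
plt.figure(figsize=(6, 5))
sns.heatmap(cm_norm, annot=True, fmt='.2f', cmap='Blues')
plt.title("Random Forest Confusion Matrix (row-normalized)")
plt.show()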

# Show the first five misclassified test images for each model.
fig, axes = plt.subplots(len(models), 5, figsize=(12, 10))
fig.suptitle("Misclassified MNIST Images by Model", fontsize=16)

for i, (name, indices) in enumerate(misclassified.items()):
    for j, idx in enumerate(indices[:5]):
        ax = axes[i, j]
        ax.imshow(X_test[idx].reshape(28, 28), cmap='gray')
        ax.set_title(f"True: {y_test[idx]}\nPred: {y_preds[name][idx]}")
        ax.axis('off')
    # Blank any unused axes if a model has fewer than five misclassifications.
    for j in range(len(indices[:5]), 5):
        axes[i, j].axis('off')

plt.tight_layout(rect=[0, 0, 1, 0.96])
plt.show()



Output: per-model accuracy printout, the accuracy bar chart, the 2x2 confusion-matrix grid, and the misclassified-image grid (figures not reproduced here).


2. Plot learning curves to visualize model performance as the training-set size varies. Plot validation curves to determine optimal hyperparameter values.

import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve, validation_curve, train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from tensorflow.keras.datasets import mnist

# Load MNIST, flatten to 784-dimensional vectors, and scale pixels to [0, 1].
(X_train_full, y_train_full), (X_test, y_test) = mnist.load_data()

X_train_full = X_train_full.reshape(-1, 28*28) / 255.0
X_test = X_test.reshape(-1, 28*28) / 255.0

# Stratified 10,000-sample subset to keep curve computation fast.
X_train, _, y_train, _ = train_test_split(
    X_train_full, y_train_full, train_size=10000,
    stratify=y_train_full, random_state=42
)

def plot_learning_curve(estimator, title, X, y, cv=3):
    # Cross-validated accuracy at several training-set sizes.
    train_sizes, train_scores, val_scores = learning_curve(
        estimator, X, y, cv=cv, scoring='accuracy', n_jobs=-1
    )
    train_mean = train_scores.mean(axis=1)
    val_mean = val_scores.mean(axis=1)

    plt.figure(figsize=(8, 5))
    plt.plot(train_sizes, train_mean, label="Training score", marker='o')
    plt.plot(train_sizes, val_mean, label="Validation score", marker='s')
    plt.title(title)
    plt.xlabel("Training Set Size")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()
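Mean scores alone can hide fold-to-fold variance. A minimal sketch of an optional variant (an addition, not part of the original) that shades one standard deviation around each curve using matplotlib's fill_between:

# Assumption: same learning_curve inputs as the function above.
def plot_learning_curve_with_bands(estimator, title, X, y, cv=3):
    train_sizes, train_scores, val_scores = learning_curve(
        estimator, X, y, cv=cv, scoring='accuracy', n_jobs=-1
    )
    plt.figure(figsize=(8, 5))
    for scores, label in [(train_scores, "Training"), (val_scores, "Validation")]:
        mean, std = scores.mean(axis=1), scores.std(axis=1)
        plt.plot(train_sizes, mean, marker='o', label=f"{label} score")
        plt.fill_between(train_sizes, mean - std, mean + std, alpha=0.2)
    plt.title(title)
    plt.xlabel("Training Set Size")
    plt.ylabel("Accuracy")
    plt.legend()
    plt.show()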

def plot_validation_curve(estimator, X, y, param_name, param_range, title, cv=3):
    # Cross-validated accuracy across a range of one hyperparameter's values.
    train_scores, val_scores = validation_curve(
        estimator, X, y, param_name=param_name, param_range=param_range,
        scoring='accuracy', cv=cv, n_jobs=-1
    )
    train_mean = train_scores.mean(axis=1)
    val_mean = val_scores.mean(axis=1)

    plt.figure(figsize=(8, 5))
    plt.plot(param_range, train_mean, label="Training score", marker='o')
    plt.plot(param_range, val_mean, label="Validation score", marker='s')
    plt.title(title)
    plt.xlabel(param_name)
    plt.ylabel("Accuracy")
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

plot_learning_curve(KNeighborsClassifier(), "Learning Curve: K-Nearest Neighbors", X_train, y_train)

plot_validation_curve(KNeighborsClassifier(), X_train, y_train, param_name="n_neighbors",
                      param_range=np.arange(1, 11), title="Validation Curve: KNN - n_neighbors")

plot_learning_curve(DecisionTreeClassifier(), "Learning Curve: Decision Tree", X_train, y_train)

plot_validation_curve(DecisionTreeClassifier(), X_train, y_train, param_name="max_depth",
                      param_range=np.arange(1, 21), title="Validation Curve: Decision Tree - max_depth")
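To determine the optimal value programmatically rather than by eyeballing the curve, validation_curve can be reused directly. A minimal sketch (an addition, not part of the original) that picks the n_neighbors with the highest mean validation accuracy:

# Assumption: same X_train/y_train subset as above.
param_range = np.arange(1, 11)
train_scores, val_scores = validation_curve(
    KNeighborsClassifier(), X_train, y_train,
    param_name="n_neighbors", param_range=param_range,
    scoring="accuracy", cv=3, n_jobs=-1
)
best_k = param_range[val_scores.mean(axis=1).argmax()]
print(f"Best n_neighbors by mean validation accuracy: {best_k}")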
Output: learning and validation curves for KNN and Decision Tree (figures not reproduced here).

