DWM Practical Endgame
1. Naive Bayes
1. Import statements
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn.naive_bayes import GaussianNB
data = pd.read_csv('laptop_buying_data.csv')
2. remove class label from dataset and put it in 'y' array
X = data.drop('Purchase Intention', axis=1)
y = data['Purchase Intention']
3. encoding (convert the text columns to numbers)
le = LabelEncoder()
X['Gender'] = le.fit_transform(X['Gender'])
X['Occupation'] = le.fit_transform(X['Occupation'])
X['Brand Preference'] = le.fit_transform(X['Brand Preference'])
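Note: reusing one LabelEncoder refits it on each column, so the earlier mappings are lost and can't be decoded back later. A minimal sketch with one encoder per column ('encoders' is a helper dict, not part of the original notes):
encoders = {}
for col in ['Gender', 'Occupation', 'Brand Preference']:
    encoders[col] = LabelEncoder()
    X[col] = encoders[col].fit_transform(X[col])
# e.g. encoders['Gender'].inverse_transform([0, 1]) recovers the original labels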
4. Splitting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
5. call naive bayes (fit the split data to Naive Bayes and find the predicted 'y')
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
y_pred = nb_model.predict(X_test)
6. print accuracy and matrix
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
print(conf_matrix)
print(accuracy)
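Optional sanity check: predict one row with the trained model. 'sample' here is just the first (already encoded) test row; predict_proba gives the class probabilities.
sample = X_test.iloc[[0]]
print(nb_model.predict(sample))
print(nb_model.predict_proba(sample))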
2. ID3/Gini
1. Import statements
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
import matplotlib.pyplot as plt
from sklearn import tree
data = pd.read_csv('laptop_buying_data.csv')
2. encoding + remove class label from dataset and put it in 'y' array
X = data[['Age', 'Income', 'Previous Purchases', 'Gender', 'Occupation', 'Brand Preference']]
X = pd.get_dummies(X)
y = data['Purchase Intention']
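Quick illustration of what get_dummies does, on a toy frame (not the real dataset):
demo = pd.DataFrame({'Gender': ['Male', 'Female', 'Male']})
print(pd.get_dummies(demo))
# one 0/1 (boolean in newer pandas) column per category: Gender_Female, Gender_Male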
3. splitting
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
4. call ID3 (almost same as naive bayes)
# use criterion='gini' for the Gini index ('entropy' below corresponds to ID3)
id3_model = DecisionTreeClassifier(criterion='entropy', random_state=42)
id3_model.fit(X_train, y_train)
y_pred = id3_model.predict(X_test)
5. print accuracy and matrix
conf_matrix = confusion_matrix(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)
print("Confusion Matrix:")
print(conf_matrix)
print(f'Accuracy: {accuracy:.2f}')
6. plotting
plt.figure(figsize=(20, 10))
tree.plot_tree(id3_model, filled=True, feature_names=X.columns, class_names=['No Purchase', 'Purchase'])
plt.show()
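If plotting isn't available, the same tree can also be dumped as plain text with sklearn's export_text helper:
from sklearn.tree import export_text
print(export_text(id3_model, feature_names=list(X.columns)))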
3. K Means
1. Import statements
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
data = pd.read_csv('customer_clustering_data.csv')
2. select the feature columns into X (no encoding required)
X = data[['Annual Income (k$)', 'Spending Score (1-100)']]
3. Call KMeans
optimal_k = 4
kmeans = KMeans(n_clusters=optimal_k, random_state=42)
kmeans.fit(X)
data['Cluster'] = kmeans.labels_
4. Plot
plt.figure(figsize=(10, 5))
plt.scatter(data['Annual Income (k$)'], data['Spending Score (1-100)'], c=data['Cluster'], cmap='viridis', s=50)
centers = kmeans.cluster_centers_
plt.scatter(centers[:, 0], centers[:, 1], c='red', s=200, alpha=0.75, marker='X')
plt.xlabel('Annual Income (k$)')
plt.ylabel('Spending Score (1-100)')
plt.colorbar()  # assuming a colorbar here for the cluster colours
plt.show()
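Optional: assign a new customer to a cluster (hypothetical income/score values; the column names must match the ones used to fit):
new_point = pd.DataFrame({'Annual Income (k$)': [60], 'Spending Score (1-100)': [55]})
print(kmeans.predict(new_point))  # index of the nearest cluster centre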
5. Elbow method to find optimal_k (optional; I'm skipping it and just assuming k = 4)
inertia = []
k_values = range(1, 11)
for k in k_values:
    kmeans = KMeans(n_clusters=k, random_state=42)
    kmeans.fit(X)
    inertia.append(kmeans.inertia_)
plt.figure(figsize=(10, 5))
plt.plot(k_values, inertia, marker='o')
plt.xlabel('Number of clusters (k)')
plt.ylabel('Inertia')
plt.xticks(k_values)
plt.grid()  # assuming a grid toggle here
plt.show()
Flow
1. Naive Bayes
import > drop_class_label > encoding > splitting > call_NaiveBayes > print
2. ID3
import > drop_class_label + encoding > splitting > call_ID3 > print > plot
3. K Means
import > add to X > find_optimal_K (optional) > call_KMeans > Plot