Practicalpgm ML

The document provides a comprehensive overview of various machine learning techniques, including linear regression, multiple regression, polynomial regression, bagging classifiers, AdaBoost, logistic regression, decision trees, random forests, SVM classification, K-means clustering, density-based clustering, and the Apriori algorithm. Each section includes code examples, mostly using Python's scikit-learn library (with mlxtend for the Apriori example), that demonstrate the implementation of these algorithms on sample datasets. The document emphasizes the importance of model evaluation metrics such as accuracy, mean squared error, and R-squared scores.


1. Linear Regression

1.1 Simple Linear Regression
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
simple_data = {
    'YearsExperience': [1.1, 2.0, 3.2, 4.0, 5.0, 6.8, 7.1, 8.0, 9.0, 10.3],
    'Salary': [39343, 46205, 66029, 56957, 61000, 81363, 84400, 93940, 105000, 122391]
}
simple_df = pd.DataFrame(simple_data)
X_simple = simple_df[['YearsExperience']]
y_simple = simple_df['Salary']
X_train_simple, X_test_simple, y_train_simple, y_test_simple = train_test_split(X_simple, y_simple, test_size=0.2, random_state=42)
model_simple = LinearRegression()
model_simple.fit(X_train_simple, y_train_simple)
y_pred_simple = model_simple.predict(X_test_simple)
print("Simple Linear Regression")
print("Coefficients:", model_simple.coef_)
print("Intercept:", model_simple.intercept_)
print("Mean Squared Error:", mean_squared_error(y_test_simple, y_pred_simple))
print("R-squared Score:", r2_score(y_test_simple, y_pred_simple))
Output:
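As a quick follow-up (a sketch, not part of the original program, reusing the fitted model_simple), the learned line y = intercept + coef * years can be applied to a new experience value:

new_years = pd.DataFrame({'YearsExperience': [5.5]})  # hypothetical new input
print("Predicted salary for 5.5 years:", model_simple.predict(new_years)[0])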
1.2 Multiple Regression
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
multiple_data = {
    'YearsExperience': [1.1, 2.0, 3.2, 4.0, 5.0, 6.8, 7.1, 8.0, 9.0, 10.3],
    'EducationLevel': [1, 2, 2, 3, 3, 4, 4, 5, 5, 5],
    'Salary': [39343, 46205, 66029, 56957, 61000, 81363, 84400, 93940, 105000, 122391]
}
multiple_df = pd.DataFrame(multiple_data)
X_multiple = multiple_df[['YearsExperience', 'EducationLevel']]
y_multiple = multiple_df['Salary']
X_train_multiple, X_test_multiple, y_train_multiple, y_test_multiple = train_test_split(X_multiple, y_multiple, test_size=0.2, random_state=42)
model_multiple = LinearRegression()
model_multiple.fit(X_train_multiple, y_train_multiple)
y_pred_multiple = model_multiple.predict(X_test_multiple)
print("Multiple Linear Regression")
print("Coefficients:", model_multiple.coef_)
print("Intercept:", model_multiple.intercept_)
print("Mean Squared Error:", mean_squared_error(y_test_multiple, y_pred_multiple))
print("R-squared Score:", r2_score(y_test_multiple, y_pred_multiple))
Output:
2. Program to implement Polynomial Regression
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
X = np.array([1, 2, 3, 4, 5, 6, 7]).reshape(-1, 1)  # Years of Experience
y = np.array([50000, 55000, 65000, 80000, 110000, 150000, 200000]).reshape(-1, 1)  # Salary
degree = 2
poly = PolynomialFeatures(degree=degree)
X_poly = poly.fit_transform(X)
model = LinearRegression()
model.fit(X_poly, y)
y_pred = model.predict(X_poly)
plt.scatter(X, y, color='blue', label='Original Data')  # Scatter plot of actual data
plt.plot(X, y_pred, color='red', label=f'Polynomial Degree {degree}')  # Polynomial curve
plt.xlabel("Years of Experience")
plt.ylabel("Salary (in dollars)")
plt.title("Polynomial Regression")
plt.legend()
plt.show()
print("Model Coefficients:", model.coef_)
print("Model Intercept:", model.intercept_)
years_of_experience = np.array([[8]])
predicted_salary = model.predict(poly.transform(years_of_experience))
print(f"Predicted Salary for 8 years of experience: ${predicted_salary[0][0]:,.2f}")
Output:

Visual plot diagram:
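To make the transformation concrete, here is a minimal sketch (not part of the original program) of what PolynomialFeatures generates for degree 2: each input x becomes the row [1, x, x^2], and LinearRegression then fits ordinary least squares on those expanded columns.

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
x = np.array([[3]])  # a single sample with one feature
print(PolynomialFeatures(degree=2).fit_transform(x))  # [[1. 3. 9.]] i.e. [1, x, x^2]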


3. Bagging classifier
from sklearn.datasets import load_iris
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=42)
base_estimator = DecisionTreeClassifier(max_depth=3)
bagging = BaggingClassifier(estimator=base_estimator, n_estimators=10, random_state=42)
bagging.fit(X_train, y_train)
y_pred = bagging.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
Output:
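As a point of comparison (a sketch under the same split, not part of the original exercise), a single depth-3 tree can be scored the same way; the bagged ensemble typically matches or beats it because averaging over bootstrap resamples reduces variance:

single = DecisionTreeClassifier(max_depth=3, random_state=42)  # one tree, no bagging
single.fit(X_train, y_train)
print("Single tree accuracy:", accuracy_score(y_test, single.predict(X_test)))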
4. Implement the AdaBoost method
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
iris = load_iris()
iris_data = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_data['target'] = iris.target
print("Iris Dataset:")
print(iris_data.head())
X = iris.data
y = iris.target
X_train, X_val, Y_train, Y_val = train_test_split(X, y, test_size=0.25, random_state=28)
adb = AdaBoostClassifier(algorithm='SAMME', n_estimators=50, learning_rate=1)
adb_model = adb.fit(X_train, Y_train)
y_pred = adb_model.predict(X_val)
accuracy = accuracy_score(Y_val, y_pred)
print(f"Accuracy on the validation set: {accuracy:.4f}")
Output:
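To watch boosting improve round by round (a sketch reusing the fitted adb_model from above), staged_predict yields the ensemble's predictions after each added weak learner:

for i, stage_pred in enumerate(adb_model.staged_predict(X_val), start=1):
    if i % 10 == 0:  # report every 10 boosting rounds
        print(f"Accuracy after {i} estimators: {accuracy_score(Y_val, stage_pred):.4f}")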
5. Implement the Logistic Regression algorithm
import numpy
from sklearn import linear_model
X = numpy.array([3.78, 2.44, 2.09, 0.14, 1.72, 1.65, 4.92, 4.37, 4.96, 4.52, 3.69, 5.88]).reshape(-1, 1)
y = numpy.array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1])
logr = linear_model.LogisticRegression()
logr.fit(X, y)
predicted = logr.predict(numpy.array([3.46]).reshape(-1, 1))
if predicted[0] == 1:
    print("The tumor with a size of 3.46mm is predicted to be cancerous.")
else:
    print("The tumor with a size of 3.46mm is predicted to be non-cancerous.")
Output:
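Logistic regression is inherently probabilistic, so beyond the hard 0/1 label it can report the estimated class probabilities; a minimal sketch reusing the fitted logr:

proba = logr.predict_proba(numpy.array([3.46]).reshape(-1, 1))
print("P(non-cancerous), P(cancerous):", proba[0])  # the two probabilities sum to 1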
6. Decision Tree Classification
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier, plot_tree
from sklearn import metrics
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
data = {
    'Age': [36, 42, 23, 52, 43, 44, 66, 35, 52, 35, 24, 18, 45],
    'Experience': [10, 12, 4, 4, 21, 14, 3, 14, 13, 5, 3, 3, 9],
    'Rank': [9, 4, 6, 4, 8, 5, 7, 9, 7, 9, 5, 7, 9],
    'Nationality': ['UK', 'USA', 'N', 'USA', 'USA', 'UK', 'N', 'UK', 'N', 'N', 'USA', 'UK', 'UK'],
    'Go': ['NO', 'NO', 'NO', 'NO', 'YES', 'NO', 'YES', 'YES', 'YES', 'YES', 'NO', 'YES', 'YES']
}
df = pd.DataFrame(data)
print("Dataset:")
print(df)
le_nationality = LabelEncoder()
le_go = LabelEncoder()
df['Nationality'] = le_nationality.fit_transform(df['Nationality'])
df['Go'] = le_go.fit_transform(df['Go'])
X = df.drop('Go', axis=1)
y = df['Go']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
dtree = DecisionTreeClassifier(random_state=42)
dtree.fit(X_train, y_train)
y_pred = dtree.predict(X_test)
print("\nAccuracy on the test set:", metrics.accuracy_score(y_test, y_pred))
plt.figure(figsize=(12, 8))
plot_tree(dtree, filled=True, feature_names=X.columns, class_names=le_go.classes_, rounded=True, proportion=True)
plt.show()
Output:
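For a plain-text view of the same learned rules (a sketch, not part of the original program; export_text lives in sklearn.tree), the fitted dtree can be printed as nested if/else conditions:

from sklearn.tree import export_text
print(export_text(dtree, feature_names=list(X.columns)))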
7. Implement the Random Forest Classification
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
df = pd.read_csv("C:/Users/Admin/Downloads/titanic.csv").drop(['Cabin', 'PassengerId', 'Name', 'Ticket'], axis=1)  # filename assumed; point this at your local copy of the Titanic dataset
df.fillna(0, inplace=True)
le = LabelEncoder()
df['Sex'] = le.fit_transform(df['Sex'])
df['Embarked'] = le.fit_transform(df['Embarked'].astype(str))
X, y = df.drop('Survived', axis=1), df['Survived']
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=42)
clf = RandomForestClassifier(n_estimators=100)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
print("Predictions:", y_pred)
print("Accuracy:", clf.score(X_test, y_test))
print("Classification Report:\n", classification_report(y_test, y_pred))
cm = confusion_matrix(y_test, y_pred)
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Not Survived', 'Survived'], yticklabels=['Not Survived', 'Survived'])
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()
sns.barplot(x=clf.feature_importances_, y=X.columns)
plt.title('Feature Importance')
plt.xlabel('Importance')
plt.ylabel('Features')
plt.show()
Output:
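As a quick numeric complement to the bar plot (a sketch reusing clf and X from above), the importances can be paired with column names and sorted:

importances = pd.Series(clf.feature_importances_, index=X.columns).sort_values(ascending=False)
print(importances)  # most influential features first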
8. Implement the SVM Classification

import pandas as pd
import math
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
heart = pd.read_csv("C:/Users/Admin/Downloads/heart_disease.csv")  # Ensure correct file path
nrows = math.floor(heart.shape[0] * 0.8)
training = heart.iloc[:nrows]
test = heart.iloc[nrows:]
model1 = SVC()
model1.fit(training[["age", "chol"]], training["present"])
predictions1 = model1.predict(test[["age", "chol"]])
accuracy1 = sum(test["present"] == predictions1) / test.shape[0]
print(f"Model 1 (using 'age' and 'chol') accuracy: {accuracy1}")
model2 = SVC()
model2.fit(training[["age", "chol", "thalach"]], training["present"])
predictions2 = model2.predict(test[["age", "chol", "thalach"]])
accuracy2 = sum(test["present"] == predictions2) / test.shape[0]
print(f"Model 2 (using 'age', 'chol', and 'thalach') accuracy: {accuracy2}")
Output:
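SVMs are sensitive to feature scale, and "age" and "chol" live on quite different ranges, so standardizing often helps; a sketch (not part of the original exercise) using a scikit-learn Pipeline on the same split:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
scaled_model = make_pipeline(StandardScaler(), SVC())  # scaler is fit on training data only
scaled_model.fit(training[["age", "chol"]], training["present"])
print("Scaled model accuracy:", scaled_model.score(test[["age", "chol"]], test["present"]))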
9. K-Means Clustering
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
X, y = make_blobs(n_samples=500, n_features=2, centers=3, random_state=23)
k = 3
np.random.seed(23)
clusters = {i: {'center': 2 * (2 * np.random.random((X.shape[1],)) - 1), 'points': []} for i in range(k)}

def distance(p1, p2):
    # Euclidean distance between two points
    return np.sqrt(np.sum((p1 - p2) ** 2))

def assign_clusters(X, clusters):
    # Assign each sample to its nearest center
    for idx in range(X.shape[0]):
        dist = [distance(X[idx], clusters[i]['center']) for i in range(k)]
        clusters[np.argmin(dist)]['points'].append(X[idx])
    return clusters

def update_clusters(clusters):
    # Move each center to the mean of its assigned points
    for i in range(k):
        points = np.array(clusters[i]['points'])
        if points.shape[0] > 0:
            clusters[i]['center'] = points.mean(axis=0)
        clusters[i]['points'] = []
    return clusters

iterations = 10
for _ in range(iterations):
    clusters = assign_clusters(X, clusters)
    clusters = update_clusters(clusters)
pred = [np.argmin([distance(x, clusters[i]['center']) for i in range(k)]) for x in X]
fig, ax = plt.subplots(1, 3, figsize=(15, 5))
ax[0].scatter(X[:, 0], X[:, 1])
for i in range(k):
    ax[0].scatter(clusters[i]['center'][0], clusters[i]['center'][1], marker='*', c='red')
ax[0].set_title('Initial Centers')
ax[1].scatter(X[:, 0], X[:, 1], c=pred)
for i in range(k):
    ax[1].scatter(clusters[i]['center'][0], clusters[i]['center'][1], marker='*', c='red')
ax[1].set_title('After K-Means Iteration')
ax[2].scatter(X[:, 0], X[:, 1], c=pred)
for i in range(k):
    ax[2].scatter(clusters[i]['center'][0], clusters[i]['center'][1], marker='^', c='red')
ax[2].set_title('Final Clustering')
plt.tight_layout()
plt.show()
Output:
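As a cross-check (a sketch, not part of the original exercise), scikit-learn's KMeans implements the same assign/update loop and should recover similar centers on this data:

from sklearn.cluster import KMeans
km = KMeans(n_clusters=3, n_init=10, random_state=23).fit(X)
print("sklearn centers:\n", km.cluster_centers_)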
10. Density-based Clustering
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn import metrics
from sklearn.datasets import make_blobs
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
centers = [[1, 1], [-1, -1], [1, -1]]
X, labels_true = make_blobs(n_samples=750, centers=centers, cluster_std=0.4, random_state=0)
X = StandardScaler().fit_transform(X)
db = DBSCAN(eps=0.3, min_samples=10).fit(X)
core_samples_mask = np.zeros_like(db.labels_, dtype=bool)
core_samples_mask[db.core_sample_indices_] = True
labels = db.labels_
n_clusters_ = len(set(labels)) - (1 if -1 in labels else 0)
n_noise_ = list(labels).count(-1)
print('Estimated number of clusters: %d' % n_clusters_)
print('Estimated number of noise points: %d' % n_noise_)
print("Homogeneity: %0.3f" % metrics.homogeneity_score(labels_true, labels))
print("Completeness: %0.3f" % metrics.completeness_score(labels_true, labels))
print("V-measure: %0.3f" % metrics.v_measure_score(labels_true, labels))
print("Adjusted Rand Index: %0.3f"
% metrics.adjusted_rand_score(labels_true, labels))
print("Adjusted Mutual Information: %0.3f"
% metrics.adjusted_mutual_info_score(labels_true, labels))
print("Silhouette Coefficient: %0.3f"
% metrics.silhouette_score(X, labels))

unique_labels = set(labels)
colors = [plt.cm.Spectral(each) for each in np.linspace(0, 1, len(unique_labels))]
for k, col in zip(unique_labels, colors):
    if k == -1:
        col = [0, 0, 0, 1]  # Assign black color to noise
    class_member_mask = (labels == k)
    # Core samples drawn large, border samples drawn small
    xy = X[class_member_mask & core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col), markeredgecolor='k', markersize=14)
    xy = X[class_member_mask & ~core_samples_mask]
    plt.plot(xy[:, 0], xy[:, 1], 'o', markerfacecolor=tuple(col), markeredgecolor='k', markersize=6)
plt.title('Estimated number of clusters: %d' % n_clusters_)
plt.show()
Output:
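DBSCAN's result hinges on eps (the neighborhood radius); a small sketch (not part of the original program) showing how cluster and noise counts shift as eps varies on the same scaled X:

for eps in (0.2, 0.3, 0.5):
    labels_eps = DBSCAN(eps=eps, min_samples=10).fit(X).labels_
    n = len(set(labels_eps)) - (1 if -1 in labels_eps else 0)  # exclude the noise label
    print(f"eps={eps}: {n} clusters, {list(labels_eps).count(-1)} noise points")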
11. Apriori algorithm for market basket analysis
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder
basket = pd.read_csv("C:/Users/Admin/Downloads/Groceries_dataset.csv")
print(basket.head())
basket = basket.groupby(['Member_number', 'Date'])['itemDescription'].apply(list).reset_index()
te = TransactionEncoder()
transactions = te.fit_transform(basket['itemDescription'])
transactions_df = pd.DataFrame(transactions, columns=te.columns_)
print(transactions_df.head())
frequent_itemsets = apriori(transactions_df, min_support=6/len(basket), use_colnames=True, max_len=2)
try:
    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.5)
except TypeError as e:
    print(f"Error: {e}")
    print("Attempting to add 'num_itemsets=2' to work around the issue.")
    rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1.5, num_itemsets=2)
print(rules.head())
print("Rules identified: ", len(rules))
sns.set(style="whitegrid")
fig = plt.figure(figsize=(12, 12))
ax = fig.add_subplot(projection='3d')
x = rules['support']
y = rules['confidence']
z = rules['lift']
ax.set_xlabel("Support")
ax.set_ylabel("Confidence")
ax.set_zlabel("Lift")
ax.scatter(x, y, z)
ax.set_title("3D Distribution of Association Rules")
plt.show()
Output:
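To turn the rule table into something actionable (a sketch reusing the rules DataFrame from above), the strongest rules can be sorted by lift and trimmed to the key columns:

top = rules.sort_values('lift', ascending=False).head(10)
print(top[['antecedents', 'consequents', 'support', 'confidence', 'lift']])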
12. Comparison of Supervised Machine Learning algorithms
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
classifiers = {
    "Logistic Regression": LogisticRegression(),
    "K-Nearest Neighbors (KNN)": KNeighborsClassifier(),
    "Support Vector Machine (SVM)": SVC(),
    "Decision Tree": DecisionTreeClassifier(),
    "Random Forest": RandomForestClassifier()
}
results = {}
for name, clf in classifiers.items():
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    # Store the results
    results[name] = {
        "Accuracy": accuracy,
        "Classification Report": classification_report(y_test, y_pred),
        "Confusion Matrix": confusion_matrix(y_test, y_pred)
    }

for name, result in results.items():
    print(f"Model: {name}")
    print(f"Accuracy: {result['Accuracy']:.4f}")
    print(f"Classification Report:\n{result['Classification Report']}")
    print(f"Confusion Matrix:\n{result['Confusion Matrix']}")
    print("-" * 50)
Output:
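A single 70/30 split can be noisy on 150 samples, so as a more robust comparison (a sketch reusing the classifiers dictionary), cross_val_score averages accuracy over five folds, with scaling done inside a pipeline so no test-fold statistics leak into training:

from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
for name, clf in classifiers.items():
    scores = cross_val_score(make_pipeline(StandardScaler(), clf), X, y, cv=5)
    print(f"{name}: mean accuracy {scores.mean():.4f} (+/- {scores.std():.4f})")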
