ML Lab Works

The document contains multiple Python programs implementing various machine learning algorithms including FIND-S, Candidate Elimination, Decision Trees, Linear Regression, Logistic Regression, K-Nearest Neighbors, and more. Each program is designed to perform specific tasks such as classification, regression, clustering, and exploratory data analysis using libraries like scikit-learn and pandas. Additionally, the document addresses error handling for file operations and provides visualizations for certain algorithms.


# --- Program 1: FIND-S Algorithm ---

# Filename: find_s.py
"""
Implements the FIND-S algorithm to find the most specific hypothesis
that fits the given training data.
"""
import csv

print("Data in csv file is:")


data = []
# NOTE: the training file name was lost in extraction; "training_data.csv" is a placeholder.
with open("training_data.csv", "r") as fp:
    reader = csv.reader(fp)
    for r in reader:
        data.append(r)
        print(r)

print("\n")
n = len(data[0]) - 1
print("No. of attributes:", n)
print("Initial Hypothesis:")
hypothesis = ['0'] * n
print(hypothesis)

# Initialise the hypothesis with the first positive example
for i in range(0, len(data)):
    if data[i][n] == 'yes':
        for j in range(0, n):
            hypothesis[j] = data[i][j]
        break

print(hypothesis)
print("\n")
print("After every iteration:")
for i in range(0, len(data)):
    if data[i][n] == 'yes':
        # Generalise any attribute that disagrees with this positive example
        for j in range(0, n):
            if hypothesis[j] != data[i][j]:
                hypothesis[j] = '?'
        print(hypothesis)

print("\n")
print("Final Hypothesis:")
print(hypothesis)
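
# Expected input (not in the original listing): the program reads a small CSV whose
# last column is the target label. A minimal example, assuming the classic
# EnjoySport-style data; the attribute values and file name are placeholders.
# training_data.csv:
#   sunny,warm,normal,strong,warm,same,yes
#   sunny,warm,high,strong,warm,same,yes
#   rainy,cold,high,strong,warm,change,no
#   sunny,warm,high,strong,cool,change,yes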

# --- Program 2: Candidate Elimination Algorithm ---


# Filename: candidate_elimination.py
"""
Implements the Candidate Elimination Algorithm to find the most general
and most specific hypotheses that are consistent with the training data.
"""
import numpy as np
import pandas as pd

# NOTE: the training file name was lost in extraction; "training_data.csv" is a placeholder.
data = pd.read_csv('training_data.csv')

concepts = np.array(data)[:, :-1]
print("Instances are:\n", concepts)
target = np.array(data)[:, -1]

def learn(concepts, target):
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h & general_h")
    print("Specific boundary:\n", specific_h)
    general_h = [["?" for i in range(len(specific_h))] for i in range(len(specific_h))]
    print("General boundary:\n", general_h)

    for i, h in enumerate(concepts):
        print("\nInstance", i + 1, "is", h)
        if target[i] == "yes":
            print("Instance is positive")
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        elif target[i] == "no":
            print("Instance is negative")
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'

        print("Specific boundary after", i + 1, "instance:\n", specific_h)
        print("General boundary after", i + 1, "instance:\n", general_h)
        print("\n")

    # Drop rows of general_h that are still completely unconstrained
    # (one '?' per attribute, so this works for any number of attributes).
    unconstrained = ['?'] * len(specific_h)
    general_h = [row for row in general_h if row != unconstrained]
    return specific_h, general_h

s_final, g_final = learn(concepts, target)


print("Final specific-h:\n", s_final)
print("Final general_h:\n", g_final)

# --- Program 3: Decision Tree ---


# Filename: decision_tree.py
"""
Implements a simple decision tree using scikit-learn.
"""
from sklearn.tree import DecisionTreeClassifier
import numpy as np

# Each row holds three binary features; y is the class label.
X = np.array([[1, 1, 1],
              [1, 0, 1],
              [0, 1, 0],
              [0, 0, 1],
              [1, 1, 0]])

y = np.array([1, 1, 0, 0, 1])

tree = DecisionTreeClassifier(criterion='entropy')  # split using information gain
tree.fit(X, y)

new_sample = np.array([[1, 0, 1]])

predicted_class = tree.predict(new_sample)
print("Predicted class:", predicted_class[0])

# --- Program 4: Linear Regression ---


# Filename: linear_regression.py
"""
Performs linear regression on a given dataset using scikit-learn.
"""
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import numpy as np

# NOTE: the data file name was lost in extraction; "pizza.csv" is a placeholder
# for a file with 'diameter' and 'price' columns.
data = pd.read_csv("pizza.csv")
print(data)

diameter = data['diameter'].values.reshape(-1, 1)
price = data['price'].values

model = LinearRegression()
model.fit(diameter, price)

intercept = model.intercept_
slope = model.coef_[0]

print("Intercept:", intercept)
print("Slope:", slope)
predictions = model.predict(diameter)

mse = mean_squared_error(price, predictions)
print("Mean Squared Error:", mse)

plt.scatter(diameter, price, color='blue')
plt.plot(diameter, predictions, color='red')
plt.xlabel('Diameter')
plt.ylabel('Price')
plt.title("Linear Regression")
plt.show()

new_diameter = np.array([20]).reshape(-1, 1)  # predict() expects a 2-D array
future_price = model.predict(new_diameter)
print("Predicted price for a diameter of 20:", future_price[0])

# --- Program 5: Logistic Regression ---


# Filename: logistic_regression.py
"""
Performs logistic regression on a given dataset using scikit-learn.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# NOTE: the data file name was lost in extraction; "study_hours.csv" is a placeholder
# for a file with 'studyhours' and 'examresult' columns.
data = pd.read_csv('study_hours.csv')
print(data)

X = data[['studyhours']]
y = data['examresult']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

new_data = pd.DataFrame({'studyhours': [1, 7, 9]})
new_predictions = model.predict(new_data)

print("New predictions:")
for i, prediction in enumerate(new_predictions):
    print("Instance {}: Predicted Result: {}".format(i + 1, prediction))

# --- Program 6: Binary Classifier (another Logistic Regression example, slightly different) ---
# Filename: binary_classifier.py
"""
Performs binary classification (using Logistic Regression) on a dataset.
"""

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# NOTE: the data file name was lost in extraction; "study_hours.csv" is a placeholder.
data = pd.read_csv('study_hours.csv')
print(data)

X = data[['studyhours']]
Y = data['examresult']

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2,
                                                    random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)

predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

new_data = pd.DataFrame({'studyhours': [1, 7, 9]})
new_predictions = model.predict(new_data)

print("New predictions:")
for i, prediction in enumerate(new_predictions):
    print("Instance {}: Predicted Result: {}".format(i + 1, prediction))

# --- Program 7: Bias, Variance, Cross-Validation ---


# Filename: bias_variance_cv.py
"""
Demonstrates how to calculate bias and variance using cross-validation
for a linear regression model.
"""

import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from statistics import mean, stdev

# NOTE: the data file name was lost in extraction; "winequality.csv" is a placeholder
# for a dataset that contains a 'quality' column.
data = pd.read_csv("winequality.csv")
X_set = data.drop('quality', axis=1)
y_set = data['quality']

# Linear Regression
model = LinearRegression()
scores = cross_val_score(model, X_set, y_set, cv=10)
print("Linear Regression Scores:", scores)
print("Linear Regression Bias (Mean):", mean(scores))
print("Linear Regression Variance (StDev):", stdev(scores))

# K-fold cross-validation: show how the mean score (used here as a proxy for bias)
# and the spread of scores (a proxy for variance) change with the number of folds.
k_values = [2, 5, 10, 20]  # example values for K
bias_scores = []
variance_scores = []

for k in k_values:
    model = LinearRegression()  # create a fresh model for each K
    scores = cross_val_score(model, X_set, y_set, cv=k)
    bias_scores.append(mean(scores))
    variance_scores.append(stdev(scores))

print("\nLinear Regression K-Fold Validation Results:")
for i in range(len(k_values)):
    print(f"K={k_values[i]}: Bias={bias_scores[i]:.4f}, Variance={variance_scores[i]:.4f}")

# --- Program 8: K-Nearest Neighbors (KNN) ---


# Filename: knn.py
"""
Implements the K-Nearest Neighbors algorithm on the Iris dataset.
"""
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

iris = load_iris()
X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    random_state=0)
knn = KNeighborsClassifier()  # default n_neighbors=5
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

print("Correct predictions:")
for i in range(len(y_test)):
    if y_test[i] == y_pred[i]:
        print("True label:", iris.target_names[y_test[i]], "- Predicted label:",
              iris.target_names[y_pred[i]])

print("\nWrong predictions:")
for i in range(len(y_test)):
    if y_test[i] != y_pred[i]:
        print("True label:", iris.target_names[y_test[i]], "- Predicted label:",
              iris.target_names[y_pred[i]])
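
# Optional check (not in the original listing): a confusion matrix summarises the
# same correct/wrong breakdown per class for the predictions above.
from sklearn.metrics import confusion_matrix
print("Confusion matrix (rows = true class, columns = predicted class):")
print(confusion_matrix(y_test, y_pred))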

# --- Program 9: Locally Weighted Regression ---


# Filename: locally_weighted_regression.py
"""
Implements Locally Weighted Regression.
"""
import numpy as np
import matplotlib.pyplot as plt

def lwr(x_train, y_train, x_test, tau):
    """Predict each test point with a locally weighted linear fit (bandwidth tau)."""
    y_pred = np.zeros(len(x_test))
    for i, test_point in enumerate(x_test):
        # Gaussian kernel: training points near the query get larger weights
        weights = np.exp(-((x_train - test_point) ** 2) / (2 * tau ** 2))
        X = np.array([np.ones_like(x_train), x_train]).T
        W = np.diag(weights)
        try:
            # Weighted normal equations: theta = (X^T W X)^-1 X^T W y
            theta = np.linalg.inv(X.T @ W @ X) @ (X.T @ (W @ y_train))
            y_pred[i] = np.array([1, test_point]) @ theta
        except np.linalg.LinAlgError:
            print("Singular matrix encountered. Adjusting tau or data may be needed.")
            y_pred[i] = 0  # or some other default value
    return y_pred

np.random.seed(42)
x_train = np.linspace(0, 10, 100)
y_train = 2 * np.sin(x_train) + np.random.normal(0, 0.2, 100)
x_test = np.linspace(0, 10, 50)
tau = 0.1  # bandwidth; too small a value makes the local fits unstable

y_pred = lwr(x_train, y_train, x_test, tau)

plt.scatter(x_train, y_train, color='blue')
plt.plot(x_test, y_pred, color='red')
plt.title("Locally Weighted Regression")
plt.xlabel('x')
plt.ylabel('y')
plt.show()
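
# Optional experiment (not in the original listing): tau controls how local the fit
# is; small values follow the noise, large values approach an ordinary straight-line
# fit. The bandwidth values below are chosen only for illustration.
for t, colour in [(0.1, 'red'), (0.5, 'green'), (2.0, 'orange')]:
    plt.plot(x_test, lwr(x_train, y_train, x_test, t), color=colour, label=f"tau={t}")
plt.scatter(x_train, y_train, color='blue', s=10)
plt.legend()
plt.show()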

# --- Program 10: Naive Bayes ---


# Filename: naive_bayes.py
"""
Implements Naive Bayes classification on the Iris dataset.
"""
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score

iris = load_iris()
X, y = iris.data, iris.target
target_names = iris.target_names

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2,
                                                    random_state=42)

nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)
y_pred = nb_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)


precision = precision_score(y_test, y_pred, average='weighted')

print('Accuracy:', accuracy)
print("Precision:", precision)

# predict for new values


new_data = [[5.1, 3.5, 1.4, 0.2], [6.9, 3.2, 5.7, 2.3]] # Example Data
new_prediction = nb_classifier.predict(new_data)

print("New predictions:", [target_names[prediction] for prediction in new_prediction])

# --- Program 11: EM and K-Means Clustering ---


# Filename: em_kmeans.py
"""
Compares EM (Gaussian Mixture Model) and K-Means clustering on
a heart disease dataset.
"""
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
import numpy as np
import matplotlib.pyplot as plt

# Load the data, handling potential errors.
# NOTE: the data file name was lost in extraction; "heart.csv" is a placeholder for a
# heart-disease dataset with 'trestbps', 'chol' and 'target' columns.
try:
    data = pd.read_csv("heart.csv")  # make sure the file is in the same directory
except FileNotFoundError:
    print("Error: The file 'heart.csv' was not found.")
    exit()
except pd.errors.EmptyDataError:
    print("Error: The file 'heart.csv' is empty.")
    exit()
except pd.errors.ParserError:
    print("Error: The file 'heart.csv' could not be parsed. Check the format.")
    exit()

features = ['trestbps', 'chol'] # Features for clustering


X = data[features]

# Scale the data


scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# K-Means Clustering
kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
kmeans.fit(X_scaled)
kmeans_labels = kmeans.labels_

# EM Clustering (Gaussian Mixture Model)
em = GaussianMixture(n_components=2, random_state=42)
em.fit(X_scaled)
em_labels = em.predict(X_scaled)

# Create a colormap (index 0 -> red, index 1 -> green)
colormap = np.array(['red', 'green'])

# Create the plot
plt.figure(figsize=(14, 5))

# Original data, coloured by the true 'target' column
plt.subplot(1, 3, 1)
plt.scatter(X['trestbps'], X['chol'], c=colormap[data['target']], s=40)
plt.title('Original Data')
plt.xlabel('trestbps')
plt.ylabel('chol')

# K-Means cluster assignments
plt.subplot(1, 3, 2)
plt.scatter(X['trestbps'], X['chol'], c=colormap[kmeans_labels], s=40)
plt.title('K-Means Clustering')
plt.xlabel('trestbps')
plt.ylabel('chol')

# EM cluster assignments
plt.subplot(1, 3, 3)
plt.scatter(X['trestbps'], X['chol'], c=colormap[em_labels], s=40)
plt.title('EM Clustering')
plt.xlabel('trestbps')
plt.ylabel('chol')

plt.show()
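
# Optional comparison (not in the original listing): the silhouette score (higher is
# better) gives a quantitative comparison of the two clusterings computed above.
from sklearn.metrics import silhouette_score
print("K-Means silhouette:", silhouette_score(X_scaled, kmeans_labels))
print("EM silhouette:", silhouette_score(X_scaled, em_labels))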

# --- Program 12: Exploratory Data Analysis (EDA) ---


# Filename: exploratory_data_analysis.py
"""
Performs Exploratory Data Analysis on the Iris dataset using pandas and matplotlib.
"""
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load data, with error handling.
# NOTE: the data file name was lost in extraction; "iris.csv" is a placeholder for an
# Iris dataset with a 'species' column.
try:
    df = pd.read_csv('iris.csv')
except FileNotFoundError:
    print("Error: The file 'iris.csv' was not found.")
    exit()
except pd.errors.EmptyDataError:
    print("Error: The file 'iris.csv' is empty.")
    exit()
except pd.errors.ParserError:
    print("Error: The file 'iris.csv' could not be parsed.")
    exit()

print("Exploratory data analysis of the Iris dataset:\n")

print("First few rows:\n", df.head())

print("\nData information:")
df.info()  # info() prints its report directly and returns None, so call it on its own

print("\nMissing values:\n", df.isnull().sum())

print("\nColumn names:\n", df.columns)

print("\nValue counts for 'species':\n", df['species'].value_counts())

print("\nData types of each column:\n", df.dtypes)

print("\nCorrelation matrix:\n", df.corr(numeric_only=True))  # numeric columns only

# --- Program 13: Bayesian Network ---


# Filename: bayesian_network.py
"""
Constructs a Bayesian network for diagnosing heart disease using pgmpy.
"""
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel  # called BayesianNetwork in newer pgmpy releases
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Load the data, handling possible errors.
# NOTE: the data file name was lost in extraction; "heart.csv" is a placeholder for the
# UCI heart-disease dataset (replace with the correct path).
try:
    heartDisease = pd.read_csv('heart.csv')
except FileNotFoundError:
    print("Error: The file 'heart.csv' was not found.")
    exit()
except pd.errors.EmptyDataError:
    print("Error: The file 'heart.csv' is empty.")
    exit()
except pd.errors.ParserError:
    print("Error: The file 'heart.csv' could not be parsed.")
    exit()

heartDisease = heartDisease.replace('?', np.nan)

print("Few examples from the dataset:")
print(heartDisease.head())

model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'), ('sex', 'trestbps'),
                       ('exang', 'trestbps'), ('trestbps', 'heartdisease'),
                       ('fbs', 'heartdisease'), ('heartdisease', 'restecg'),
                       ('heartdisease', 'thalach'), ('heartdisease', 'chol')])

print("\nLearning CPDs using Maximum Likelihood Estimation")
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

print("\nInferencing with the Bayesian Network")
HeartDisease_infer = VariableElimination(model)

print("\n1. Probability of heart disease given age = 30:")
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 30})
print(q)  # recent pgmpy versions return a printable DiscreteFactor

print("\n2. Probability of heart disease given cholesterol = 100:")
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100})
print(q)
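
# Optional inspection (not in the original listing): the learned conditional
# probability tables can be printed directly, assuming a recent pgmpy release.
print("\nLearned CPDs:")
for cpd in model.get_cpds():
    print("CPD of", cpd.variable)
    print(cpd)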

# --- Program 14: Support Vector Machine (SVM) ---


# Filename: svm_classification.py
"""
Implements Support Vector Machine (SVM) classification on the Iris dataset.
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load data
iris = load_iris()
X = iris.data[:, :2]  # use only the first two features for visualization
y = iris.target

# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
                                                    random_state=0)

# Scale data
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)  # reuse the training-set scaling

# Train SVM classifier


svm_classifier = SVC(kernel='linear', C=1.0, random_state=0)
svm_classifier.fit(X_train, y_train)

# Make predictions
y_pred_train = svm_classifier.predict(X_train)
y_pred_test = svm_classifier.predict(X_test)

# Calculate accuracy
accuracy_train = accuracy_score(y_train, y_pred_train)
accuracy_test = accuracy_score(y_test, y_pred_test)
print("Training accuracy:", accuracy_train)
print("Testing accuracy:", accuracy_test)

# Plot decision boundary (adapted from example)


def plot_decision_boundary(classifier, X, y):
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))

    # Predict the class for every point on the grid
    Z = classifier.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.contourf(xx, yy, Z, alpha=0.4)
    plt.scatter(X[:, 0], X[:, 1], c=y, marker='o', edgecolors='k')
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.title("Decision Boundary")
    plt.show()

plot_decision_boundary(svm_classifier, X_train, y_train)
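
# Optional variant (not in the original listing): the same pipeline works with a
# non-linear kernel; the RBF hyperparameters below are chosen only for illustration.
rbf_classifier = SVC(kernel='rbf', C=1.0, gamma='scale', random_state=0)
rbf_classifier.fit(X_train, y_train)
print("RBF testing accuracy:", accuracy_score(y_test, rbf_classifier.predict(X_test)))
plot_decision_boundary(rbf_classifier, X_train, y_train)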

# --- Program 15: Principal Component Analysis (PCA) ---


# Filename: pca_analysis.py
"""
Demonstrates Principal Component Analysis (PCA) using scikit-learn.
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

# Example 1 (Simple array)


X = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])

pca = PCA(n_components=2)
pca.fit(X)
X_transformed = pca.transform(X)

print("Original data:\n", X)
print("\nTransformed data:\n", X_transformed)

# Example 2 (Iris dataset)


iris = load_iris()
X = iris.data
y = iris.target
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

plt.figure(figsize=(8, 6))
for i in range(len(iris.target_names)):
    plt.scatter(X_pca[y == i, 0], X_pca[y == i, 1], label=iris.target_names[i])

plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('PCA of Iris Dataset')
plt.legend()
plt.show()
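
# Optional check (not in the original listing): the share of variance captured by each
# component indicates how much information the 2-D projection retains, using the PCA
# fitted on the Iris data above.
print("Explained variance ratio:", pca.explained_variance_ratio_)
print("Total variance captured:", pca.explained_variance_ratio_.sum())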
