# --- Program 1: FIND-S Algorithm ---
# Filename: find_s.py
"""
Implements the FIND-S algorithm to find the most specific hypothesis
that fits the given training data.
"""
import csv
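# Assumed input layout (hypothetical values, in the style of the classic
# "enjoy sport" training set): each row lists attribute values followed by a
# yes/no target in the last column, e.g.
#   sunny,warm,normal,strong,warm,same,yes
#   rainy,cold,high,strong,warm,change,no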
print("Data in csv file is:")
data = []
with open("[Link]", "r") as fp: # Corrected file extension
read = [Link](fp)
for r in read:
[Link](r)
print(r)
print("\n")
n = len(data[0]) - 1
print(" No. of attributes are:", n)
print("Initial Hypothesis: ")
hypothesis = ['0'] * n
print(hypothesis)
for i in range(0, len(data)):
if (data[i][n] == 'yes'):
for j in range(0, n):
hypothesis[j] = data[i][j]
break
print(hypothesis)
print("\n")
print("After every iteration: ")
for i in range(0, len(data)):
if (data[i][n] == 'yes'):
for j in range(0, n):
if (hypothesis[j] != data[i][j]):
hypothesis[j] = '?'
print(hypothesis)
print("\n")
print("Final Hypothesis:")
print(hypothesis)
# --- Program 2: Candidate Elimination Algorithm ---
# Filename: candidate_elimination.py
"""
Implements the Candidate Elimination Algorithm to find the most general
and most specific hypotheses that are consistent with the training data.
"""
import numpy as np
import pandas as pd

data = pd.read_csv('training_data.csv')  # placeholder path: same yes/no training data as Program 1
concepts = np.array(data)[:, :-1]
print("Instances are:\n", concepts)
target = np.array(data)[:, -1]

def learn(concepts, target):
    specific_h = concepts[0].copy()
    print("\nInitialization of specific_h and general_h")
    print("Specific boundary:\n", specific_h)
    general_h = [["?" for _ in range(len(specific_h))] for _ in range(len(specific_h))]
    print("General boundary:\n", general_h)
    for i, h in enumerate(concepts):
        print("\nInstance", i + 1, "is", h)
        if target[i] == "yes":
            print("Instance is positive")
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    specific_h[x] = '?'
                    general_h[x][x] = '?'
        elif target[i] == "no":
            print("Instance is negative")
            for x in range(len(specific_h)):
                if h[x] != specific_h[x]:
                    general_h[x][x] = specific_h[x]
                else:
                    general_h[x][x] = '?'
        print("Specific boundary after instance", i + 1, ":\n", specific_h)
        print("General boundary after instance", i + 1, ":\n", general_h)
        print("\n")
    # Drop fully unconstrained rows (all '?') from the general boundary
    unconstrained = ['?'] * len(specific_h)
    general_h = [g for g in general_h if g != unconstrained]
    return specific_h, general_h

s_final, g_final = learn(concepts, target)
print("Final specific_h:\n", s_final)
print("Final general_h:\n", g_final)
# --- Program 3: Decision Tree ---
# Filename: decision_tree.py
"""
Implements a simple decision tree using scikit-learn.
"""
from sklearn.tree import DecisionTreeClassifier
import numpy as np

# Toy binary dataset: three binary features per sample
X = np.array([[1, 1, 1],
              [1, 0, 1],
              [0, 1, 0],
              [0, 0, 1],
              [1, 1, 0]])
y = np.array([1, 1, 0, 0, 1])

tree = DecisionTreeClassifier(criterion='entropy')  # use information gain (entropy) for splits
tree.fit(X, y)

new_sample = np.array([[1, 0, 1]])
predicted_class = tree.predict(new_sample)
print("Predicted class:", predicted_class[0])
# --- Program 4: Linear Regression ---
# Filename: linear_regression.py
"""
Performs linear regression on a given dataset using scikit-learn.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

data = pd.read_csv("pizza.csv")  # placeholder path: CSV with 'diameter' and 'price' columns
print(data)

diameter = data['diameter'].values.reshape(-1, 1)  # feature matrix must be 2-D
price = data['price'].values

model = LinearRegression()
model.fit(diameter, price)
intercept = model.intercept_
slope = model.coef_[0]
print("Intercept:", intercept)
print("Slope:", slope)

predictions = model.predict(diameter)
mse = mean_squared_error(price, predictions)
print("Mean squared error:", mse)

plt.scatter(diameter, price, color='blue')
plt.plot(diameter, predictions, color='red')
plt.xlabel('Diameter')
plt.ylabel('Price')
plt.title("Linear Regression")
plt.show()

new_diameter = np.array([[20]])  # prediction input must also be 2-D
future_price = model.predict(new_diameter)
print("Predicted price for a diameter of 20:", future_price[0])
# --- Program 5: Logistic Regression ---
# Filename: logistic_regression.py
"""
Performs logistic regression on a given dataset using scikit-learn.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

data = pd.read_csv('exam_data.csv')  # placeholder path: CSV with 'studyhours' and 'examresult' columns
print(data)

X = data[['studyhours']]
y = data['examresult']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

new_data = pd.DataFrame({'studyhours': [1, 7, 9]})
new_predictions = model.predict(new_data)
print("New predictions:")
for i, prediction in enumerate(new_predictions):
    print("Instance {}: predicted result: {}".format(i + 1, prediction))
# --- Program 6: Binary Classifier (another Logistic Regression example, slightly different) ---
# Filename: binary_classifier.py
"""
Performs binary classification (using Logistic Regression) on a dataset.
"""
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

data = pd.read_csv('exam_data.csv')  # placeholder path: same 'studyhours'/'examresult' data as Program 5
print(data)

X = data[['studyhours']]
Y = data['examresult']
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

model = LogisticRegression()
model.fit(X_train, y_train)
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Accuracy:", accuracy)

new_data = pd.DataFrame({'studyhours': [1, 7, 9]})
new_predictions = model.predict(new_data)
print("New predictions:")
for i, prediction in enumerate(new_predictions):
    print("Instance {}: Predicted Result {}".format(i + 1, prediction))
# --- Program 7: Bias, Variance, Cross-Validation ---
# Filename: bias_variance_cv.py
"""
Demonstrates how to calculate bias and variance using cross-validation
for a linear regression model.
"""
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LinearRegression
from statistics import mean, stdev

data = pd.read_csv("winequality.csv")  # placeholder path: dataset with a 'quality' target column
X_set = data.drop('quality', axis=1)
y_set = data['quality']

# Linear regression with 10-fold cross-validation.
# Here the mean CV score is used as a rough proxy for bias and the
# standard deviation of the scores as a proxy for variance.
model = LinearRegression()
scores = cross_val_score(model, X_set, y_set, cv=10)
print("Linear Regression scores:", scores)
print("Linear Regression bias (mean score):", mean(scores))
print("Linear Regression variance (std. dev. of scores):", stdev(scores))

# Show how these proxies change with different numbers of folds
k_values = [2, 5, 10, 20]
bias_scores = []
variance_scores = []
for k in k_values:
    model = LinearRegression()  # fresh model for each K
    scores = cross_val_score(model, X_set, y_set, cv=k)
    bias_scores.append(mean(scores))
    variance_scores.append(stdev(scores))

print("\nLinear Regression K-fold validation results:")
for i in range(len(k_values)):
    print(f"K={k_values[i]}: Bias={bias_scores[i]:.4f}, Variance={variance_scores[i]:.4f}")
# --- Program 8: K-Nearest Neighbors (KNN) ---
# Filename: knn_classifier.py
"""
Implements the K-Nearest Neighbors algorithm on the Iris dataset.
"""
from sklearn.datasets import load_iris
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=0)

knn = KNeighborsClassifier()  # default n_neighbors=5
knn.fit(X_train, y_train)
y_pred = knn.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

print("Correct predictions:")
for i in range(len(y_test)):
    if y_test[i] == y_pred[i]:
        print("True label:", iris.target_names[y_test[i]],
              "- Predicted label:", iris.target_names[y_pred[i]])

print("\nWrong predictions:")
for i in range(len(y_test)):
    if y_test[i] != y_pred[i]:
        print("True label:", iris.target_names[y_test[i]],
              "- Predicted label:", iris.target_names[y_pred[i]])
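# Optional sketch: the classifier above uses the default n_neighbors=5; a quick
# loop shows how test accuracy responds to other choices of k.
for k in [1, 3, 5, 7, 9]:
    clf = KNeighborsClassifier(n_neighbors=k)
    clf.fit(X_train, y_train)
    print("k =", k, "accuracy =", accuracy_score(y_test, clf.predict(X_test)))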
# --- Program 9: Locally Weighted Regression ---
# Filename: locally_weighted_regression.py
"""
Implements Locally Weighted Regression.
"""
import numpy as np
import matplotlib.pyplot as plt

def lwr(x_train, y_train, x_test, tau):
    """Locally weighted regression: fit a weighted linear model around each query point."""
    y_pred = np.zeros(len(x_test))
    for i, test_point in enumerate(x_test):
        # Gaussian kernel weights centred on the query point, with bandwidth tau
        weights = np.exp(-((x_train - test_point) ** 2) / (2 * tau ** 2))
        X = np.array([np.ones_like(x_train), x_train]).T  # design matrix with a bias column
        W = np.diag(weights)
        try:
            # Weighted normal equations: theta = (X^T W X)^-1 X^T W y
            theta = np.linalg.inv(X.T @ W @ X) @ (X.T @ (W @ y_train))
            y_pred[i] = np.array([1, test_point]) @ theta
        except np.linalg.LinAlgError:
            print("Singular matrix encountered. Adjusting tau or the data may be needed.")
            y_pred[i] = 0  # fall back to a default value
    return y_pred

np.random.seed(42)
x_train = np.linspace(0, 10, 100)
y_train = 2 * np.sin(x_train) + np.random.normal(0, 0.2, 100)
x_test = np.linspace(0, 10, 50)
tau = 0.1  # kernel bandwidth: smaller values give a more local (wigglier) fit

y_pred = lwr(x_train, y_train, x_test, tau)
plt.scatter(x_train, y_train, color='blue')
plt.plot(x_test, y_pred, color='red')
plt.title("Locally Weighted Regression")
plt.xlabel('x')
plt.ylabel('y')
plt.show()
# --- Program 10: Naive Bayes ---
# Filename: naive_bayes.py
"""
Implements Naive Bayes classification on the Iris dataset.
"""
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, precision_score

iris = load_iris()
X, y = iris.data, iris.target
target_names = iris.target_names
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
nb_classifier = GaussianNB()
nb_classifier.fit(X_train, y_train)
y_pred = nb_classifier.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
print('Accuracy:', accuracy)
print("Precision:", precision)
# predict for new values
new_data = [[5.1, 3.5, 1.4, 0.2], [6.9, 3.2, 5.7, 2.3]] # Example Data
new_prediction = nb_classifier.predict(new_data)
print("New predictions:", [target_names[prediction] for prediction in new_prediction])
# --- Program 11: EM and K-Means Clustering ---
# Filename: em_kmeans.py
"""
Compares EM (Gaussian Mixture Model) and K-Means clustering on
a heart disease dataset.
"""
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler

# Load the data, handling potential errors
try:
    data = pd.read_csv("heart.csv")  # placeholder path: heart-disease data with 'trestbps', 'chol', 'target' columns
except FileNotFoundError:
    print("Error: The file 'heart.csv' was not found.")
    exit()
except pd.errors.EmptyDataError:
    print("Error: The file 'heart.csv' is empty.")
    exit()
except pd.errors.ParserError:
    print("Error: The file 'heart.csv' could not be parsed. Check the format.")
    exit()

features = ['trestbps', 'chol']  # features used for clustering
X = data[features]

# Scale the data
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# K-Means clustering
kmeans = KMeans(n_clusters=2, random_state=42, n_init=10)
kmeans.fit(X_scaled)
kmeans_labels = kmeans.labels_

# EM clustering (Gaussian Mixture Model)
em = GaussianMixture(n_components=2, random_state=42)
em.fit(X_scaled)
em_labels = em.predict(X_scaled)

# Map cluster/target labels to colours
colormap = np.array(['red', 'green'])

plt.figure(figsize=(14, 5))

# Original data, coloured by the true target
plt.subplot(1, 3, 1)
plt.scatter(X['trestbps'], X['chol'], c=colormap[data['target']], s=40)
plt.title('Original Data')
plt.xlabel('trestbps')
plt.ylabel('chol')

# K-Means clustering
plt.subplot(1, 3, 2)
plt.scatter(X['trestbps'], X['chol'], c=colormap[kmeans_labels], s=40)
plt.title('K-Means Clustering')
plt.xlabel('trestbps')
plt.ylabel('chol')

# EM clustering
plt.subplot(1, 3, 3)
plt.scatter(X['trestbps'], X['chol'], c=colormap[em_labels], s=40)
plt.title('EM Clustering')
plt.xlabel('trestbps')
plt.ylabel('chol')

plt.show()
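# Optional sketch: silhouette scores give a rough numeric comparison of the two
# clusterings on the scaled features (values closer to 1 indicate tighter,
# better-separated clusters).
from sklearn.metrics import silhouette_score
print("K-Means silhouette score:", silhouette_score(X_scaled, kmeans_labels))
print("EM silhouette score:", silhouette_score(X_scaled, em_labels))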
# --- Program 12: Exploratory Data Analysis (EDA) ---
# Filename: exploratory_data_analysis.py
"""
Performs Exploratory Data Analysis on the Iris dataset using pandas and matplotlib.
"""
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load data, with error handling
try:
    df = pd.read_csv('iris.csv')  # placeholder path: Iris data with a 'species' column
except FileNotFoundError:
    print("Error: The file 'iris.csv' was not found.")
    exit()
except pd.errors.EmptyDataError:
    print("Error: The file 'iris.csv' is empty.")
    exit()
except pd.errors.ParserError:
    print("Error: The file 'iris.csv' could not be parsed.")
    exit()

print("Exploratory data analysis of the Iris dataset:\n")
print("First few rows:\n", df.head())
print("\nData information:")
df.info()  # info() prints directly and returns None
print("\nMissing values:\n", df.isnull().sum())
print("\nColumn names:\n", df.columns)
print("\nValue counts for 'species':\n", df['species'].value_counts())
print("\nData types of each column:\n", df.dtypes)
print("\nCorrelation matrix:\n", df.corr(numeric_only=True))
# --- Program 13: Bayesian Network ---
# Filename: bayesian_network.py
"""
Constructs a Bayesian network for diagnosing heart disease using pgmpy.
"""
import numpy as np
import pandas as pd
from pgmpy.models import BayesianModel  # called BayesianNetwork in newer pgmpy releases
from pgmpy.estimators import MaximumLikelihoodEstimator
from pgmpy.inference import VariableElimination

# Load the data, handling possible errors
try:
    heartDisease = pd.read_csv('heart.csv')  # placeholder path: adjust to your heart-disease CSV
except FileNotFoundError:
    print("Error: The file 'heart.csv' was not found.")
    exit()
except pd.errors.EmptyDataError:
    print("Error: The file 'heart.csv' is empty.")
    exit()
except pd.errors.ParserError:
    print("Error: The file 'heart.csv' could not be parsed.")
    exit()

heartDisease = heartDisease.replace('?', np.nan)
print("Few examples from the dataset:")
print(heartDisease.head())

model = BayesianModel([('age', 'trestbps'), ('age', 'fbs'), ('sex', 'trestbps'),
                       ('exang', 'trestbps'), ('trestbps', 'heartdisease'),
                       ('fbs', 'heartdisease'), ('heartdisease', 'restecg'),
                       ('heartdisease', 'thalach'), ('heartdisease', 'chol')])

print("\nLearning CPDs using Maximum Likelihood Estimation")
model.fit(heartDisease, estimator=MaximumLikelihoodEstimator)

print("\nInferencing with the Bayesian network")
HeartDisease_infer = VariableElimination(model)

print("\n1. Probability of heart disease given age = 30:")
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'age': 30})
print(q)  # query() returns a factor over 'heartdisease'

print("\n2. Probability of heart disease given cholesterol = 100:")
q = HeartDisease_infer.query(variables=['heartdisease'], evidence={'chol': 100})
print(q)
# --- Program 14: Support Vector Machine (SVM) ---
# Filename: svm_classification.py
"""
Implements Support Vector Machine (SVM) classification on the Iris dataset.
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load data
iris = load_iris()
X = iris.data[:, :2]  # use only the first two features so the boundary can be plotted
y = iris.target
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25,
random_state=0)
# Scale data
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)  # reuse the training-set scaling
# Train SVM classifier
svm_classifier = SVC(kernel='linear', C=1.0, random_state=0)
svm_classifier.fit(X_train, y_train)
# Make predictions
y_pred_train = svm_classifier.predict(X_train)
y_pred_test = svm_classifier.predict(X_test)
# Calculate accuracy
accuracy_train = accuracy_score(y_train, y_pred_train)
accuracy_test = accuracy_score(y_test, y_pred_test)
print("Training accuracy:", accuracy_train)
print("Testing accuracy:", accuracy_test)
# Plot decision boundary (adapted from example)
def plot_decision_boundary(classifier, X, y):
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.02),
                         np.arange(y_min, y_max, 0.02))
    # Predict the class for every grid point and shade the regions
    Z = classifier.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.contourf(xx, yy, Z, alpha=0.4)
    plt.scatter(X[:, 0], X[:, 1], c=y, marker='o', edgecolors='k')
    plt.xlabel("Feature 1")
    plt.ylabel("Feature 2")
    plt.title("Decision Boundary")
    plt.show()
plot_decision_boundary(svm_classifier, X_train, y_train)
# --- Program 15: Principal Component Analysis (PCA) ---
# Filename: pca_analysis.py
"""
Demonstrates Principal Component Analysis (PCA) using scikit-learn.
"""
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA

# Example 1 (simple array)
X = np.array([[1, 2, 3],
              [4, 5, 6],
              [7, 8, 9]])
pca = PCA(n_components=2)
pca.fit(X)
X_transformed = pca.transform(X)
print("Original data:\n", X)
print("\nTransformed data:\n", X_transformed)
# Example 2 (Iris dataset)
iris = load_iris()
X = iris.data
y = iris.target
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X)

plt.figure(figsize=(8, 6))
for i in range(len(iris.target_names)):
    plt.scatter(X_pca[y == i, 0], X_pca[y == i, 1], label=iris.target_names[i])
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.title('PCA of Iris Dataset')
plt.legend()
plt.show()
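# Optional sketch: explained_variance_ratio_ reports the share of total variance
# captured by each principal component; for Iris the first two components
# typically account for most of it.
print("Explained variance ratio:", pca.explained_variance_ratio_)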