ASSESSMENT-2
NAME - VAIBHAV DUTT TRIVEDI
REG. NO. – 22MIC0118
COURSE TITLE – MACHINE LEARNING
COURSE CODE – CSI 3026
LAB SLOT – L9+L10
CODE – ID3 DECISION TREE (INFORMATION GAIN)
import pandas as pd
import numpy as np

def calculate_entropy(column):
    # Entropy = -sum(p * log2(p)) over the class probabilities
    values, counts = np.unique(column, return_counts=True)
    probabilities = counts / len(column)
    return -np.sum(probabilities * np.log2(probabilities))

def calculate_info_gain(data, feature, target):
    # Information gain = entropy of the parent minus the weighted
    # entropy of the subsets produced by splitting on the feature
    total_entropy = calculate_entropy(data[target])
    feature_values = data[feature].unique()
    weighted_entropy = 0
    for value in feature_values:
        subset = data[data[feature] == value]
        weight = len(subset) / len(data)
        weighted_entropy += weight * calculate_entropy(subset[target])
    return total_entropy - weighted_entropy

def build_tree(data, features, target):
    # Pure node: every row has the same class, so return it as a leaf
    if len(np.unique(data[target])) == 1:
        return data[target].iloc[0]
    # No features left to split on: return the majority class
    if not features:
        return data[target].mode()[0]
    # Split on the feature with the highest information gain
    best_feature = max(features, key=lambda f: calculate_info_gain(data, f, target))
    tree = {best_feature: {}}
    remaining_features = [f for f in features if f != best_feature]
    for value in np.unique(data[best_feature]):
        subset = data[data[best_feature] == value]
        tree[best_feature][value] = build_tree(subset, remaining_features, target)
    return tree

data = pd.read_csv("data.csv")  # placeholder filename; the original path was lost
target = data.columns[-1]            # last column holds the class label
features = list(data.columns[:-1])   # all other columns are candidate splits
decision_tree = build_tree(data, features, target)
print("Decision Tree:", decision_tree)
CODE – SIMPLE LINEAR REGRESSION
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

data = pd.read_csv('data.csv')  # placeholder filename; the original path was lost
x = data['X']
y = data['Y']
n = len(x)

# Closed-form least squares: slope = cov(x, y) / var(x)
mean_x = np.mean(x)
mean_y = np.mean(y)
numerator = np.sum((x - mean_x) * (y - mean_y))
denom = np.sum((x - mean_x) ** 2)
slope = numerator / denom
intercept = mean_y - slope * mean_x
print("Slope (m):", slope)
print("Intercept (b):", intercept)

# Plot the data points and the fitted line
y_pred = slope * x + intercept
plt.scatter(x, y, color='blue', label='Data Points')
plt.plot(x, y_pred, color='red', label='Regression Line')
plt.xlabel('X')
plt.ylabel('Y')
plt.title('Simple Linear Regression')
plt.legend()
plt.show()
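As a quick sanity check (an addition, not part of the submission), np.polyfit with degree 1 solves the same least-squares problem, so its coefficients should match the closed-form slope and intercept computed above:

# Cross-check against NumPy's least-squares fit
coeffs = np.polyfit(x, y, 1)  # degree-1 fit returns [slope, intercept]
print("np.polyfit slope:", coeffs[0], "intercept:", coeffs[1])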
CODE – LOGISTIC REGRESSION
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def sigmoid(z):
    # Logistic function: maps any real value into (0, 1)
    return 1 / (1 + np.exp(-z))

data = pd.read_csv('data.csv')  # placeholder filename; the original path was lost
x = data['X']
y = data['Y']
n = len(x)

# Note: this reuses the least-squares slope and intercept inside the
# sigmoid, so it approximates logistic regression rather than
# minimizing the log loss directly.
mean_x = np.mean(x)
mean_y = np.mean(y)
numerator = np.sum((x - mean_x) * (y - mean_y))
denom = np.sum((x - mean_x) ** 2)
slope = numerator / denom
intercept = mean_y - slope * mean_x
print("Slope (m):", slope)
print("Intercept (b):", intercept)

z = slope * x + intercept
y_pred = sigmoid(z)
plt.scatter(x, y, color='blue', label='Data Points')
# Sort by x so the curve is drawn left to right instead of zigzagging
order = np.argsort(x)
plt.plot(np.asarray(x)[order], np.asarray(y_pred)[order], color='red', label='Logistic Regression Curve')
plt.xlabel('X')
plt.ylabel('Probability')
plt.title('Logistic Regression')
plt.legend()
plt.show()
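Because the block above plugs least-squares coefficients into the sigmoid, it does not actually minimize the logistic loss. A minimal sketch of the standard fit by gradient descent is given below, assuming y holds 0/1 class labels; the learning rate and iteration count are arbitrary choices of mine:

# Hypothetical alternative: fit the coefficients on the log loss
w, b = 0.0, 0.0
lr = 0.1  # learning rate (assumed value)
x_arr = np.asarray(x, dtype=float)
y_arr = np.asarray(y, dtype=float)
for _ in range(5000):
    p = sigmoid(w * x_arr + b)             # predicted probabilities
    grad_w = np.mean((p - y_arr) * x_arr)  # d(log loss)/dw
    grad_b = np.mean(p - y_arr)            # d(log loss)/db
    w -= lr * grad_w
    b -= lr * grad_b
print("Gradient-descent slope:", w, "intercept:", b)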
CODE – CART DECISION TREE (GINI INDEX)
import pandas as pd
import numpy as np

def gini_impurity(column):
    # Gini impurity = 1 - sum(p^2) over the class probabilities
    values, counts = np.unique(column, return_counts=True)
    probabilities = counts / len(column)
    return 1 - np.sum(probabilities ** 2)

def calculate_gini_index(data, feature, target):
    # Weighted Gini impurity of the subsets produced by the split
    feature_values = data[feature].unique()
    weighted_gini = 0
    for value in feature_values:
        subset = data[data[feature] == value]
        weight = len(subset) / len(data)
        weighted_gini += weight * gini_impurity(subset[target])
    return weighted_gini

def build_cart_tree(data, features, target):
    # Pure node: every row has the same class, so return it as a leaf
    if len(np.unique(data[target])) == 1:
        return data[target].iloc[0]
    # No features left to split on: return the majority class
    if not features:
        return data[target].mode()[0]
    # Split on the feature with the lowest weighted Gini index
    best_feature = min(features, key=lambda f: calculate_gini_index(data, f, target))
    tree = {best_feature: {}}
    remaining_features = [f for f in features if f != best_feature]
    for value in np.unique(data[best_feature]):
        subset = data[data[best_feature] == value]
        tree[best_feature][value] = build_cart_tree(subset, remaining_features, target)
    return tree

data = pd.read_csv("data.csv")  # placeholder filename; the original path was lost
target = data.columns[-1]            # last column holds the class label
features = list(data.columns[:-1])   # all other columns are candidate splits
decision_tree = build_cart_tree(data, features, target)
print("Decision Tree:", decision_tree)