手写决策树算法,实现准确率、精度、召回率、F度量值
时间: 2023-09-24 14:09:47 浏览: 149
好的,以下是手写决策树算法的实现及评估指标的计算:
```python
import pandas as pd
import numpy as np
class DecisionTree():
    """Binary classification tree grown greedily by minimising Gini impurity.

    Every internal node tests ``x[feature_index] < threshold``; samples that
    satisfy the test go to the left child, all others to the right child.

    Parameters
    ----------
    criterion : str
        Stored on the instance. Only Gini impurity is implemented; the value
        is not consulted when choosing splits.
    max_depth : int or None
        Maximum number of split levels. ``None`` (the default) grows the tree
        until no further useful split exists.
    """

    def __init__(self, criterion='gini', max_depth=None):
        self.criterion = criterion
        self.max_depth = max_depth

    def fit(self, X, y):
        """Build the tree from a 2-D feature array ``X`` and 1-D labels ``y``."""
        X = np.asarray(X)
        y = np.asarray(y)
        # np.unique returns the labels sorted, which _best_split relies on.
        self.classes = np.unique(y)
        self.tree = self._grow_tree(X, y)

    def predict(self, X):
        """Return an array holding the predicted class label for each row of ``X``."""
        return np.array([self._predict(inputs, self.tree) for inputs in X])

    @staticmethod
    def _gini(counts, total):
        """Gini impurity of a node holding ``total`` samples split as ``counts``."""
        return 1.0 - sum((n / total) ** 2 for n in counts)

    def _best_split(self, X, y):
        """Find the (feature index, threshold) pair with the lowest weighted Gini.

        Returns ``(None, None)`` when there are fewer than two samples or when
        every feature is constant, i.e. no threshold separates the samples.
        """
        m = y.size
        if m <= 1:
            return None, None

        # Translate labels into positions within self.classes so that arbitrary
        # labels (strings, non-contiguous ints) can index the count lists.
        class_pos = np.searchsorted(self.classes, y)
        num_parent = np.bincount(class_pos, minlength=len(self.classes)).tolist()

        best_gini = float('inf')
        best_idx, best_thr = None, None
        for idx in range(X.shape[1]):
            order = np.argsort(X[:, idx], kind='stable')
            values = X[order, idx]
            sorted_pos = class_pos[order]

            num_left = [0] * len(num_parent)
            num_right = list(num_parent)
            for i in range(1, m):
                # Move sample i-1 from the right partition to the left one.
                c = sorted_pos[i - 1]
                num_left[c] += 1
                num_right[c] -= 1
                # A threshold can only be placed between two distinct values.
                if values[i] == values[i - 1]:
                    continue
                gini = (
                    i * self._gini(num_left, i)
                    + (m - i) * self._gini(num_right, m - i)
                ) / m
                if gini < best_gini:
                    best_gini = gini
                    best_idx = idx
                    best_thr = (values[i] + values[i - 1]) / 2
        return best_idx, best_thr

    def _grow_tree(self, X, y, depth=0):
        """Recursively build the subtree for the samples ``X``, ``y``."""
        num_samples_per_class = [np.sum(y == c) for c in self.classes]
        # Store the actual label, not its position in self.classes.
        predicted_class = self.classes[np.argmax(num_samples_per_class)]
        node = Node(predicted_class=predicted_class)

        # A node containing at most one class cannot be improved by splitting.
        if np.count_nonzero(num_samples_per_class) <= 1:
            return node

        # max_depth=None means "no limit"; comparing an int with None would raise.
        if self.max_depth is not None and depth >= self.max_depth:
            return node

        idx, thr = self._best_split(X, y)
        if idx is None:
            return node

        indices_left = X[:, idx] < thr
        # If the midpoint rounded onto a data value, one side may be empty;
        # recursing on the unchanged sample set would never terminate.
        if indices_left.all() or not indices_left.any():
            return node

        node.feature_index = idx
        node.threshold = thr
        node.left = self._grow_tree(X[indices_left], y[indices_left], depth + 1)
        node.right = self._grow_tree(X[~indices_left], y[~indices_left], depth + 1)
        return node

    def _predict(self, inputs, node):
        """Walk from ``node`` down to a leaf for one sample and return its label."""
        while node.left is not None or node.right is not None:
            if inputs[node.feature_index] < node.threshold:
                node = node.left
            else:
                node = node.right
        return node.predicted_class
class Node():
    """One node of the decision tree.

    ``feature_index`` and ``threshold`` describe the split test,
    ``predicted_class`` is the prediction stored for the node, and
    ``left`` / ``right`` are the child nodes (both ``None`` until assigned).
    """

    def __init__(self, feature_index=None, threshold=None, predicted_class=None):
        # Children are always created empty; the tree builder attaches them.
        self.left = self.right = None
        self.predicted_class = predicted_class
        self.threshold = threshold
        self.feature_index = feature_index
def accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_pred`` equals ``y_true``.

    Both arguments may be any array-like (lists, tuples, NumPy arrays).
    """
    # Coerce first: comparing two plain lists with == yields a single bool,
    # which would make the mean either 0.0 or 1.0.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    return np.mean(y_true == y_pred)
def precision(y_true, y_pred):
    """Return TP / (TP + FP) for binary labels, with 1 positive and 0 negative.

    Both arguments may be any array-like. Returns 0.0 when no sample is
    counted as a predicted positive, instead of dividing zero by zero.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    tp = np.sum((y_true == 1) & (y_pred == 1))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    predicted_positive = tp + fp
    if predicted_positive == 0:
        return 0.0
    return tp / predicted_positive
def recall(y_true, y_pred):
    """Return TP / (TP + FN) for binary labels, with 1 positive and 0 negative.

    Both arguments may be any array-like. Returns 0.0 when no sample is
    counted as an actual positive, instead of dividing zero by zero.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    tp = np.sum((y_true == 1) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))
    actual_positive = tp + fn
    if actual_positive == 0:
        return 0.0
    return tp / actual_positive
def f1_score(y_true, y_pred):
    """Return the F1 score for binary labels, with 1 positive and 0 negative.

    F1 is the harmonic mean of precision and recall. It is computed here
    straight from the counts as ``2*TP / (2*TP + FP + FN)``, which is
    algebraically the same as ``2*p*r / (p + r)`` but needs a single division
    and has a single zero-denominator case. Returns 0.0 in that case.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    tp = np.sum((y_true == 1) & (y_pred == 1))
    fp = np.sum((y_true == 0) & (y_pred == 1))
    fn = np.sum((y_true == 1) & (y_pred == 0))
    denominator = 2 * tp + fp + fn
    if denominator == 0:
        return 0.0
    return 2 * tp / denominator
# Demo: train on data.csv (last column is the label) and report the metrics
# measured on that same data.
data = pd.read_csv('data.csv')
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

tree = DecisionTree()
tree.fit(X, y)
y_pred = tree.predict(X)

# Each caption is printed followed by the value of the matching metric.
for _caption, _metric in (
    ('Accuracy: ', accuracy),
    ('Precision: ', precision),
    ('Recall: ', recall),
    ('F1 score: ', f1_score),
):
    print(_caption, _metric(y, y_pred))
```
其中,`DecisionTree` 类为决策树模型的实现,`Node` 类为树结点的实现,`accuracy`、`precision`、`recall`、`f1_score` 分别为准确率、精度、召回率、F度量值的计算函数。
运行前需要在当前目录下准备 `data.csv`,其最后一列为类别标签、其余列为特征;精度、召回率和 F 度量值按 0/1 二分类(1 为正类)计算。运行代码后即可得到相应的评估指标结果。注意示例是在训练数据本身上进行预测和评估的。
阅读全文
相关推荐
















