# dsctree_phishing_dtr
# Typer | Posted on
# ------------------------------
# Import the required libraries
# ------------------------------
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix, accuracy_score
# ------------------------------
# Load the dataset (CSV without a header row, parsed as 32-bit integers)
# ------------------------------
phishing_dataset = np.genfromtxt(
    '/.../phishing_dataset.csv',  # placeholder: replace with the real path
    dtype=np.int32,
    delimiter=',',
)
# ------------------------------
# Separate the feature columns from the target column
# ------------------------------
# Every column except the last is a feature; the last column is the label
# (per the original author's note: whether the site is a phishing site).
samples, targets = phishing_dataset[:, :-1], phishing_dataset[:, -1]
# ------------------------------
# Hold out 20% of the rows for testing (80/20 train/test split)
# ------------------------------
split_parts = train_test_split(samples, targets, test_size=0.2)
training_samples, testing_samples, training_targets, testing_targets = split_parts
# ------------------------------
# Build the decision tree and fit it on the training portion
# ------------------------------
# fit() returns the estimator itself, so construction and training chain.
tree_classifier = DecisionTreeClassifier().fit(training_samples, training_targets)
# ------------------------------
# Predict labels for the held-out test samples
# ------------------------------
predictions = tree_classifier.predict(testing_samples)
# ------------------------------
# Build and display the confusion matrix (true labels vs. predictions)
# ------------------------------
conf_matrix = confusion_matrix(y_true=testing_targets, y_pred=predictions)
print("Confusion Matrix:", conf_matrix, sep="\n")
# ------------------------------
# Compute accuracy by hand from the confusion matrix and compare it with
# scikit-learn's accuracy_score
# ------------------------------
# scikit-learn lays the matrix out with rows = true labels and
# columns = predicted labels, labels sorted in ascending order. For a binary
# problem the second (larger) label is the "positive" class, so:
#   [0, 0] = TN, [0, 1] = FP, [1, 0] = FN, [1, 1] = TP
# NOTE(review): this assumes the label column holds exactly two classes
# (a 2x2 matrix) -- confirm against the dataset.
TN = conf_matrix[0, 0]  # True Negative: true class 0, predicted class 0
FP = conf_matrix[0, 1]  # False Positive: true class 0, predicted class 1
FN = conf_matrix[1, 0]  # False Negative: true class 1, predicted class 0
TP = conf_matrix[1, 1]  # True Positive: true class 1, predicted class 1
# Accuracy is the share of correct predictions (the diagonal) among all samples.
accuracy_manual = (TP + TN) / (TP + TN + FP + FN)
print("Accuracy (Manual): {:.2f}%".format(accuracy_manual * 100))
# Library result for comparison; it should match the manual figure above.
accuracy_library = accuracy_score(testing_targets, predictions)
print("Accuracy (accuracy_score): {:.2f}%".format(accuracy_library * 100))