# log_phishing_dtr
# Typer | Posted on | |
# ------------------------------
# Import required libraries
# ------------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# ------------------------------
# Load the dataset (note: the CSV is read with numpy here, not pandas)
# ------------------------------
# Every cell is parsed as a 32-bit integer; replace the placeholder path
# below with the real location of the CSV file.
phishing_dataset = np.genfromtxt(
    '/.../phishing_dataset.csv',
    delimiter=',',
    dtype=np.int32,
)

# ------------------------------
# Separate features from the target
# ------------------------------
# Features: every column except the last. Target: the last column
# (whether the site is a phishing site).
samples, targets = phishing_dataset[:, :-1], phishing_dataset[:, -1]

# ------------------------------
# Split into training and test sets (80% train, 20% test)
# ------------------------------
# random_state is pinned so the split is the same on every run.
(
    training_samples,
    testing_samples,
    training_targets,
    testing_targets,
) = train_test_split(samples, targets, test_size=0.2, random_state=0)

# ------------------------------
# Build and fit the logistic regression model
# ------------------------------
# fit() returns the estimator itself, so construction and training chain.
log_classifier = LogisticRegression().fit(training_samples, training_targets)

# ------------------------------
# Predict on the test set and report the model's accuracy
# ------------------------------
predictions = log_classifier.predict(testing_samples)
# accuracy_score yields a fraction here; scale it to a percentage.
accuracy = accuracy_score(testing_targets, predictions) * 100.0
# `!s` applies str() explicitly, matching the original concatenation output.
print(f"Logistic Regression accuracy: {accuracy!s}")