# log_phishing_dtr

# ------------------------------
# 导入必要库
# ------------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# ------------------------------
# Load the dataset (note: the CSV is read with numpy, not pandas)
# ------------------------------
# Every field is parsed as a 32-bit integer. Replace the path with the real location.
phishing_dataset = np.genfromtxt(
    '/.../phishing_dataset.csv',
    delimiter=',',
    dtype=np.int32,
)

# ------------------------------
# Separate the features from the target
# ------------------------------
# The last column is the target (phishing site or not); every column before it
# is a feature.
samples, targets = phishing_dataset[:, :-1], phishing_dataset[:, -1]

# ------------------------------
# Split into training and test sets (80% train, 20% test)
# ------------------------------
# random_state is pinned so the same split is produced on every run.
(
    training_samples,
    testing_samples,
    training_targets,
    testing_targets,
) = train_test_split(samples, targets, test_size=0.2, random_state=0)

# ------------------------------
# Create and train the logistic regression model
# ------------------------------
# fit() returns the fitted estimator itself, so construction and training chain.
log_classifier = LogisticRegression().fit(training_samples, training_targets)

# ------------------------------
# Predict on the test set and evaluate the model's accuracy
# ------------------------------
predictions = log_classifier.predict(testing_samples)
# accuracy_score gives a fraction of correct predictions; scale it to a percentage.
accuracy = accuracy_score(testing_targets, predictions) * 100.0

print("Logistic Regression accuracy: ", accuracy, sep="")

 

# Scroll to Top