# prcp_spam_filter
# Typer | Posted on | |
# ------------------------------
# Import required libraries
# ------------------------------
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Perceptron
from sklearn.metrics import accuracy_score
# ------------------------------
# Load the dataset
# ------------------------------
df = pd.read_csv('/.../sms_spam_perceptron.csv')  # replace with the actual path
# ------------------------------
# Features X: the 2nd and 3rd columns, extracted as a plain array
# (presumably word-frequency / TF-style values -- confirm against the CSV)
# ------------------------------
X = df.iloc[:, [1, 2]].values
# ------------------------------
# Labels y: read from the 1st column; 'spam' is encoded as -1 and every
# other value as 1 (not spam)
# ------------------------------
y = np.where(df.iloc[:, 0].values == 'spam', -1, 1)
# ------------------------------
# Split into training and test sets
# ------------------------------
# 30% of the rows are held out for testing; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=0,
)
# ------------------------------
# Build the perceptron model and train it
# ------------------------------
# At most 40 passes over the training data, learning rate 0.1, fixed seed.
# fit() returns the estimator itself, so construction and training chain.
p = Perceptron(random_state=0, eta0=0.1, max_iter=40).fit(X_train, y_train)
# ------------------------------
# Predict on the test set and evaluate the model
# ------------------------------
y_pred = p.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# Element-wise mismatch mask; summing it counts the wrong predictions.
misclassified_samples = (y_pred != y_test).sum()
print('Misclassified samples:', misclassified_samples)
print('Accuracy: %.2f' % accuracy)