clstr_model
Typer | Posted on | |
# ------------------------------
# 导入必要的库
# ------------------------------
import pandas as pd
import seaborn as sns
from sklearn.decomposition import PCA
from sklearn.mixture import GaussianMixture
# ------------------------------
# Configuration
# ------------------------------
# Fixed seed so that the estimators below give the same result on every run.
# GaussianMixture initialises its parameters randomly; without a seed the
# integer id assigned to each cluster (and thus the facet a point is drawn in)
# can change between runs.
RANDOM_STATE = 0
# ------------------------------
# Load and inspect the raw data
# ------------------------------
data_df = pd.read_csv("/.../clustering.csv")
# Summary statistics. The result is neither assigned nor printed, so it is
# only shown when an interactive session / notebook echoes the expression.
data_df.describe()
# ------------------------------
# Separate the features from the target column
# ------------------------------
X_data = data_df.drop(columns='class_1')  # features: every column except 'class_1'
y_data = data_df['class_1']  # original class labels (kept for optional comparison)
# ------------------------------
# Reduce the features to two dimensions with PCA (used for plotting only)
# ------------------------------
# random_state only has an effect when a randomised SVD solver is selected;
# passing it keeps the projection reproducible in that case too.
pca = PCA(n_components=2, random_state=RANDOM_STATE)
X_2D = pca.fit_transform(X_data)
# Store both principal components on the DataFrame so seaborn can plot them.
# X_data was derived before these columns were added, so the features used
# for clustering below do not include them.
data_df['PCA1'] = X_2D[:, 0]
data_df['PCA2'] = X_2D[:, 1]
# ------------------------------
# Cluster with a Gaussian Mixture Model
# ------------------------------
# The model is fitted on the full feature set, not on the 2-D projection.
gm = GaussianMixture(
    n_components=3,
    covariance_type='full',
    random_state=RANDOM_STATE,
)
gm.fit(X_data)
# Predict the cluster label of every row
y_gm = gm.predict(X_data)
data_df['cluster'] = y_gm  # attach the cluster labels to the dataset
# ------------------------------
# Visualise the clustering result (one facet per cluster)
# ------------------------------
# fit_reg=False turns lmplot into a plain scatter plot of the PCA projection.
sns.lmplot(x="PCA1", y="PCA2", data=data_df, col='cluster', fit_reg=False)