# gaussian_anomaly
# Typer | Posted on | |
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the dataset
# NOTE(review): '/.../network-logs.csv' looks like a redacted placeholder
# path -- confirm the real location before running.
dataset = pd.read_csv('/.../network-logs.csv')

# Per-feature histograms; pandas creates its own figure for these panels.
hist_dist = dataset[['LATENCY', 'THROUGHPUT']].hist(grid=False, figsize=(10, 4))

# Two-column feature matrix: column 0 is LATENCY, column 1 is THROUGHPUT.
data = dataset[['LATENCY', 'THROUGHPUT']].values

# Open a fresh figure for the scatter plot. Without it, plt.scatter draws on
# the current axes, which is one of the histogram panels created above.
plt.figure()
plt.scatter(data[:, 0], data[:, 1], alpha=0.6)
plt.xlabel('LATENCY')
plt.ylabel('THROUGHPUT')
plt.title('DATA FLOW')
plt.show()
# Build the detector from the two-feature matrix.
# NOTE(review): GaussianAnomalyDetection is not imported anywhere in the
# visible part of this file -- confirm where it is defined.
gaussian_anomaly_detection = GaussianAnomalyDetection(data)

# Report the parameters exposed by the detector (label on one line, value on
# the next, with a blank gap between the two reports).
print('mu param estimation: ', gaussian_anomaly_detection.mu_param, sep='\n')
print('\n')
print('sigma squared estimation: ', gaussian_anomaly_detection.sigma_squared, sep='\n')

# Labels from the ANOMALY column, reshaped to a column vector with one row
# per sample in `data`.
targets = dataset['ANOMALY'].values.reshape((len(data), 1))

# Score every sample with the detector.
probs = gaussian_anomaly_detection.multivariate_gaussian(data)

# select_threshold returns five values; only `threshold` and `F1` are used in
# the visible remainder of the script.
threshold, F1, precision_, recall_, f1_ = (
    gaussian_anomaly_detection.select_threshold(targets, probs)
)
print('threshold estimation: ', threshold, sep='\n')
# Row indices of the samples whose value in `probs` is below the selected
# threshold; [0] takes the row component of np.where's result.
outliers = np.where(probs < threshold)[0]

# Full dataset first, then the flagged samples re-drawn in red on top.
plt.scatter(data[:, 0], data[:, 1], alpha=0.6, label='Dataset')
plt.xlabel('LATENCY')
plt.ylabel('THROUGHPUT')
plt.title('DATA FLOW')
plt.scatter(data[outliers, 0], data[outliers, 1], alpha=0.6, c='red', label='Outliers')
plt.legend()
# Display the figure. The original called plt.plot() with no arguments, which
# draws nothing and leaves this figure open for later plotting calls to
# scribble on.
plt.show()

print('F1 score: ', F1)
from sklearn.metrics import roc_curve

# roc_curve treats a larger score as stronger evidence for the positive class
# (label 1). This script flags samples with `probs < threshold` as outliers,
# i.e. a LOWER value means "more anomalous", so the score is negated before
# it is handed to roc_curve; passing `probs` directly yields an inverted curve.
# NOTE(review): assumes ANOMALY == 1 marks the anomalous rows -- confirm.
# Both inputs are flattened because roc_curve works on 1-D arrays.
FPR, TPR, neg_thresholds = roc_curve(np.ravel(targets), -np.ravel(probs))

# Map the thresholds back onto the scale of `probs`.
OPC = -neg_thresholds

# Plotting Sensitivity
# The first threshold returned by roc_curve is an artificial sentinel (no
# sample predicted positive), so it is left out of this plot.
plt.figure()
plt.plot(OPC[1:], TPR[1:])
plt.xlabel('Probability threshold')
plt.ylabel('True positive rate')
plt.title('Sensitivity')

# Plotting ROC curve
# Separate figure: the x axis here is a rate in [0, 1], unrelated to the
# threshold axis used above.
plt.figure()
plt.plot(FPR, TPR)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.show()