# gaussian_anomaly

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Gaussian anomaly detection walkthrough on the LATENCY / THROUGHPUT columns
# of a network log: explore the data, fit the detector, pick a density
# threshold against the labelled ANOMALY column, plot the flagged outliers and
# finally draw the sensitivity and ROC curves.

# Load the dataset.
# NOTE(review): '/.../network-logs.csv' looks like a placeholder path; point
# it at the real CSV before running.
dataset = pd.read_csv('/.../network-logs.csv')

# Per-feature histograms (pandas creates its own figure for these).
hist_dist = dataset[['LATENCY', 'THROUGHPUT']].hist(grid=False, figsize=(10, 4))

# Feature matrix with one row per sample: column 0 = LATENCY, column 1 = THROUGHPUT.
data = dataset[['LATENCY', 'THROUGHPUT']].values

# Start a fresh figure so the scatter is not drawn on top of one of the
# histogram subplots created above.
plt.figure()
plt.scatter(data[:, 0], data[:, 1], alpha=0.6)
plt.xlabel('LATENCY')
plt.ylabel('THROUGHPUT')
plt.title('DATA FLOW')
plt.show()

# NOTE(review): GaussianAnomalyDetection is neither defined nor imported in
# the visible part of this file; it must be brought into scope before this
# line can run.
gaussian_anomaly_detection = GaussianAnomalyDetection(data)

print('mu param estimation: ')
print(gaussian_anomaly_detection.mu_param)
print('\n')
print('sigma squared estimation: ')
print(gaussian_anomaly_detection.sigma_squared)

# Ground-truth labels as an (n_samples, 1) column for select_threshold.
targets = dataset['ANOMALY'].values.reshape((data.shape[0], 1))

# Density value of every sample under the fitted Gaussian.
probs = gaussian_anomaly_detection.multivariate_gaussian(data)

(threshold, F1, precision_, recall_, f1_) = gaussian_anomaly_detection.select_threshold(targets, probs)

print('threshold estimation: ')
print(threshold)

# Samples whose density falls below the threshold are flagged as outliers.
outliers = np.where(probs < threshold)[0]

plt.figure()
plt.scatter(data[:, 0], data[:, 1], alpha=0.6, label='Dataset')
plt.xlabel('LATENCY')
plt.ylabel('THROUGHPUT')
plt.title('DATA FLOW')

plt.scatter(data[outliers, 0], data[outliers, 1], alpha=0.6, c='red', label='Outliers')
plt.legend()
# Was `plt.plot()`, which draws nothing and never displays the figure.
plt.show()

print('F1 score: ', F1)

from sklearn.metrics import roc_curve

# roc_curve expects 1-D arrays and a score that is HIGHER for the positive
# class. Outliers are the LOW-density samples (see `probs < threshold` above),
# so the density is negated to obtain an anomaly score.
# NOTE(review): assumes ANOMALY == 1 marks anomalous rows; confirm against the
# dataset.
anomaly_scores = -np.ravel(probs)
FPR, TPR, OPC = roc_curve(np.ravel(targets), anomaly_scores)

# Plotting Sensitivity
# OPC holds thresholds on the negated score; flip the sign back to density
# units and drop the first entry, which is a sentinel threshold ("nothing
# predicted positive") rather than a real operating point.
plt.figure()
plt.plot(-OPC[1:], TPR[1:])
plt.xlabel('Density threshold')
plt.ylabel('True positive rate')
plt.title('Sensitivity')
plt.show()

# Plotting ROC curve
plt.figure()
plt.plot(FPR, TPR)
plt.xlabel('False positive rate')
plt.ylabel('True positive rate')
plt.title('ROC curve')
plt.show()

 

# (Web-page navigation residue "Scroll to Top" removed; it is not part of the script.)