Machine Learning for Anomaly Detection (original) (raw)

`# threshold value to consider a datapoint inlier or outlier

# Cutoff score: the (100 * outlier_fraction)-th percentile of scores_pred.
threshold = stats.scoreatpercentile(scores_pred, 100 * outlier_fraction)

# decision_function calculates the raw anomaly score for every point of the
# (xx, yy) grid; the grid is flattened into two columns for the call.
# The sign is flipped here -- presumably to match how scores_pred was
# computed earlier; TODO confirm against the code that builds scores_pred.
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) * -1
# Restore the grid shape so Z can be passed to the contour functions.
Z = Z.reshape(xx.shape)

# Fill with a blue colormap from the minimum anomaly score up to the
# threshold value, using 10 evenly spaced contour levels.
subplot = plt.subplot(1, 2, 1)
subplot.contourf(
    xx,
    yy,
    Z,
    levels=np.linspace(Z.min(), threshold, 10),
    cmap=plt.cm.Blues_r,
)

# Draw a red contour line where the anomaly score equals the threshold.
a = subplot.contour(xx, yy, Z, levels=[threshold], linewidths=2, colors='red')

# Fill in orange the range from the threshold to the maximum anomaly score.
subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()], colors='orange')

# Scatter plot of inliers (every row except the last n_outliers) as white dots.
# NOTE: the negative slices assume n_outliers > 0; with 0, `[:-0]` selects
# nothing and `[-0:]` selects every row.
b = subplot.scatter(
    X_train[:-n_outliers, 0],
    X_train[:-n_outliers, 1],
    c='white',
    s=20,
    edgecolor='k',
)

# Scatter plot of outliers (the last n_outliers rows) as black dots.
c = subplot.scatter(
    X_train[-n_outliers:, 0],
    X_train[-n_outliers:, 1],
    c='black',
    s=20,
    edgecolor='k',
)
subplot.axis('tight')

# ContourSet.collections was deprecated in Matplotlib 3.8 and later removed;
# legend_elements() returns (artists, labels), so take the first artist as
# the legend handle for the threshold contour line.
subplot.legend(
    [a.legend_elements()[0][0], b, c],
    ['learned decision function', 'true inliers', 'true outliers'],
    prop=matplotlib.font_manager.FontProperties(size=10),
    loc='lower right',
)

subplot.set_title('K-Nearest Neighbours')
# Explicit limits are set after axis('tight'), so they determine the final view.
subplot.set_xlim((-10, 10))
subplot.set_ylim((-10, 10))
plt.show()

`