Machine Learning for Anomaly Detection (original) (raw)
`# threshold value to consider a
# datapoint inlier or outlier
# NOTE(review): scores_pred and outlier_fraction are defined outside this
# excerpt; scores_pred is presumably on the same negated scale as Z below --
# confirm against the code that computes it.
threshold = stats.scoreatpercentile(scores_pred, 100 * outlier_fraction)

# decision_function yields the raw anomaly score for every grid point; the
# sign is flipped and the flat result is reshaped back onto the xx/yy mesh.
Z = clf.decision_function(np.c_[xx.ravel(), yy.ravel()]) * -1
Z = Z.reshape(xx.shape)

# Fill with a blue colormap from the minimum anomaly score up to the
# threshold value.
# NOTE(review): contourf needs increasing levels, so this assumes
# Z.min() < threshold < Z.max() -- confirm for the data in use.
subplot = plt.subplot(1, 2, 1)
subplot.contourf(
    xx,
    yy,
    Z,
    levels=np.linspace(Z.min(), threshold, 10),
    cmap=plt.cm.Blues_r,
)

# Draw a red contour line where the anomaly score equals the threshold.
a = subplot.contour(xx, yy, Z, levels=[threshold], linewidths=2, colors='red')

# Fill in orange the region whose anomaly score lies between the threshold
# and the maximum anomaly score.
subplot.contourf(xx, yy, Z, levels=[threshold, Z.max()], colors='orange')

# The last n_outliers rows of X_train are plotted as the outliers. An explicit
# split index is used because negative slicing breaks for n_outliers == 0
# (X_train[:-0] is empty and X_train[-0:] is the whole array).
split_at = len(X_train) - n_outliers

# Scatter plot of inliers with white dots.
b = subplot.scatter(
    X_train[:split_at, 0],
    X_train[:split_at, 1],
    c='white',
    s=20,
    edgecolor='k',
)

# Scatter plot of outliers with black dots.
c = subplot.scatter(
    X_train[split_at:, 0],
    X_train[split_at:, 1],
    c='black',
    s=20,
    edgecolor='k',
)

subplot.axis('tight')

# ContourSet.collections was deprecated in Matplotlib 3.8 and later removed,
# so the legend handle for the threshold contour is taken from
# legend_elements(), which returns (artists, labels).
boundary_handle = a.legend_elements()[0][0]
subplot.legend(
    [boundary_handle, b, c],
    ['learned decision function', 'true inliers', 'true outliers'],
    prop=matplotlib.font_manager.FontProperties(size=10),
    loc='lower right',
)

subplot.set_title('K-Nearest Neighbours')
subplot.set_xlim((-10, 10))
subplot.set_ylim((-10, 10))
plt.show()
`