ParamDev committed (verified)
Commit 46f679b · Parent: 12b791c

Upload 3 files

Files changed (3)
  1. app.py +161 -0
  2. gitattributes +31 -0
  3. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,161 @@
+ import numpy as np
+ import matplotlib.pyplot as plt
+ from threading import Thread
+ from matplotlib.colors import ListedColormap
+ from sklearn.datasets import make_moons, make_circles, make_classification
+ from sklearn.datasets import make_blobs, make_circles, make_moons
+ import gradio as gr
+ import math
+ from functools import partial
+ import time
+
+ import matplotlib
+
+ from sklearn import svm
+ from sklearn.datasets import make_moons, make_blobs
+ from sklearn.covariance import EllipticEnvelope
+ from sklearn.ensemble import IsolationForest
+ from sklearn.neighbors import LocalOutlierFactor
+ from sklearn.linear_model import SGDOneClassSVM
+ from sklearn.kernel_approximation import Nystroem
+ from sklearn.pipeline import make_pipeline
+
+ def get_groundtruth_model(X, labels):
+     # dummy model to show true label distribution
+     class Dummy:
+         def __init__(self, y):
+             self.labels_ = labels
+
+     return Dummy(labels)
+
+ #### PLOT
+ FIGSIZE = 10,10
+ figure = plt.figure(figsize=(25, 10))
+
+
+ def train_models(input_data, outliers_fraction, n_samples, clf_name):
+     n_outliers = int(outliers_fraction * n_samples)
+     n_inliers = n_samples - n_outliers
+     blobs_params = dict(random_state=0, n_samples=n_inliers, n_features=2)
+     NAME_CLF_MAPPING = {"Robust covariance": EllipticEnvelope(contamination=outliers_fraction),
+                         "One-Class SVM": svm.OneClassSVM(nu=outliers_fraction, kernel="rbf", gamma=0.1),
+                         "One-Class SVM (SGD)": make_pipeline(
+                             Nystroem(gamma=0.1, random_state=42, n_components=150),
+                             SGDOneClassSVM(
+                                 nu=outliers_fraction,
+                                 shuffle=True,
+                                 fit_intercept=True,
+                                 random_state=42,
+                                 tol=1e-6,
+                             ),
+                         ),
+                         "Isolation Forest": IsolationForest(contamination=outliers_fraction, random_state=42),
+                         "Local Outlier Factor": LocalOutlierFactor(n_neighbors=35, contamination=outliers_fraction),
+                         }
+     DATA_MAPPING = {
+         "Central Blob": make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params)[0],
+         "Two Blobs": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params)[0],
+         "Blob with Noise": make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blobs_params)[0],
+         "Moons": 4.0
+         * (
+             make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0]
+             - np.array([0.5, 0.25])
+         ),
+         "Noise": 14.0 * (np.random.RandomState(42).rand(n_samples, 2) - 0.5),
+     }
+     DATASETS = [
+         make_blobs(centers=[[0, 0], [0, 0]], cluster_std=0.5, **blobs_params)[0],
+         make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[0.5, 0.5], **blobs_params)[0],
+         make_blobs(centers=[[2, 2], [-2, -2]], cluster_std=[1.5, 0.3], **blobs_params)[0],
+         4.0
+         * (
+             make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0]
+             - np.array([0.5, 0.25])
+         ),
+         14.0 * (np.random.RandomState(42).rand(n_samples, 2) - 0.5),
+     ]
+
+     xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
+     clf = NAME_CLF_MAPPING[clf_name]
+     plt.figure(figsize=(len(NAME_CLF_MAPPING) * 2 + 4, 12.5))
+
+
+     plot_num = 1
+     rng = np.random.RandomState(42)
+     X = DATA_MAPPING[input_data]
+     X = np.concatenate([X, rng.uniform(low=-6, high=6, size=(n_outliers, 2))], axis=0)
+
+     t0 = time.time()
+     clf.fit(X)
+     t1 = time.time()
+     # fit the data and tag outliers
+     if clf_name == "Local Outlier Factor":
+         y_pred = clf.fit_predict(X)
+     else:
+         y_pred = clf.fit(X).predict(X)
+
+     # plot the levels lines and the points
+     if clf_name != "Local Outlier Factor":
+         Z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
+         Z = Z.reshape(xx.shape)
+         plt.contour(xx, yy, Z, levels=[0], linewidths=10, colors="black")
+
+     colors = np.array(["#377eb8", "#ff7f00"])
+     plt.scatter(X[:, 0], X[:, 1], s=100, color=colors[(y_pred + 1) // 2])
+
+     plt.xlim(-7, 7)
+     plt.ylim(-7, 7)
+     plt.xticks(())
+     plt.yticks(())
+     plt.text(
+         0.99,
+         0.01,
+         ("%.2fs" % (t1 - t0)).lstrip("0"),
+         transform=plt.gca().transAxes,
+         size=60,
+         horizontalalignment="right",
+     )
+     plot_num += 1
+
+     return plt
+
+ description = "Learn how different anomaly detection algorithms perform in different datasets."
+
+ def iter_grid(n_rows, n_cols):
+     # create a grid using gradio Block
+     for _ in range(n_rows):
+         with gr.Row():
+             for _ in range(n_cols):
+                 with gr.Column():
+                     yield
+
+ title = "🕵️‍♀️ compare anomaly detection algorithms 🕵️‍♂️"
+ with gr.Blocks() as demo:
+     gr.Markdown(f"## {title}")
+     gr.Markdown(description)
+
+     input_models = ["Robust covariance","One-Class SVM","One-Class SVM (SGD)","Isolation Forest",
+                     "Local Outlier Factor"]
+     input_data = gr.Radio(
+         choices=["Central Blob", "Two Blobs", "Blob with Noise", "Moons", "Noise"],
+         value="Moons"
+     )
+     n_samples = gr.Slider(minimum=100, maximum=500, step=25, label="Number of Samples")
+     outliers_fraction = gr.Slider(minimum=0.1, maximum=0.9, step=0.1, label="Fraction of Outliers")
+     counter = 0
+
+
+     for _ in iter_grid(5, 5):
+         if counter >= len(input_models):
+             break
+
+         input_model = input_models[counter]
+         plot = gr.Plot(label=input_model)
+         fn = partial(train_models, clf_name=input_model)
+         input_data.change(fn=fn, inputs=[input_data, outliers_fraction, n_samples], outputs=plot)
+         n_samples.change(fn=fn, inputs=[input_data, outliers_fraction, n_samples], outputs=plot)
+         outliers_fraction.change(fn=fn, inputs=[input_data, outliers_fraction, n_samples], outputs=plot)
+         counter += 1
+
+ demo.launch(enable_queue=True, debug=True)
+
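Not part of the commit, but as a quick sanity check of what each panel above computes, the standalone sketch below reproduces one configuration (the "Moons" data with the Isolation Forest detector). It assumes the packages from requirements.txt are installed; the sample count, outlier fraction, and output filename are arbitrary illustrative choices.

# Editor-added sketch: one detector on one dataset, mirroring a single panel of app.py.
import numpy as np
import matplotlib
matplotlib.use("Agg")  # headless backend so the script also runs without a display
import matplotlib.pyplot as plt
from sklearn.datasets import make_moons
from sklearn.ensemble import IsolationForest

n_samples, outliers_fraction = 300, 0.15  # arbitrary values within the app's slider ranges
n_outliers = int(outliers_fraction * n_samples)

# Same recipe as DATA_MAPPING["Moons"], with uniform noise appended as outliers.
X = 4.0 * (make_moons(n_samples=n_samples, noise=0.05, random_state=0)[0] - np.array([0.5, 0.25]))
rng = np.random.RandomState(42)
X = np.concatenate([X, rng.uniform(low=-6, high=6, size=(n_outliers, 2))], axis=0)

clf = IsolationForest(contamination=outliers_fraction, random_state=42)
y_pred = clf.fit(X).predict(X)  # +1 = inlier, -1 = outlier

# Decision boundary at level 0 plus the tagged points, as in train_models.
xx, yy = np.meshgrid(np.linspace(-7, 7, 150), np.linspace(-7, 7, 150))
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
plt.contour(xx, yy, Z, levels=[0], linewidths=2, colors="black")
colors = np.array(["#377eb8", "#ff7f00"])
plt.scatter(X[:, 0], X[:, 1], s=10, color=colors[(y_pred + 1) // 2])
plt.savefig("isolation_forest_moons.png")  # arbitrary output name

In the app itself, functools.partial pins clf_name per panel, so every plot can share the same three Gradio inputs (dataset, outlier fraction, sample count).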
gitattributes ADDED
@@ -0,0 +1,31 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
requirements.txt ADDED
@@ -0,0 +1,3 @@
+ scikit-learn
+ matplotlib
+ numpy
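No versions are pinned, and gradio itself is not listed here, since a Gradio Space typically installs it through its SDK setting rather than requirements.txt. As a small editor-added check (a sketch, not part of the repository), the snippet below prints the installed versions of the packages app.py imports:

# Editor-added sketch: report installed versions of the packages app.py relies on.
import importlib.metadata as md

for pkg in ("scikit-learn", "matplotlib", "numpy", "gradio"):
    print(pkg, md.version(pkg))  # raises PackageNotFoundError if a package is missing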