import numpy as np
import gradio as gr
import plotly.graph_objects as go
from sklearn.datasets import make_circles
from sklearn.naive_bayes import BernoulliNB
from sklearn.decomposition import TruncatedSVD
from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier
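
# Demo: hash 2D points into a sparse binary representation with
# RandomTreesEmbedding, then (a) project it to 2D with TruncatedSVD and
# (b) classify it with BernoulliNB, comparing against an ExtraTreesClassifier
# trained on the raw coordinates.
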
def plot_scatter(X, y, title):
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=X[:, 0],
            y=X[:, 1],
            mode="markers",
            marker=dict(color=y, size=10, colorscale="Viridis", line=dict(width=1)),
        )
    )
    fig.update_layout(
        title=title,
        xaxis=dict(showticklabels=False),
        yaxis=dict(showticklabels=False),
    )
    return fig
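
# Plot a 2-class probability surface for `model` as a heatmap with the data
# overlaid. If `data_preprocess` is given (e.g. the fitted RandomTreesEmbedding),
# grid points are passed through it before scoring, so a model trained in the
# transformed space can still be visualized in the original 2D space.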
def plot_decision_boundary(X, y, model, data_preprocess=None, title=None):
    # Build a dense evaluation grid over the data range
    h = 0.01
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    grid = np.c_[xx.ravel(), yy.ravel()]

    # Score the grid, applying the same transformation the model was trained on
    if data_preprocess:
        grid = data_preprocess.transform(grid)
    y_grid_pred = model.predict_proba(grid)[:, 1]

    # Plot the predicted probability as a heatmap, with the data on top
    fig = go.Figure()
    fig.add_trace(
        go.Heatmap(
            x=np.arange(x_min, x_max, h),
            y=np.arange(y_min, y_max, h),
            z=y_grid_pred.reshape(xx.shape),
            colorscale="Viridis",
            opacity=0.8,
            showscale=False,
        )
    )
    fig.add_trace(
        go.Scatter(
            x=X[:, 0],
            y=X[:, 1],
            mode="markers",
            marker=dict(color=y, size=10, colorscale="Viridis", line=dict(width=1)),
        )
    )
    fig.update_layout(
        title=title if title else "Decision Boundary",
        xaxis=dict(showticklabels=False),
        yaxis=dict(showticklabels=False),
    )
    return fig
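
# Build all four figures for the app: original data, SVD projection of the
# hashed representation, and the two decision boundaries.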
def app_fn(
    factor: float,
    random_state: int,
    noise: float,
    n_estimators: int,
    max_depth: int,
):
    # Make a synthetic dataset of two concentric circles
    X, y = make_circles(factor=factor, random_state=random_state, noise=noise)

    # Use RandomTreesEmbedding to transform the data
    hasher = RandomTreesEmbedding(
        n_estimators=n_estimators, random_state=random_state, max_depth=max_depth
    )
    X_transformed = hasher.fit_transform(X)
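    # X_transformed is a sparse binary indicator matrix with one column per
    # leaf across all trees; each sample gets a 1 in the leaf it lands in.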

    # Visualize result after dimensionality reduction using truncated SVD
    svd = TruncatedSVD(n_components=2)
    X_reduced = svd.fit_transform(X_transformed)

    # Learn a Naive Bayes classifier on the transformed data
    nb = BernoulliNB()
    nb.fit(X_transformed, y)
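    # BernoulliNB assumes binary features, so it pairs naturally with the
    # 0/1 leaf-indicator representation produced by the hasher.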

    # Learn an ExtraTreesClassifier on the original data for comparison
    trees = ExtraTreesClassifier(
        max_depth=max_depth, n_estimators=n_estimators, random_state=random_state
    )
    trees.fit(X, y)

    # Build the four output figures
    fig1 = plot_scatter(X, y, "Original Data")
    fig2 = plot_scatter(
        X_reduced,
        y,
        f"Truncated SVD Reduction (2D) of Transformed Data ({X_transformed.shape[1]}D)",
    )
    fig3 = plot_decision_boundary(X, y, nb, hasher, "Naive Bayes Decision Boundary")
    fig4 = plot_decision_boundary(X, y, trees, title="Extra Trees Decision Boundary")

    return fig1, fig2, fig3, fig4
title = "Hashing Feature Transformation using Totally Random Trees"
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        """
### RandomTreesEmbedding provides a way to map data to a very high-dimensional, \
sparse representation, which might be beneficial for classification. \
The mapping is completely unsupervised and very efficient.
### This example visualizes the partitions given by several trees and shows how \
the transformation can also be used for non-linear dimensionality reduction \
or non-linear classification.
### Neighboring points often share the same leaf of a tree and therefore share \
large parts of their hashed representation. This makes it possible to separate \
two concentric circles simply based on the principal components of the \
transformed data with truncated SVD.
### In high-dimensional spaces, linear classifiers often achieve excellent \
accuracy. For sparse binary data, BernoulliNB is particularly well-suited. \
The bottom row compares the decision boundary obtained by BernoulliNB in the \
transformed space with that of an ExtraTreesClassifier learned on the original data.
[Original Example](https://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_embedding.html#sphx-glr-auto-examples-ensemble-plot-random-forest-embedding-py)
"""
    )
    with gr.Row():
        factor = gr.Slider(minimum=0.05, maximum=1.0, step=0.01, value=0.5, label="Factor")
        noise = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.05, label="Noise")
        n_estimators = gr.Slider(minimum=1, maximum=100, step=1, value=10, label="Number of Estimators")
        max_depth = gr.Slider(minimum=1, maximum=100, step=1, value=3, label="Max Depth")
        random_state = gr.Slider(minimum=0, maximum=100, step=1, value=0, label="Random State")
    with gr.Row():
        plot1 = gr.Plot(label="Original Data")
        plot2 = gr.Plot(label="Truncated SVD Reduced Data")
    with gr.Row():
        plot3 = gr.Plot(label="Naive Bayes Decision Boundary")
        plot4 = gr.Plot(label="Extra Trees Decision Boundary")
    # Re-render all four plots whenever any control changes
    ctrl_inputs = [factor, random_state, noise, n_estimators, max_depth]
    plot_outputs = [plot1, plot2, plot3, plot4]
    for control in ctrl_inputs:
        control.change(app_fn, inputs=ctrl_inputs, outputs=plot_outputs)

    # Render once on initial page load
    demo.load(app_fn, inputs=ctrl_inputs, outputs=plot_outputs)

demo.launch()