EduardoPacheco committed
Commit c820b57 · 1 Parent(s): 5e03784

App itself

Files changed (1):
  1. app.py +152 -0
app.py ADDED
@@ -0,0 +1,152 @@
import numpy as np
import gradio as gr
import plotly.graph_objects as go
from sklearn.datasets import make_circles
from sklearn.naive_bayes import BernoulliNB
from sklearn.decomposition import TruncatedSVD
from sklearn.ensemble import RandomTreesEmbedding, ExtraTreesClassifier

def plot_scatter(X, y, title):
    fig = go.Figure()

    fig.add_trace(
        go.Scatter(
            x=X[:, 0],
            y=X[:, 1],
            mode="markers",
            marker=dict(color=y, size=10, colorscale="Viridis", line=dict(width=1)),
        )
    )

    fig.update_layout(
        title=title,
        xaxis=dict(showticklabels=False),
        yaxis=dict(showticklabels=False),
    )

    return fig

def plot_decision_boundary(X, y, model, data_preprocess=None, title=None):
    # Build a dense grid covering the data range
    h = 0.01
    x_min, x_max = X[:, 0].min() - 0.5, X[:, 0].max() + 0.5
    y_min, y_max = X[:, 1].min() - 0.5, X[:, 1].max() + 0.5
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    grid = np.c_[xx.ravel(), yy.ravel()]

    # Predict the probability of class 1 on every grid point,
    # transforming the grid first when a preprocessor is given
    if data_preprocess:
        grid = data_preprocess.transform(grid)
    y_grid_pred = model.predict_proba(grid)[:, 1]

    # Plot the probability surface as a heatmap with the data points on top
    fig = go.Figure()
    fig.add_trace(
        go.Heatmap(
            x=np.arange(x_min, x_max, h),
            y=np.arange(y_min, y_max, h),
            z=y_grid_pred.reshape(xx.shape),
            colorscale="Viridis",
            opacity=0.8,
            showscale=False,
        )
    )

    fig.add_trace(
        go.Scatter(
            x=X[:, 0],
            y=X[:, 1],
            mode="markers",
            marker=dict(color=y, size=10, colorscale="Viridis", line=dict(width=1)),
        )
    )

    fig.update_layout(
        title=title if title else "Decision Boundary",
        xaxis=dict(showticklabels=False),
        yaxis=dict(showticklabels=False),
    )

    return fig

def app_fn(
    factor: float,
    random_state: int,
    noise: float,
    n_estimators: int,
    max_depth: int,
):
    # Make a synthetic dataset of two noisy concentric circles
    X, y = make_circles(factor=factor, random_state=random_state, noise=noise)

    # Use RandomTreesEmbedding to map the data to a high-dimensional sparse representation
    hasher = RandomTreesEmbedding(n_estimators=n_estimators, random_state=random_state, max_depth=max_depth)
    X_transformed = hasher.fit_transform(X)

    # Visualize the result after dimensionality reduction with truncated SVD
    svd = TruncatedSVD(n_components=2)
    X_reduced = svd.fit_transform(X_transformed)

    # Learn a Naive Bayes classifier on the transformed data
    nb = BernoulliNB()
    nb.fit(X_transformed, y)

    # Learn an ExtraTreesClassifier on the original data for comparison
    trees = ExtraTreesClassifier(max_depth=max_depth, n_estimators=n_estimators, random_state=random_state)
    trees.fit(X, y)

    # Plot the original data, its reduced embedding, and both decision boundaries
    fig1 = plot_scatter(X, y, "Original Data")
    fig2 = plot_scatter(X_reduced, y, f"Truncated SVD Reduction (2D) of Transformed Data ({X_transformed.shape[1]}D)")
    fig3 = plot_decision_boundary(X, y, nb, hasher, "Naive Bayes Decision Boundary")
    fig4 = plot_decision_boundary(X, y, trees, title="Extra Trees Decision Boundary")

    return fig1, fig2, fig3, fig4

title = "Hashing Feature Transformation using Totally Random Trees"
with gr.Blocks() as demo:
    gr.Markdown(f"# {title}")
    gr.Markdown(
        """
        ### RandomTreesEmbedding provides a way to map data to a very high-dimensional, \
        sparse representation, which might be beneficial for classification. \
        The mapping is completely unsupervised and very efficient.

        ### This example visualizes the partitions given by several trees and shows how \
        the transformation can also be used for non-linear dimensionality reduction \
        or non-linear classification.

        ### Neighboring points often share the same leaf of a tree and therefore share \
        large parts of their hashed representation. This makes it possible to separate \
        two concentric circles simply based on the principal components of the \
        transformed data with truncated SVD.

        ### In high-dimensional spaces, linear classifiers often achieve excellent \
        accuracy. For sparse binary data, BernoulliNB is particularly well suited. \
        The bottom row compares the decision boundary obtained by BernoulliNB in the \
        transformed space with an ExtraTreesClassifier learned on the original data.

        [Original Example](https://scikit-learn.org/stable/auto_examples/ensemble/plot_random_forest_embedding.html#sphx-glr-auto-examples-ensemble-plot-random-forest-embedding-py)
        """
    )
    with gr.Row():
        factor = gr.Slider(minimum=0.05, maximum=1.0, step=0.01, value=0.5, label="Factor")
        noise = gr.Slider(minimum=0.0, maximum=1.0, step=0.01, value=0.05, label="Noise")
        n_estimators = gr.Slider(minimum=1, maximum=100, step=1, value=10, label="Number of Estimators")
        max_depth = gr.Slider(minimum=1, maximum=100, step=1, value=3, label="Max Depth")
        random_state = gr.Slider(minimum=0, maximum=100, step=1, value=0, label="Random State")
    btn = gr.Button("Run")
    with gr.Row():
        plot1 = gr.Plot(label="Original Data")
        plot2 = gr.Plot(label="Truncated SVD Reduced Data")
    with gr.Row():
        plot3 = gr.Plot(label="Naive Bayes Decision Boundary")
        plot4 = gr.Plot(label="Extra Trees Decision Boundary")

    btn.click(app_fn, inputs=[factor, random_state, noise, n_estimators, max_depth], outputs=[plot1, plot2, plot3, plot4])
    demo.load(app_fn, inputs=[factor, random_state, noise, n_estimators, max_depth], outputs=[plot1, plot2, plot3, plot4])

demo.launch()
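
The Gradio and Plotly plumbing above can obscure the core idea, so here is a quick orientation (an editor's sketch, not part of the commit): the hash-and-reduce pipeline can be exercised on its own with the same scikit-learn estimators. The parameter values below are illustrative, mirroring the app's slider defaults.

from sklearn.datasets import make_circles
from sklearn.decomposition import TruncatedSVD
from sklearn.ensemble import RandomTreesEmbedding
from sklearn.naive_bayes import BernoulliNB

# Two noisy concentric circles: not linearly separable in the original 2D space
X, y = make_circles(factor=0.5, noise=0.05, random_state=0)

# Each sample is encoded by the leaves it lands in, one-hot across all trees,
# which yields a sparse, high-dimensional, binary feature matrix
hasher = RandomTreesEmbedding(n_estimators=10, max_depth=3, random_state=0)
X_transformed = hasher.fit_transform(X)  # scipy sparse, shape (n_samples, n_leaves)

# Naive Bayes on the sparse binary codes now separates the circles
nb = BernoulliNB().fit(X_transformed, y)
print("BernoulliNB training accuracy:", nb.score(X_transformed, y))

# The first two SVD components of the embedding already pull the classes apart
X_reduced = TruncatedSVD(n_components=2).fit_transform(X_transformed)
print("reduced shape:", X_reduced.shape)

To try the app itself, install gradio, plotly, and scikit-learn, run python app.py, and open the local URL that demo.launch() prints.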