Spaces:

hellorahulk
/

semantic-search

Build error

App Files Files Community

hellorahulk committed on Sep 19, 2022

Commit

b01727d

1 Parent(s): ae31cb7

Create app.py

Browse files

Files changed (1) hide show

app.py +103 -0

app.py ADDED Viewed

	@@ -0,0 +1,103 @@

+import gradio as gr
+from smart_open import open
+import gensim
+from gensim.similarities.annoy import AnnoyIndexer
+import plotly.express as px
+import pandas as pd
+import numpy as np
+import pacmap
+# Load into gensim model
+def load_gensim(fname):
+    model = gensim.models.KeyedVectors.load_word2vec_format(fname, binary=False)
+    # Search using Annoy indexer; Faster method
+    annoy_index = AnnoyIndexer(model, 100)
+    return model, annoy_index
+def searchNexplore(word, final_dfs, model, annoy_index, topn):
+    vector = model[word]
+    approximate_neighbors = model.most_similar([vector], topn=topn, indexer=annoy_index)
+    rows = []
+    for row in approximate_neighbors:
+        rows.append(row[0])
+    searched_df = final_dfs.loc[rows]
+    return searched_df, approximate_neighbors
+def embedding_dim_reduction(
+    embeddings, n_dim=2, n_neighbors=10, MN_ratio=0.5, FP_ratio=2.0
+):
+    """
+    Perform PaCMAP dimention reduction
+    Selection of values :
+    1. Default transorms MN_ratio=0.5, FP_ratio=2.0
+    2. For heavy transformations MN_ratio=30, FP_ratio=100.0
+    """
+    reducer = pacmap.PaCMAP(
+        n_components=n_dim,
+        n_neighbors=n_neighbors,
+        MN_ratio=MN_ratio,
+        FP_ratio=FP_ratio,
+        lr=0.05,
+        num_iters=1000,
+        verbose=False,
+    )
+    reduced_embeddings = reducer.fit_transform(embeddings, init="pca")
+    return reduced_embeddings
+model, annoy_index = load_gensim("embedding_dump.txt")
+final_dfs = pd.read_csv("raw_embeddings_allinone.csv")
+final_dfs.set_index("Unnamed: 0", inplace=True)
+def get_semantic(input_text, topn):
+    searched_df, approximate_neighbors = searchNexplore(
+        input_text, final_dfs, model, annoy_index, topn
+    )
+    reduced_embeddings = embedding_dim_reduction(
+        searched_df, n_dim=2, n_neighbors=10, MN_ratio=0.5, FP_ratio=2.0
+    )
+    fig1 = px.scatter(
+        x=reduced_embeddings[:, 0],
+        y=reduced_embeddings[:, 1],
+        hover_name=searched_df.index.tolist(),
+        color=searched_df.index.tolist(),
+    )
+    reduced_embeddings = embedding_dim_reduction(
+        searched_df, n_dim=3, n_neighbors=10, MN_ratio=0.5, FP_ratio=2.0
+    )
+    fig2 = px.scatter_3d(
+        x=reduced_embeddings[:, 0],
+        y=reduced_embeddings[:, 1],
+        z=reduced_embeddings[:, 2],
+        hover_name=searched_df.index.tolist(),
+        color=searched_df.index.tolist(),
+    )
+    return fig1, fig2, approximate_neighbors
+iface = gr.Interface(
+    fn=get_semantic,
+    inputs=[
+        "text",
+        gr.Slider(0, 1000, value=100),
+    ],
+    outputs=["plot", "plot", "list"],
+    examples=[["SOPA_CANJA_C/ALETRIA_MAGGI_82GR", 100]],
+    title="Sentiment Explorer",
+    description="Get Sentiment search results",
+    theme="peach",
+).launch(inline=False)