hellorahulk committed on
Commit
b01727d
·
1 Parent(s): ae31cb7

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +103 -0
app.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ from smart_open import open
4
+ import gensim
5
+ from gensim.similarities.annoy import AnnoyIndexer
6
+ import plotly.express as px
7
+ import pandas as pd
8
+ import numpy as np
9
+ import pacmap
10
+
11
+
12
+ # Load into gensim model
13
def load_gensim(fname, num_trees=100):
    """Load text-format word2vec embeddings and build an Annoy index for them.

    Args:
        fname: Path of the embedding file, read with
            ``KeyedVectors.load_word2vec_format(..., binary=False)``.
        num_trees: Tree count handed to ``AnnoyIndexer``. Defaults to 100,
            the value that used to be hard-coded here.

    Returns:
        A ``(model, annoy_index)`` tuple: the loaded keyed vectors and the
        ``AnnoyIndexer`` built on top of them.
    """
    model = gensim.models.KeyedVectors.load_word2vec_format(fname, binary=False)
    # Search goes through the Annoy indexer (the faster lookup method).
    annoy_index = AnnoyIndexer(model, num_trees)
    return model, annoy_index
18
+
19
+
20
def searchNexplore(word, final_dfs, model, annoy_index, topn):
    """Look up the approximate neighbours of *word* and fetch their rows.

    Args:
        word: Key used to index ``model`` (``model[word]``).
        final_dfs: DataFrame whose index labels are selected with the
            neighbour keys via ``.loc``.
        model: Object supporting ``model[word]`` and
            ``most_similar(positive, topn=..., indexer=...)``.
        annoy_index: Indexer forwarded to ``most_similar``.
        topn: Number of neighbours to request. Converted with ``int()``
            before use, so a float such as ``100.0`` is accepted.

    Returns:
        A ``(searched_df, approximate_neighbors)`` tuple: the rows of
        ``final_dfs`` selected by the neighbour keys (in neighbour order)
        and the raw result of ``most_similar``.

    Raises:
        Whatever ``model[word]`` or ``final_dfs.loc[...]`` raise for unknown
        keys (presumably ``KeyError`` -- confirm against gensim / pandas).
    """
    # The count arrives from a UI slider; presumably it can be delivered as
    # a float, while the index-backed search expects a whole number.
    topn = int(topn)

    vector = model[word]
    approximate_neighbors = model.most_similar(
        [vector], topn=topn, indexer=annoy_index
    )
    # Each neighbour entry starts with its key; keep only the keys.
    rows = [neighbor[0] for neighbor in approximate_neighbors]
    searched_df = final_dfs.loc[rows]
    return searched_df, approximate_neighbors
29
+
30
+
31
def embedding_dim_reduction(
    embeddings, n_dim=2, n_neighbors=10, MN_ratio=0.5, FP_ratio=2.0
):
    """
    Perform PaCMAP dimension reduction.

    Selection of values:
    1. Default transforms: MN_ratio=0.5, FP_ratio=2.0
    2. For heavy transformations: MN_ratio=30, FP_ratio=100.0

    Returns the output of ``PaCMAP.fit_transform`` run on ``embeddings``
    with PCA initialisation.
    """
    # Caller-tunable settings first, then the fixed optimiser settings.
    pacmap_settings = {
        "n_components": n_dim,
        "n_neighbors": n_neighbors,
        "MN_ratio": MN_ratio,
        "FP_ratio": FP_ratio,
        "lr": 0.05,
        "num_iters": 1000,
        "verbose": False,
    }
    projector = pacmap.PaCMAP(**pacmap_settings)
    return projector.fit_transform(embeddings, init="pca")
53
+
54
+
55
# Load the embedding model and its Annoy index once, at import time.
model, annoy_index = load_gensim("embedding_dump.txt")

# Raw embeddings table, indexed by the CSV's "Unnamed: 0" column.
final_dfs = pd.read_csv("raw_embeddings_allinone.csv").set_index("Unnamed: 0")
58
+
59
+
60
def get_semantic(input_text, topn):
    """Build 2-D and 3-D scatter plots of the neighbours of *input_text*.

    Looks up the neighbours through ``searchNexplore`` using the module-level
    ``final_dfs``, ``model`` and ``annoy_index``, reduces the selected rows
    with ``embedding_dim_reduction`` (once to 2 and once to 3 dimensions),
    and plots each projection with the row labels as hover name and colour.

    Returns:
        ``(fig1, fig2, approximate_neighbors)``: the 2-D figure, the 3-D
        figure and the raw neighbour list returned by ``searchNexplore``.
    """
    searched_df, approximate_neighbors = searchNexplore(
        input_text, final_dfs, model, annoy_index, topn
    )

    # Settings shared by both projections, and the labels used for
    # hover text and colouring in both figures.
    pacmap_options = {"n_neighbors": 10, "MN_ratio": 0.5, "FP_ratio": 2.0}
    labels = searched_df.index.tolist()

    coords_2d = embedding_dim_reduction(searched_df, n_dim=2, **pacmap_options)
    fig1 = px.scatter(
        x=coords_2d[:, 0],
        y=coords_2d[:, 1],
        hover_name=labels,
        color=labels,
    )

    coords_3d = embedding_dim_reduction(searched_df, n_dim=3, **pacmap_options)
    fig2 = px.scatter_3d(
        x=coords_3d[:, 0],
        y=coords_3d[:, 1],
        z=coords_3d[:, 2],
        hover_name=labels,
        color=labels,
    )

    return fig1, fig2, approximate_neighbors
90
+
91
+
92
# Gradio UI: a text box for the query key and a slider for the neighbour
# count; the outputs are the 2-D plot, the 3-D plot and the neighbour list.
iface = gr.Interface(
    fn=get_semantic,
    inputs=[
        "text",
        # Minimum is 1 rather than 0: asking for zero neighbours leaves an
        # empty frame with nothing to reduce or plot. step=1 keeps the
        # requested count a whole number.
        gr.Slider(1, 1000, value=100, step=1),
    ],
    outputs=["plot", "plot", "list"],
    examples=[["SOPA_CANJA_C/ALETRIA_MAGGI_82GR", 100]],
    title="Sentiment Explorer",
    description="Get Sentiment search results",
    theme="peach",
)
# Launch separately so that `iface` keeps referring to the Interface itself
# instead of whatever `.launch()` returns.
iface.launch(inline=False)