fractalz committed on
Commit
418ff33
·
verified ·
1 Parent(s): 6c5d83b

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +93 -0
app.py ADDED
@@ -0,0 +1,93 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import numpy as np
3
+ from scipy.spatial.distance import cosine
4
+ import pandas as pd
5
+
6
# --- Simulate a small pre-trained Word2Vec model ---
# Hand-written 4-dimensional vectors for demonstration only; words meant to be
# related are given similar component values so the demo ranks them together.
dummy_word_vectors = {
    'cat': np.array([0.9, 0.7, 0.1, 0.2]),
    'dog': np.array([0.8, 0.8, 0.3, 0.1]),
    'kitten': np.array([0.85, 0.75, 0.15, 0.25]),
    'puppy': np.array([0.75, 0.85, 0.25, 0.15]),
    'fish': np.array([0.1, 0.2, 0.9, 0.8]),
    'bird': np.array([0.2, 0.1, 0.8, 0.9]),
    'ocean': np.array([0.05, 0.15, 0.95, 0.85]),
    'sky': np.array([0.25, 0.05, 0.85, 0.95]),
    'run': np.array([0.6, 0.3, 0.1, 0.1]),
    'walk': np.array([0.55, 0.35, 0.15, 0.05]),
    'jump': np.array([0.65, 0.25, 0.05, 0.15]),
    'king': np.array([0.9, 0.1, 0.1, 0.8]),
    'queen': np.array([0.8, 0.2, 0.2, 0.9]),
    'man': np.array([0.9, 0.15, 0.05, 0.7]),
    'woman': np.array([0.85, 0.1, 0.15, 0.85]),
}

# Scale every vector to unit length. Built as a new dict so values are not
# reassigned while the mapping is being iterated and no loop variables are
# left behind at module level.
dummy_word_vectors = {
    word: vec / np.linalg.norm(vec)
    for word, vec in dummy_word_vectors.items()
}
29
+
30
# --- Function to find nearest neighbors ---
def find_nearest_neighbors(search_word_input):
    """Rank every other vocabulary word by cosine similarity to the query.

    Args:
        search_word_input: The word entered by the user. Letter case and
            surrounding whitespace are ignored; ``None`` is treated as an
            empty query.

    Returns:
        A ``(table, status)`` tuple.

        * If the word is in ``dummy_word_vectors``: ``table`` is a DataFrame
          with columns ``"Neighbor Word"`` and ``"Similarity Score"``
          (rounded to 4 decimals), sorted from most to least similar, and
          ``status`` is a confirmation message.
        * Otherwise: ``table`` is a one-row DataFrame with a single
          ``"Message"`` column listing the known words, and ``status`` is a
          warning string.
    """
    # Normalise the query so inputs such as " Cat " still match the
    # lower-case vocabulary keys.
    raw_query = "" if search_word_input is None else str(search_word_input)
    search_word = raw_query.strip().lower()

    if search_word not in dummy_word_vectors:
        known_words = ", ".join(dummy_word_vectors)
        return (
            pd.DataFrame([{"Message": f"'{search_word}' not found in our dummy vocabulary. Try one of these: {known_words}"}]),
            "Warning: Word not found!"
        )

    target_vector = dummy_word_vectors[search_word]

    # scipy's `cosine` is a distance, so similarity is 1 - distance.
    # The query word itself is excluded from its own neighbour list.
    similarities = [
        {
            "Neighbor Word": word,
            "Similarity Score": 1 - cosine(target_vector, vector),
        }
        for word, vector in dummy_word_vectors.items()
        if word != search_word
    ]

    # Explicit columns keep the sort key present even if there are no neighbours.
    results_df = (
        pd.DataFrame(similarities, columns=["Neighbor Word", "Similarity Score"])
        .sort_values(by="Similarity Score", ascending=False)
        .reset_index(drop=True)
    )

    # Round for a tidier table in the UI.
    results_df["Similarity Score"] = results_df["Similarity Score"].round(4)

    message = f"Found nearest neighbors for '{search_word}'!"
    return results_df, message
57
+
58
# --- Gradio Interface ---
# Components are created in the same order as before (input box, results
# table, status line) and then wired to find_nearest_neighbors.
_word_box = gr.Textbox(
    label="Enter a word to explore its neighbors:",
    placeholder="e.g., cat, king, fish",
)

_neighbors_table = gr.DataFrame(
    headers=["Neighbor Word", "Similarity Score"],
    row_count=5,  # Display up to 5 rows by default
    wrap=True,
    interactive=False,
    label="Nearest Neighbors",
)

_status_note = gr.Markdown(label="Status")

_description = (
    "Discover the semantic neighbors of words using word embeddings! "
    "Type a word, and see its closest companions in the vector space."
    "<br>_Note: This POC uses dummy word vectors. In a full version, this would connect to a large pre-trained Word2Vec model!_"
)

iface = gr.Interface(
    fn=find_nearest_neighbors,
    inputs=_word_box,
    outputs=[_neighbors_table, _status_note],
    title="🚀 Word Vector Explorer (Gradio POC)",
    description=_description,
    allow_flagging="never",  # Optional: disables the "Flag" button
    examples=[["cat"], ["king"], ["fish"], ["run"]],
)

# Start the web app only when this file is run as a script, not on import.
if __name__ == "__main__":
    iface.launch()