# File size: 3,357 Bytes
# 418ff33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import gradio as gr
import numpy as np
from scipy.spatial.distance import cosine
import pandas as pd

# --- Stand-in for a small pre-trained Word2Vec model ---
# Hand-written 4-dimensional vectors used purely for demonstration; words
# meant to be related are given similar components.
dummy_word_vectors = dict(
    cat=np.array([0.9, 0.7, 0.1, 0.2]),
    dog=np.array([0.8, 0.8, 0.3, 0.1]),
    kitten=np.array([0.85, 0.75, 0.15, 0.25]),
    puppy=np.array([0.75, 0.85, 0.25, 0.15]),
    fish=np.array([0.1, 0.2, 0.9, 0.8]),
    bird=np.array([0.2, 0.1, 0.8, 0.9]),
    ocean=np.array([0.05, 0.15, 0.95, 0.85]),
    sky=np.array([0.25, 0.05, 0.85, 0.95]),
    run=np.array([0.6, 0.3, 0.1, 0.1]),
    walk=np.array([0.55, 0.35, 0.15, 0.05]),
    jump=np.array([0.65, 0.25, 0.05, 0.15]),
    king=np.array([0.9, 0.1, 0.1, 0.8]),
    queen=np.array([0.8, 0.2, 0.2, 0.9]),
    man=np.array([0.9, 0.15, 0.05, 0.7]),
    woman=np.array([0.85, 0.1, 0.15, 0.85]),
)

# Rescale every vector to unit L2 norm, keeping the insertion order of the
# table above.
dummy_word_vectors = {
    token: embedding / np.linalg.norm(embedding)
    for token, embedding in dummy_word_vectors.items()
}

# --- Function to find nearest neighbors ---
def find_nearest_neighbors(search_word_input, vectors=None):
    """Rank every other vocabulary word by cosine similarity to a query word.

    Args:
        search_word_input: The word to look up. Surrounding whitespace and
            letter case are ignored; ``None`` is treated as an empty query.
        vectors: Optional mapping of word -> 1-D numeric vector to search.
            When omitted, the module-level ``dummy_word_vectors`` is used.
            Keys are matched against the lower-cased query, so they should
            be lower-case.

    Returns:
        A ``(table, status)`` tuple.

        * Word found: ``table`` is a DataFrame with columns
          ``"Neighbor Word"`` and ``"Similarity Score"`` (rounded to four
          decimals), sorted from most to least similar and excluding the
          query word itself; ``status`` is a confirmation message.
        * Word not found: ``table`` is a one-row DataFrame with a single
          ``"Message"`` column listing the available words; ``status`` is
          ``"Warning: Word not found!"``.
    """
    if vectors is None:
        vectors = dummy_word_vectors

    # Normalize the query: a stray space or a None value should not turn a
    # valid word into "not found" or crash on .lower().
    search_word = (search_word_input or "").strip().lower()

    if search_word not in vectors:
        return (
            pd.DataFrame([{"Message": f"'{search_word}' not found in our dummy vocabulary. Try one of these: {', '.join(vectors)}"}]),
            "Warning: Word not found!"
        )

    target_vector = vectors[search_word]

    # Every word except the query itself is a candidate neighbor.
    neighbor_words = [word for word in vectors if word != search_word]
    # scipy's `cosine` is a distance, so similarity is its complement.
    neighbor_scores = [
        1 - cosine(target_vector, vectors[word]) for word in neighbor_words
    ]

    # Build the frame column-wise with an explicit float dtype so that an
    # empty neighbor list still yields both columns and can be sorted and
    # rounded without raising.
    results_df = pd.DataFrame({
        "Word": neighbor_words,
        "Cosine Similarity": pd.Series(neighbor_scores, dtype=float),
    })
    results_df = results_df.sort_values(
        by="Cosine Similarity", ascending=False
    ).reset_index(drop=True)

    # Round only after sorting so the ranking uses full precision.
    results_df["Cosine Similarity"] = results_df["Cosine Similarity"].round(4)
    results_df.columns = ["Neighbor Word", "Similarity Score"]  # Rename for UI clarity

    message = f"Found nearest neighbors for '{search_word}'!"
    return results_df, message

# --- Gradio Interface ---
# The components are created first (input box, results table, status line,
# in that order) and then wired into a single Interface around
# find_nearest_neighbors.

# Free-text box where the user types the query word.
_word_box = gr.Textbox(
    placeholder="e.g., cat, king, fish",
    label="Enter a word to explore its neighbors:",
)

# Read-only table for the first return value of find_nearest_neighbors.
_neighbors_table = gr.DataFrame(
    label="Nearest Neighbors",
    headers=["Neighbor Word", "Similarity Score"],
    row_count=5,  # intended to show up to 5 rows by default
    interactive=False,
    wrap=True,
)

# Markdown area for the second return value (the status message).
_status_line = gr.Markdown(label="Status")

# Shown under the title; the trailing note is appended after an HTML <br>.
_description = (
    "Discover the semantic neighbors of words using word embeddings! "
    "Type a word, and see its closest companions in the vector space."
    "<br>_Note: This POC uses dummy word vectors. In a full version, this would connect to a large pre-trained Word2Vec model!_"
)

# One-click example queries; each inner list holds the value for the single
# input component.
_example_queries = [["cat"], ["king"], ["fish"], ["run"]]

iface = gr.Interface(
    fn=find_nearest_neighbors,
    inputs=_word_box,
    outputs=[_neighbors_table, _status_line],
    title="🚀 Word Vector Explorer (Gradio POC)",
    description=_description,
    examples=_example_queries,
    allow_flagging="never",  # Optional: disables the "Flag" button
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()