# NOTE: page-capture residue ("Spaces: Sleeping") -- not part of the program.
import gradio as gr
import numpy as np
import pandas as pd
from scipy.spatial.distance import cosine
# --- Simulate a small pre-trained Word2Vec model ---
# Hand-written 4-dimensional toy vectors used for demonstration only.
# Entries such as 'cat'/'kitten' or 'fish'/'ocean' have near-identical
# components, so they come out as close neighbors under cosine similarity.
dummy_word_vectors = {
    'cat': np.array([0.9, 0.7, 0.1, 0.2]),
    'dog': np.array([0.8, 0.8, 0.3, 0.1]),
    'kitten': np.array([0.85, 0.75, 0.15, 0.25]),
    'puppy': np.array([0.75, 0.85, 0.25, 0.15]),
    'fish': np.array([0.1, 0.2, 0.9, 0.8]),
    'bird': np.array([0.2, 0.1, 0.8, 0.9]),
    'ocean': np.array([0.05, 0.15, 0.95, 0.85]),
    'sky': np.array([0.25, 0.05, 0.85, 0.95]),
    'run': np.array([0.6, 0.3, 0.1, 0.1]),
    'walk': np.array([0.55, 0.35, 0.15, 0.05]),
    'jump': np.array([0.65, 0.25, 0.05, 0.15]),
    'king': np.array([0.9, 0.1, 0.1, 0.8]),
    'queen': np.array([0.8, 0.2, 0.2, 0.9]),
    'man': np.array([0.9, 0.15, 0.05, 0.7]),
    'woman': np.array([0.85, 0.1, 0.15, 0.85]),
}

# Scale every vector to unit L2 norm. Cosine similarity is scale-invariant,
# so this does not change any score; it only puts the table in a canonical
# form where a plain dot product equals the cosine similarity.
# A new dict is built instead of assigning into the one being iterated.
dummy_word_vectors = {
    word: vec / np.linalg.norm(vec)
    for word, vec in dummy_word_vectors.items()
}
# --- Function to find nearest neighbors ---
def find_nearest_neighbors(search_word_input):
    """Rank every other vocabulary word by cosine similarity to the query.

    The query is matched against the keys of the module-level
    ``dummy_word_vectors`` after lower-casing and stripping surrounding
    whitespace.

    Args:
        search_word_input: The word to look up. ``None`` is treated as an
            empty string.

    Returns:
        A ``(table, status)`` tuple.

        * Known word: ``table`` is a DataFrame with the columns
          "Neighbor Word" and "Similarity Score" (rounded to 4 decimals,
          highest score first, the query word itself excluded) and
          ``status`` is a success message.
        * Unknown word: ``table`` is a one-row DataFrame with a single
          "Message" column that lists the available vocabulary, and
          ``status`` is a warning string.
    """
    # Tolerate a missing value and stray whitespace so that inputs such as
    # " Cat " still resolve to the 'cat' entry.
    search_word = (search_word_input or "").strip().lower()

    if search_word not in dummy_word_vectors:
        vocabulary = ', '.join(dummy_word_vectors)
        return (
            pd.DataFrame([{"Message": f"'{search_word}' not found in our dummy vocabulary. Try one of these: {vocabulary}"}]),
            "Warning: Word not found!"
        )

    target_vector = dummy_word_vectors[search_word]

    # scipy's `cosine` returns a distance; 1 - distance is the similarity.
    similarities = [
        {"Word": word, "Cosine Similarity": 1 - cosine(target_vector, vector)}
        for word, vector in dummy_word_vectors.items()
        if word != search_word  # Don't compare a word to itself
    ]

    results_df = pd.DataFrame(similarities).sort_values(
        by="Cosine Similarity", ascending=False
    ).reset_index(drop=True)

    # Format the DataFrame for better display in Gradio
    results_df["Cosine Similarity"] = results_df["Cosine Similarity"].round(4)
    results_df.columns = ["Neighbor Word", "Similarity Score"]  # Rename for UI clarity

    message = f"Found nearest neighbors for '{search_word}'!"
    return results_df, message
# --- Gradio Interface ---
# One text input wired to find_nearest_neighbors, whose two return values
# feed the two outputs in order: the results table and the status line.
iface = gr.Interface(
    fn=find_nearest_neighbors,
    inputs=gr.Textbox(
        label="Enter a word to explore its neighbors:",
        placeholder="e.g., cat, king, fish"
    ),
    outputs=[
        gr.DataFrame(
            headers=["Neighbor Word", "Similarity Score"],
            # NOTE(review): the function returns one row per other vocabulary
            # word (more than 5); confirm how the installed Gradio version
            # interprets an integer row_count for a non-interactive table.
            row_count=5,
            wrap=True,
            interactive=False,
            label="Nearest Neighbors"
        ),
        gr.Markdown(
            label="Status"
        )
    ],
    # NOTE(review): the leading glyph may be a mis-encoded emoji -- confirm
    # against the intended title before changing it.
    title="π Word Vector Explorer (Gradio POC)",
    description=(
        "Discover the semantic neighbors of words using word embeddings! "
        "Type a word, and see its closest companions in the vector space."
        "<br>_Note: This POC uses dummy word vectors. In a full version, this would connect to a large pre-trained Word2Vec model!_"
    ),
    # Optional: disables the "Flag" button.
    # NOTE(review): newer Gradio releases appear to rename this argument to
    # `flagging_mode` -- confirm against the pinned Gradio version.
    allow_flagging="never",
    examples=[
        ["cat"],
        ["king"],
        ["fish"],
        ["run"]
    ]
)
# Start the Gradio app only when this file is executed as a script, so that
# importing the module does not launch it.
if __name__ == "__main__":
    iface.launch()