LanguageGames2 / app.py
fractalz's picture
Create app.py
418ff33 verified
raw
history blame
3.36 kB
import gradio as gr
import numpy as np
from scipy.spatial.distance import cosine
import pandas as pd
# --- Simulate a small pre-trained Word2Vec model ---
# Hand-written 4-dimensional vectors for demonstration only; words that
# should be "close" (e.g. cat/kitten, ocean/fish) were given similar values.
dummy_word_vectors = {
    'cat': np.array([0.9, 0.7, 0.1, 0.2]),
    'dog': np.array([0.8, 0.8, 0.3, 0.1]),
    'kitten': np.array([0.85, 0.75, 0.15, 0.25]),
    'puppy': np.array([0.75, 0.85, 0.25, 0.15]),
    'fish': np.array([0.1, 0.2, 0.9, 0.8]),
    'bird': np.array([0.2, 0.1, 0.8, 0.9]),
    'ocean': np.array([0.05, 0.15, 0.95, 0.85]),
    'sky': np.array([0.25, 0.05, 0.85, 0.95]),
    'run': np.array([0.6, 0.3, 0.1, 0.1]),
    'walk': np.array([0.55, 0.35, 0.15, 0.05]),
    'jump': np.array([0.65, 0.25, 0.05, 0.15]),
    'king': np.array([0.9, 0.1, 0.1, 0.8]),
    'queen': np.array([0.8, 0.2, 0.2, 0.9]),
    'man': np.array([0.9, 0.15, 0.05, 0.7]),
    'woman': np.array([0.85, 0.1, 0.15, 0.85]),
}

# Scale every vector to unit length. Cosine similarity itself does not depend
# on vector length, but unit vectors make a plain dot product equal to the
# cosine similarity, which is the usual convention for embedding tables.
# A comprehension is used so no loop variables leak into module scope.
dummy_word_vectors = {
    word: vec / np.linalg.norm(vec)
    for word, vec in dummy_word_vectors.items()
}
# --- Function to find nearest neighbors ---
def find_nearest_neighbors(search_word_input, word_vectors=None):
    """Rank every other vocabulary word by cosine similarity to the query.

    Args:
        search_word_input: The word to look up. Matching is case-insensitive
            and ignores surrounding whitespace; ``None`` is treated like an
            empty string.
        word_vectors: Optional mapping of word -> 1-D numeric vector to
            search in. Defaults to the module-level ``dummy_word_vectors``.

    Returns:
        A ``(DataFrame, str)`` pair.

        * Word found: the frame has the columns ``"Neighbor Word"`` and
          ``"Similarity Score"`` (rounded to 4 decimals), sorted from most
          to least similar, and the string is a success message. The frame
          is empty if the vocabulary contains no other word.
        * Word not found: the frame has a single ``"Message"`` column that
          lists the known vocabulary, and the string is a warning.
    """
    if word_vectors is None:
        word_vectors = dummy_word_vectors

    # Tolerate None and stray whitespace so " Cat " resolves to "cat".
    search_word = (search_word_input or "").strip().lower()

    if search_word not in word_vectors:
        vocabulary = ", ".join(word_vectors)
        not_found_df = pd.DataFrame(
            [{"Message": f"'{search_word}' not found in our dummy vocabulary. Try one of these: {vocabulary}"}]
        )
        return not_found_df, "Warning: Word not found!"

    target_vector = word_vectors[search_word]

    # scipy's ``cosine`` is a distance, so similarity is ``1 - distance``.
    # The query word itself is skipped: it would trivially score 1.0.
    similarities = [
        {"Word": word, "Cosine Similarity": 1 - cosine(target_vector, vector)}
        for word, vector in word_vectors.items()
        if word != search_word
    ]

    # Passing ``columns`` keeps both columns present even with zero rows.
    results_df = pd.DataFrame(similarities, columns=["Word", "Cosine Similarity"])
    if not results_df.empty:
        results_df = results_df.sort_values(
            by="Cosine Similarity", ascending=False
        ).reset_index(drop=True)
        # Round for a tidier table in the UI.
        results_df["Cosine Similarity"] = results_df["Cosine Similarity"].round(4)

    results_df.columns = ["Neighbor Word", "Similarity Score"]  # Rename for UI clarity

    message = f"Found nearest neighbors for '{search_word}'!"
    return results_df, message
# --- Gradio Interface ---
# One text input feeds find_nearest_neighbors; its two return values map, in
# order, onto the DataFrame (results table) and Markdown (status line) outputs.
iface = gr.Interface(
    fn=find_nearest_neighbors,
    inputs=gr.Textbox(
        label="Enter a word to explore its neighbors:",
        placeholder="e.g., cat, king, fish",
    ),
    outputs=[
        gr.DataFrame(
            headers=["Neighbor Word", "Similarity Score"],
            row_count=5,  # Display up to 5 rows by default
            wrap=True,
            interactive=False,
            label="Nearest Neighbors",
        ),
        gr.Markdown(
            label="Status",
        ),
    ],
    # The rocket emoji (U+1F680) is written as an escape so the title survives
    # being saved or served with the wrong text encoding.
    title="\U0001F680 Word Vector Explorer (Gradio POC)",
    description=(
        "Discover the semantic neighbors of words using word embeddings! "
        "Type a word, and see its closest companions in the vector space."
        "<br>_Note: This POC uses dummy word vectors. In a full version, this would connect to a large pre-trained Word2Vec model!_"
    ),
    allow_flagging="never",  # Optional: disables the "Flag" button
    # Each example is a one-element list because the interface has one input.
    examples=[
        ["cat"],
        ["king"],
        ["fish"],
        ["run"],
    ],
)
if __name__ == "__main__":
    # Start the Gradio web server only when this file is run as a script,
    # so importing the module does not launch the app.
    iface.launch()