Spaces:

fractalz
/

LanguageGames2

Sleeping

App Files Files Community

LanguageGames2 / app.py

$fractalz's picture$

fractalz

Create app.py

418ff33 verified 2 months ago

raw

history blame

3.36 kB

	import gradio as gr
	import numpy as np
	from scipy.spatial.distance import cosine
	import pandas as pd

	# --- Simulate a small pre-trained Word2Vec model ---
	# Dummy word vectors for demonstration
	# Raw (un-normalized) 4-dimensional toy embeddings, keyed by word.
	# Related words were given similar components by hand so that the
	# nearest-neighbor demo returns intuitive results.
	_RAW_WORD_VECTORS = {
	    'cat': (0.9, 0.7, 0.1, 0.2),
	    'dog': (0.8, 0.8, 0.3, 0.1),
	    'kitten': (0.85, 0.75, 0.15, 0.25),
	    'puppy': (0.75, 0.85, 0.25, 0.15),
	    'fish': (0.1, 0.2, 0.9, 0.8),
	    'bird': (0.2, 0.1, 0.8, 0.9),
	    'ocean': (0.05, 0.15, 0.95, 0.85),
	    'sky': (0.25, 0.05, 0.85, 0.95),
	    'run': (0.6, 0.3, 0.1, 0.1),
	    'walk': (0.55, 0.35, 0.15, 0.05),
	    'jump': (0.65, 0.25, 0.05, 0.15),
	    'king': (0.9, 0.1, 0.1, 0.8),
	    'queen': (0.8, 0.2, 0.2, 0.9),
	    'man': (0.9, 0.15, 0.05, 0.7),
	    'woman': (0.85, 0.1, 0.15, 0.85),
	}

	# Scale every vector to unit length so that a plain dot product between two
	# entries equals their cosine similarity. Building a fresh dict (instead of
	# overwriting values while iterating) keeps the raw table intact.
	dummy_word_vectors = {
	    word: np.array(components) / np.linalg.norm(components)
	    for word, components in _RAW_WORD_VECTORS.items()
	}

	# --- Function to find nearest neighbors ---
	def find_nearest_neighbors(search_word_input, *, word_vectors=None):
	    """Rank every other vocabulary word by cosine similarity to the query.

	    Args:
	        search_word_input: Word typed by the user. Surrounding whitespace
	            and letter case are ignored; ``None`` is treated as an empty
	            query.
	        word_vectors: Optional mapping of word -> 1-D numeric vector to
	            search. When omitted, the module-level ``dummy_word_vectors``
	            table is used.

	    Returns:
	        A ``(DataFrame, status_message)`` tuple.

	        * Query found: the frame has the columns ``"Neighbor Word"`` and
	          ``"Similarity Score"`` (rounded to 4 decimals), sorted from most
	          to least similar, with the query word itself excluded.
	        * Query not found: the frame has a single ``"Message"`` column that
	          lists the known words, and the status is a warning string.
	    """
	    vocabulary = dummy_word_vectors if word_vectors is None else word_vectors

	    # Tolerate a cleared textbox (None) and stray spaces around the word.
	    search_word = (search_word_input or "").strip().lower()

	    if search_word not in vocabulary:
	        known_words = ', '.join(vocabulary)
	        notice = pd.DataFrame([{"Message": f"'{search_word}' not found in our dummy vocabulary. Try one of these: {known_words}"}])
	        return notice, "Warning: Word not found!"

	    target_vector = vocabulary[search_word]

	    # Don't compare the query word to itself.
	    neighbor_words = [word for word in vocabulary if word != search_word]
	    # scipy's ``cosine`` is a distance, so similarity is its complement.
	    scores = [1 - cosine(target_vector, vocabulary[word]) for word in neighbor_words]

	    # Build the frame column-wise with an explicit float dtype so that an
	    # empty neighbor list still yields a well-formed, sortable frame.
	    results_df = pd.DataFrame({
	        "Neighbor Word": neighbor_words,
	        "Similarity Score": np.asarray(scores, dtype=float),
	    })
	    results_df = results_df.sort_values(
	        by="Similarity Score", ascending=False
	    ).reset_index(drop=True)

	    # Round only after sorting so ordering uses full precision.
	    results_df["Similarity Score"] = results_df["Similarity Score"].round(4)

	    message = f"Found nearest neighbors for '{search_word}'!"
	    return results_df, message

	# --- Gradio Interface ---
	# Single-textbox UI: the word goes to find_nearest_neighbors, whose two
	# return values fill the results table and the status line respectively.
	iface = gr.Interface(
	    title="🚀 Word Vector Explorer (Gradio POC)",
	    description=(
	        "Discover the semantic neighbors of words using word embeddings! "
	        "Type a word, and see its closest companions in the vector space."
	        "<br>_Note: This POC uses dummy word vectors. In a full version, this would connect to a large pre-trained Word2Vec model!_"
	    ),
	    fn=find_nearest_neighbors,
	    inputs=gr.Textbox(
	        placeholder="e.g., cat, king, fish",
	        label="Enter a word to explore its neighbors:",
	    ),
	    outputs=[
	        # Read-only table; its headers match the columns the callback returns.
	        gr.DataFrame(
	            label="Nearest Neighbors",
	            headers=["Neighbor Word", "Similarity Score"],
	            row_count=5,
	            wrap=True,
	            interactive=False,
	        ),
	        # Receives the status string returned alongside the table.
	        gr.Markdown(label="Status"),
	    ],
	    # One-click sample queries shown beneath the input box.
	    examples=[["cat"], ["king"], ["fish"], ["run"]],
	    # Hide the "Flag" button.
	    allow_flagging="never",
	)

	if __name__ == "__main__":
	    # Start the Gradio app only when executed as a script, not on import.
	    iface.launch()