embedding_testq / app.py
Sebbe33's picture
Update app.py
e4f69cf verified
import streamlit as st
import google.generativeai as genai
import numpy as np
# Authenticate the Gemini client with the key kept in Streamlit's secrets store.
gemini_api_key = st.secrets["GEMINI_API_KEY"]
genai.configure(api_key=gemini_api_key)

# Page heading shown at the top of the app.
st.title("Text Embedding Similarity Test")
def split_into_chunks(text: str, chunk_size: int = 500) -> list:
    """Split text into consecutive chunks of at most ``chunk_size`` characters.

    Args:
        text: The string to split.
        chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        A list of substrings covering ``text`` in order. Every chunk holds
        exactly ``chunk_size`` characters except possibly the last, which
        holds the remainder. An empty ``text`` yields an empty list.

    Raises:
        ValueError: If ``chunk_size`` is zero or negative.
    """
    if chunk_size <= 0:
        # A negative step makes range() empty, which would silently drop the
        # whole text; a zero step fails with an unrelated-looking message.
        raise ValueError(f"chunk_size must be positive, got {chunk_size}")
    return [
        text[start:start + chunk_size]
        for start in range(0, len(text), chunk_size)
    ]
def get_embedding(text):
    """Return the embedding for one piece of text.

    Sends ``text`` to the ``models/text-embedding-004`` model through
    ``genai.embed_content`` and returns the ``'embedding'`` entry of the
    response.
    """
    response = genai.embed_content(
        model="models/text-embedding-004",
        content=text,
    )
    return response['embedding']
def cosine_similarity(vec1, vec2):
    """Compute the cosine similarity between two vectors.

    Args:
        vec1: First vector (any 1-D sequence of numbers or NumPy array).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        The cosine of the angle between the vectors as a float in
        ``[-1.0, 1.0]``. If either vector has zero length (norm 0) the
        similarity is undefined and ``0.0`` is returned.
    """
    a = np.asarray(vec1, dtype=float)
    b = np.asarray(vec2, dtype=float)
    denominator = np.linalg.norm(a) * np.linalg.norm(b)
    if denominator == 0:
        # Dividing by a zero norm would produce NaN (with a RuntimeWarning),
        # which then breaks max()/ordering of the scores downstream.
        return 0.0
    return float(np.dot(a, b) / denominator)
# Text input areas: the left text is split into chunks, the right text is the
# reference every chunk is compared against.
col1, col2 = st.columns(2)
with col1:
    input_text1 = st.text_area(
        "Enter your first text:",
        height=200,
        placeholder="Type or paste your first text here...",
    )
with col2:
    input_text2 = st.text_area(
        "Enter text to compare:",
        height=200,
        placeholder="Type or paste text to compare...",
    )

if st.button("Run Similarity Test"):
    if not input_text1.strip() or not input_text2.strip():
        st.warning("Please enter text in both input fields.")
    else:
        with st.spinner("Analyzing texts..."):
            # Top-level UI boundary: any failure (API, network, maths) is
            # reported to the user instead of crashing the app.
            try:
                # Process first text into chunks
                chunks = split_into_chunks(input_text1)
                if len(chunks) > 1:
                    st.info(f"Split first text into {len(chunks)} chunks")

                # Generate embeddings for all chunks
                embeddings = [get_embedding(chunk) for chunk in chunks]

                # Generate embedding for comparison text
                compare_embedding = get_embedding(input_text2)

                # Calculate similarities (one score per chunk, same order)
                similarities = [
                    cosine_similarity(emb, compare_embedding)
                    for emb in embeddings
                ]
                max_score = max(similarities)
                max_index = similarities.index(max_score)

                # Display results
                st.subheader("📊 Similarity Results")
                st.write(f"**Highest similarity score:** {max_score:.4f}")

                st.subheader("🧩 Most Similar Chunk")
                st.write(chunks[max_index])

                st.subheader("📈 All Chunk Similarities")
                for number, (chunk, score) in enumerate(
                    zip(chunks, similarities), start=1
                ):
                    st.write(f"Chunk {number} ({len(chunk)} chars): {score:.4f}")
                    with st.expander(f"View chunk {number}"):
                        st.write(chunk)
            except Exception as e:
                st.error(f"Error processing texts: {e}")