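# Page-header residue captured with this file (not part of the program);
# apparently the author avatar label, last commit message and revision:
#   ReithBjarkan's picture / "OpenAI API Key link" / 4c2bddb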
import streamlit as st
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI
import numpy as np
import pandas as pd
# Page heading shown at the top of the app.
st.title("Keyword Cosine Similarity Tool")

# One-paragraph statement of purpose, rendered as Markdown under the title.
_OVERVIEW_MD = """
**Purpose:**
Elevate the most semantically relevant queries from keyword research.
"""
st.markdown(_OVERVIEW_MD)
# Inputs: every widget below is re-read on each Streamlit rerun and its
# current value is bound to a module-level name used by the button handler.
st.header("Input Parameters")

# The single keyword every other keyword is compared against.
primary_keyword = st.text_input(
    "Primary Keyword",
    placeholder="Enter your primary keyword",
)

# Raw, unsplit text of candidate keywords. The widget keeps its key so the
# value also stays available as st.session_state["keywords"]; the return
# value is that same text, so it is used directly.
keywords = st.text_area(
    "Keywords to Compare",
    placeholder="Enter keywords separated by new lines or commas",
    help="You can input keywords on separate lines or separated by commas. Any keywords with commas will be treated as separate queries.",
    key="keywords",
)

# Embedding backend: two local SentenceTransformer models or the OpenAI API.
model_name = st.selectbox(
    "Select Embedding Model",
    [
        "sentence-transformers/LaBSE",
        "sentence-transformers/all-MiniLM-L6-v2",
        "OpenAI Embeddings",
    ],
)

# Only needed for the "OpenAI Embeddings" option; masked while typing.
# The label's link emoji was mis-encoded ("πŸ”—") and is restored here.
openai_api_key = st.text_input(
    "OpenAI API Key (optional) 🔗 [Get API Key](https://platform.openai.com/api-keys)",
    type="password",
)
# Instructions shown in a collapsed expander.
# Blank lines separate the headings from the lists: without one after step 5,
# Markdown treats "**What you'll get:**" as a continuation of that list item
# instead of a heading for the bullet list that follows.
_INSTRUCTIONS_MD = """
**How to use this tool:**

1. Enter your **Primary Keyword** in the input field.
2. Provide a list of **Keywords to Compare** (separated by new lines or commas).
3. Select an **Embedding Model** to compute keyword embeddings.
4. If using OpenAI embeddings, input your **API Key**.
5. Click **Calculate Similarities** to compute and rank your keywords by relevance.

**What you'll get:**

- A sorted table of your comparison keywords based on their cosine similarity to your primary keyword.
- Option to download the results as a CSV file.
"""

with st.expander("ℹ️ Instructions (click for details)"):
    st.markdown(_INSTRUCTIONS_MD)
# Process Button
# Selectbox options that are loaded locally through SentenceTransformer.
_SENTENCE_TRANSFORMER_MODELS = (
    "sentence-transformers/LaBSE",
    "sentence-transformers/all-MiniLM-L6-v2",
)
# Selectbox option that routes to the OpenAI embeddings endpoint.
_OPENAI_MODEL_OPTION = "OpenAI Embeddings"
_OPENAI_EMBEDDING_MODEL = "text-embedding-ada-002"
# The embeddings endpoint documents a cap of 2048 items per array input;
# batches are kept comfortably below that.
_OPENAI_BATCH_SIZE = 1000


def _parse_keywords(raw_text):
    """Split *raw_text* on commas and newlines, dropping blank entries."""
    pieces = raw_text.replace(",", "\n").split("\n")
    return [piece.strip() for piece in pieces if piece.strip()]


def _embed_with_openai(api_key, texts):
    """Return a 2-D array holding one OpenAI embedding row per entry of *texts*.

    Texts are sent in batches rather than one request per keyword, which
    avoids a network round trip for every keyword.
    """
    client = OpenAI(api_key=api_key)
    vectors = []
    for start in range(0, len(texts), _OPENAI_BATCH_SIZE):
        response = client.embeddings.create(
            model=_OPENAI_EMBEDDING_MODEL,
            input=texts[start:start + _OPENAI_BATCH_SIZE],
        )
        # Each returned item carries the index of the input it belongs to;
        # sort on it so rows line up with *texts* regardless of reply order.
        batch = sorted(response.data, key=lambda item: item.index)
        vectors.extend(item.embedding for item in batch)
    return np.array(vectors)


def _compute_embeddings(model_name, primary_keyword, keyword_list, openai_api_key):
    """Embed the primary keyword and the comparison keywords.

    Returns:
        A ``(primary_embedding, keyword_embeddings)`` tuple of NumPy arrays,
        or ``None`` when an error has been reported to the user and the
        calculation cannot continue.
    """
    if model_name in _SENTENCE_TRANSFORMER_MODELS:
        st.info(f"Loading model: {model_name}")
        model = SentenceTransformer(model_name)
        st.info("Generating embeddings...")
        # Request NumPy output (encode's default) rather than torch tensors:
        # scikit-learn converts its inputs to NumPy arrays, which fails for
        # tensors that do not live on the CPU.
        primary_embedding = np.asarray(model.encode(primary_keyword))
        keyword_embeddings = np.asarray(model.encode(keyword_list))
        return primary_embedding, keyword_embeddings

    if model_name == _OPENAI_MODEL_OPTION:
        if not openai_api_key:
            st.error("Please provide your OpenAI API key for this model.")
            # Previously execution continued past this error and crashed with
            # a NameError because no embeddings had been assigned.
            return None
        st.info("Generating OpenAI embeddings...")
        embeddings = _embed_with_openai(openai_api_key, [primary_keyword, *keyword_list])
        return embeddings[0], embeddings[1:]

    st.error("Invalid model selection.")
    return None


def _show_results(keyword_list, similarities, primary_embedding, keyword_embeddings):
    """Render the ranked table, the CSV download and the raw embeddings."""
    st.info("Sorting results...")
    rows = [
        {"Keyword": kw, "Cosine Similarity": sim}
        for kw, sim in zip(keyword_list, similarities)
    ]
    rows.sort(key=lambda row: row["Cosine Similarity"], reverse=True)
    # Built once and shared by the download button and the table.
    df_results = pd.DataFrame(rows)

    st.header("Results")
    # Download button sits directly under the Results title.
    st.download_button(
        label="📥 Download Results as CSV",
        data=df_results.to_csv(index=False),
        file_name="cosine_similarity_results.csv",
        mime="text/csv",
    )
    st.table(df_results)

    # Raw vectors for debugging, kept below the results.
    with st.expander("🔧 Technical Details (click to expand)"):
        st.write("Primary Embedding:", primary_embedding)
        st.write("Keyword Embeddings:", keyword_embeddings)


def _run_similarity(primary_keyword, keywords, model_name, openai_api_key):
    """Validate the inputs, embed them and display keywords ranked by similarity.

    Every failure path reports via ``st.error`` and returns early instead of
    halting the script, so anything rendered after the button still appears.
    """
    primary = (primary_keyword or "").strip()
    keyword_list = _parse_keywords(keywords or "")
    # Check the parsed list, not the raw text: input made only of separators
    # or whitespace is truthy but yields no keywords to compare.
    if not primary or not keyword_list:
        st.error("Please provide both the primary keyword and keywords to compare.")
        return

    embeddings = _compute_embeddings(model_name, primary, keyword_list, openai_api_key)
    if embeddings is None:
        return
    primary_embedding, keyword_embeddings = embeddings

    st.info("Calculating cosine similarities...")
    # cosine_similarity expects 2-D inputs; the primary vector becomes one row.
    similarities = cosine_similarity(
        np.asarray(primary_embedding).reshape(1, -1),
        keyword_embeddings,
    )[0]

    _show_results(keyword_list, similarities, primary_embedding, keyword_embeddings)


if st.button("Calculate Similarities"):
    _run_similarity(primary_keyword, keywords, model_name, openai_api_key)
# Footer: a horizontal rule, then the author credit with a contact link.
for _footer_md in (
    "---",
    "Created by [Ryland Bacorn](https://huggingface.co/ReithBjarkan). Report a [bug or make a suggestion](mailto:rybacorn@gmail.com)",
):
    st.markdown(_footer_md)