import streamlit as st from sentence_transformers import SentenceTransformer from sklearn.metrics.pairwise import cosine_similarity from openai import OpenAI import numpy as np import pandas as pd # App title st.title("Keyword Cosine Similarity Tool") # Overview st.markdown( """ **Purpose:** Elevate the most semantically relevant queries from keyword research. """ ) # Inputs st.header("Input Parameters") primary_keyword = st.text_input("Primary Keyword", placeholder="Enter your primary keyword") st.text_area( "Keywords to Compare", placeholder="Enter keywords separated by new lines or commas", help="You can input keywords on separate lines or separated by commas. Any keywords with commas will be treated as separate queries.", key="keywords" ) keywords = st.session_state.keywords model_name = st.selectbox("Select Embedding Model", ["sentence-transformers/LaBSE", "sentence-transformers/all-MiniLM-L6-v2", "OpenAI Embeddings"]) openai_api_key = st.text_input("OpenAI API Key (optional) 🔗 [Get API Key](https://platform.openai.com/api-keys)", type="password") # Instructions tooltip with st.expander("â„šī¸ Instructions (click for details)"): st.markdown( """ **How to use this tool:** 1. Enter your **Primary Keyword** in the input field. 2. Provide a list of **Keywords to Compare** (separated by new lines or commas). 3. Select an **Embedding Model** to compute keyword embeddings. 4. If using OpenAI embeddings, input your **API Key**. 5. Click **Calculate Similarities** to compute and rank your keywords by relevance. **What you'll get:** - A sorted table of your comparison keywords based on their cosine similarity to your primary keyword. - Option to download the results as a CSV file. """ ) # Process Button if st.button("Calculate Similarities"): if not primary_keyword or not keywords: st.error("Please provide both the primary keyword and keywords to compare.") else: # Process keywords: Split by commas and/or new lines keyword_list = [kw.strip() for kw in keywords.replace(",", "\n").split("\n") if kw.strip()] # Check model type if model_name in ["sentence-transformers/LaBSE", "sentence-transformers/all-MiniLM-L6-v2"]: # Load SentenceTransformer model st.info(f"Loading model: {model_name}") model = SentenceTransformer(model_name) # Generate embeddings st.info("Generating embeddings...") primary_embedding = model.encode(primary_keyword, convert_to_tensor=True) keyword_embeddings = model.encode(keyword_list, convert_to_tensor=True) elif model_name == "OpenAI Embeddings": if not openai_api_key: st.error("Please provide your OpenAI API key for this model.") else: st.info("Generating OpenAI embeddings...") # Initialize OpenAI client with new v1.0.0 interface client = OpenAI(api_key=openai_api_key) def get_openai_embedding(text): response = client.embeddings.create( model="text-embedding-ada-002", input=text ) return np.array(response.data[0].embedding) primary_embedding = get_openai_embedding(primary_keyword) keyword_embeddings = np.array([get_openai_embedding(kw) for kw in keyword_list]) else: st.error("Invalid model selection.") st.stop() # Calculate cosine similarities st.info("Calculating cosine similarities...") similarities = cosine_similarity([primary_embedding], keyword_embeddings)[0] # Sort results by cosine similarity st.info("Sorting results...") results = [{"Keyword": kw, "Cosine Similarity": sim} for kw, sim in zip(keyword_list, similarities)] sorted_results = sorted(results, key=lambda x: x["Cosine Similarity"], reverse=True) # Display results st.header("Results") # Download results as CSV - moved up to appear right after Results title st.download_button( label="đŸ“Ĩ Download Results as CSV", data=pd.DataFrame(sorted_results).to_csv(index=False), file_name="cosine_similarity_results.csv", mime="text/csv" ) # Display the results table df_results = pd.DataFrame(sorted_results) st.table(df_results) # Debugging/Intermediate Data - moved below results with st.expander("🔧 Technical Details (click to expand)"): st.write("Primary Embedding:", primary_embedding) st.write("Keyword Embeddings:", keyword_embeddings) # Footer st.markdown("---") st.markdown("Created by [Ryland Bacorn](https://huggingface.co/ReithBjarkan). Report a [bug or make a suggestion](mailto:rybacorn@gmail.com)")