Spaces:

ReithBjarkan
/

SEO_Keyword_Similarity_Tool

Running

File size: 5,351 Bytes

import streamlit as st
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI
import numpy as np
import pandas as pd

# Page heading
st.title("Keyword Cosine Similarity Tool")

# Short statement of what the tool is for, rendered as markdown.
overview_md = """

    **Purpose:** 

    Elevate the most semantically relevant queries from keyword research.

    """
st.markdown(overview_md)

# Input widgets. The names bound here (primary_keyword, keywords, model_name,
# openai_api_key) are read later when the similarities are calculated.
st.header("Input Parameters")

primary_keyword = st.text_input(
    "Primary Keyword",
    placeholder="Enter your primary keyword",
)

# The widget keeps its "keywords" session-state key; its current text is taken
# directly from the widget's return value.
keywords = st.text_area(
    "Keywords to Compare",
    placeholder="Enter keywords separated by new lines or commas",
    help="You can input keywords on separate lines or separated by commas. Any keywords with commas will be treated as separate queries.",
    key="keywords",
)

# Choices offered in the embedding-model dropdown, in display order.
embedding_model_options = [
    "sentence-transformers/LaBSE",
    "sentence-transformers/all-MiniLM-L6-v2",
    "OpenAI Embeddings",
]
model_name = st.selectbox("Select Embedding Model", embedding_model_options)

# Masked input; only consulted when the OpenAI model is selected.
openai_api_key = st.text_input(
    "OpenAI API Key (optional) 🔗 [Get API Key](https://platform.openai.com/api-keys)",
    type="password",
)

# Collapsible usage guide: the markdown is defined first, then rendered inside
# an expander so it stays out of the way until the user opens it.
instructions_md = """

        **How to use this tool:**

        1. Enter your **Primary Keyword** in the input field.

        2. Provide a list of **Keywords to Compare** (separated by new lines or commas).

        3. Select an **Embedding Model** to compute keyword embeddings.

        4. If using OpenAI embeddings, input your **API Key**.

        5. Click **Calculate Similarities** to compute and rank your keywords by relevance.



        **What you'll get:**

        - A sorted table of your comparison keywords based on their cosine similarity to your primary keyword.

        - Option to download the results as a CSV file.

        """

with st.expander("ℹ️ Instructions (click for details)"):
    st.markdown(instructions_md)

# Process Button
# When clicked: validate the inputs, embed the primary keyword and every
# comparison keyword with the selected model, rank the comparison keywords by
# cosine similarity to the primary keyword, and show/download the ranking.
if st.button("Calculate Similarities"):
    # Normalise inputs before validating so the checks see exactly what would
    # be embedded. Comparison keywords are split on commas and/or new lines and
    # empty entries are dropped; the primary keyword is stripped the same way.
    primary_text = primary_keyword.strip()
    keyword_list = [kw.strip() for kw in keywords.replace(",", "\n").split("\n") if kw.strip()]

    # Both remain None unless embeddings were actually produced. The results
    # section below is skipped in that case, so a validation error can never
    # fall through to code that needs the embeddings.
    primary_embedding = None
    keyword_embeddings = None

    if not primary_text or not keyword_list:
        # Also covers input made only of separators/whitespace (e.g. ",,"),
        # which is non-empty text but yields no keywords.
        st.error("Please provide both the primary keyword and keywords to compare.")

    elif model_name in ["sentence-transformers/LaBSE", "sentence-transformers/all-MiniLM-L6-v2"]:
        # Load SentenceTransformer model
        st.info(f"Loading model: {model_name}")
        model = SentenceTransformer(model_name)

        # Generate embeddings. encode() is left at its default NumPy output:
        # scikit-learn's cosine_similarity expects array-likes, and this keeps
        # the result type the same as in the OpenAI branch.
        st.info("Generating embeddings...")
        primary_embedding = model.encode(primary_text)
        keyword_embeddings = model.encode(keyword_list)

    elif model_name == "OpenAI Embeddings":
        if not openai_api_key:
            st.error("Please provide your OpenAI API key for this model.")
        else:
            st.info("Generating OpenAI embeddings...")

            # Initialize OpenAI client with new v1.0.0 interface
            client = OpenAI(api_key=openai_api_key)

            def get_openai_embedding(text):
                """Return the text-embedding-ada-002 embedding of *text* as a 1-D NumPy array."""
                response = client.embeddings.create(
                    model="text-embedding-ada-002",
                    input=text
                )
                return np.array(response.data[0].embedding)

            primary_embedding = get_openai_embedding(primary_text)
            keyword_embeddings = np.array([get_openai_embedding(kw) for kw in keyword_list])

    else:
        st.error("Invalid model selection.")
        st.stop()

    if primary_embedding is not None and keyword_embeddings is not None:
        # Calculate cosine similarities. The primary embedding is reshaped to a
        # single-row matrix; row 0 of the result holds one score per keyword.
        st.info("Calculating cosine similarities...")
        similarities = cosine_similarity(
            np.asarray(primary_embedding).reshape(1, -1),
            keyword_embeddings,
        )[0]

        # Sort results by cosine similarity, most similar first.
        st.info("Sorting results...")
        results = [{"Keyword": kw, "Cosine Similarity": sim} for kw, sim in zip(keyword_list, similarities)]
        sorted_results = sorted(results, key=lambda x: x["Cosine Similarity"], reverse=True)

        # Build the table once; it feeds both the CSV download and the display.
        df_results = pd.DataFrame(sorted_results)

        # Display results
        st.header("Results")

        # Download results as CSV - placed right after the Results title
        st.download_button(
            label="📥 Download Results as CSV",
            data=df_results.to_csv(index=False),
            file_name="cosine_similarity_results.csv",
            mime="text/csv"
        )

        # Display the results table
        st.table(df_results)

        # Debugging/Intermediate Data - shown below the results
        with st.expander("🔧 Technical Details (click to expand)"):
            st.write("Primary Embedding:", primary_embedding)
            st.write("Keyword Embeddings:", keyword_embeddings)

# Footer: a horizontal rule followed by the author credit and contact link.
footer_rule = "---"
footer_credit = "Created by [Ryland Bacorn](https://huggingface.co/ReithBjarkan). Report a [bug or make a suggestion](mailto:&#114;&#121;&#98;&#97;&#99;&#111;&#114;&#110;&#64;&#103;&#109;&#97;&#105;&#108;&#46;&#99;&#111;&#109;)"
for footer_part in (footer_rule, footer_credit):
    st.markdown(footer_part)