File size: 5,351 Bytes
d32067c
 
 
2b51c5d
d32067c
59859db
d32067c
 
 
 
0e965c1
 
 
6fe6b9e
4c2bddb
0e965c1
 
 
d32067c
 
 
68a368d
 
 
 
 
 
 
d14b366
4c2bddb
d32067c
d375321
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d32067c
 
 
 
 
68a368d
 
d32067c
92a2e60
d14b366
92a2e60
d32067c
 
 
 
 
 
 
 
 
 
 
 
 
 
2b51c5d
 
 
d32067c
2b51c5d
d32067c
 
 
2b51c5d
d32067c
 
 
 
 
 
 
 
 
 
 
 
59859db
 
 
 
 
d32067c
 
d375321
 
59859db
d375321
 
59859db
 
 
d375321
 
 
 
59859db
d375321
 
 
 
d32067c
 
 
08cbeb7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import streamlit as st
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from openai import OpenAI
import numpy as np
import pandas as pd

# App title
st.title("Keyword Cosine Similarity Tool")

# Overview: one-line statement of purpose rendered under the title.
st.markdown(
    """

    **Purpose:** 

    Elevate the most semantically relevant queries from keyword research.

    """
)

# Inputs
# These module-level names (primary_keyword, keywords, model_name,
# openai_api_key) are read by the "Calculate Similarities" handler further down.
st.header("Input Parameters")
primary_keyword = st.text_input("Primary Keyword", placeholder="Enter your primary keyword")
# The widget keeps key="keywords" so its value stays available in
# st.session_state; the return value is the same string, so use it directly.
keywords = st.text_area(
    "Keywords to Compare",
    placeholder="Enter keywords separated by new lines or commas",
    help="You can input keywords on separate lines or separated by commas. Any keywords with commas will be treated as separate queries.",
    key="keywords",
)
model_name = st.selectbox(
    "Select Embedding Model",
    ["sentence-transformers/LaBSE", "sentence-transformers/all-MiniLM-L6-v2", "OpenAI Embeddings"],
)
# The label previously contained a mis-decoded link emoji (UTF-8 bytes shown as
# "πŸ”—"); it is restored to the intended character here.
openai_api_key = st.text_input(
    "OpenAI API Key (optional) 🔗 [Get API Key](https://platform.openai.com/api-keys)",
    type="password",  # mask the key while typing
)

# Instructions tooltip
with st.expander("ℹ️ Instructions (click for details)"):
    st.markdown(
        """

        **How to use this tool:**

        1. Enter your **Primary Keyword** in the input field.

        2. Provide a list of **Keywords to Compare** (separated by new lines or commas).

        3. Select an **Embedding Model** to compute keyword embeddings.

        4. If using OpenAI embeddings, input your **API Key**.

        5. Click **Calculate Similarities** to compute and rank your keywords by relevance.



        **What you'll get:**

        - A sorted table of your comparison keywords based on their cosine similarity to your primary keyword.

        - Option to download the results as a CSV file.

        """
    )

# Embedding back ends offered by the model selectbox, grouped by how they are invoked.
_SENTENCE_TRANSFORMER_MODELS = (
    "sentence-transformers/LaBSE",
    "sentence-transformers/all-MiniLM-L6-v2",
)
_OPENAI_MODEL_OPTION = "OpenAI Embeddings"
_OPENAI_EMBEDDING_MODEL = "text-embedding-ada-002"
# Number of texts sent per OpenAI embeddings request; kept well below the
# documented per-request array limit.
_OPENAI_BATCH_SIZE = 512


def _split_keywords(raw_keywords):
    """Split raw text-area input on commas and/or new lines, dropping blank entries."""
    pieces = (raw_keywords or "").replace(",", "\n").split("\n")
    return [piece.strip() for piece in pieces if piece.strip()]


def _embed_with_sentence_transformer(selected_model, primary, keyword_list):
    """Embed the primary keyword and the comparison keywords with a SentenceTransformer model.

    Returns a ``(primary_embedding, keyword_embeddings)`` pair of NumPy arrays.
    """
    st.info(f"Loading model: {selected_model}")
    model = SentenceTransformer(selected_model)

    st.info("Generating embeddings...")
    # Ask for NumPy output rather than torch tensors: scikit-learn consumes
    # NumPy, and a tensor living on a GPU cannot be converted implicitly.
    primary_embedding = model.encode(primary, convert_to_numpy=True)
    keyword_embeddings = model.encode(keyword_list, convert_to_numpy=True)
    return primary_embedding, keyword_embeddings


def _embed_with_openai(api_key, primary, keyword_list):
    """Embed the primary keyword and the comparison keywords via the OpenAI embeddings API.

    Texts are sent in batches instead of one request per keyword.
    Returns a ``(primary_embedding, keyword_embeddings)`` pair of NumPy arrays.
    """
    st.info("Generating OpenAI embeddings...")

    # Initialize OpenAI client with the v1 interface
    client = OpenAI(api_key=api_key)

    texts = [primary, *keyword_list]
    vectors = []
    for start in range(0, len(texts), _OPENAI_BATCH_SIZE):
        response = client.embeddings.create(
            model=_OPENAI_EMBEDDING_MODEL,
            input=texts[start:start + _OPENAI_BATCH_SIZE],
        )
        # Each item carries the index of its input; sort so vectors line up with texts.
        ordered_items = sorted(response.data, key=lambda item: item.index)
        vectors.extend(item.embedding for item in ordered_items)

    embeddings = np.array(vectors)
    # Row 0 is the primary keyword; the remaining rows follow keyword_list order.
    return embeddings[0], embeddings[1:]


def _compute_embeddings(selected_model, primary, keyword_list, api_key):
    """Dispatch to the selected embedding back end.

    Returns ``(primary_embedding, keyword_embeddings)``, or ``None`` after
    showing an error when the selection cannot be served (unknown model or
    missing OpenAI API key).
    """
    if selected_model in _SENTENCE_TRANSFORMER_MODELS:
        return _embed_with_sentence_transformer(selected_model, primary, keyword_list)

    if selected_model == _OPENAI_MODEL_OPTION:
        if not api_key:
            st.error("Please provide your OpenAI API key for this model.")
            return None
        return _embed_with_openai(api_key, primary, keyword_list)

    st.error("Invalid model selection.")
    return None


# Process Button
# The body runs only on the script rerun triggered by clicking the button.
if st.button("Calculate Similarities"):
    keyword_list = _split_keywords(keywords)

    embeddings = None
    # Validate the parsed list, not the raw text: input such as ", ," is a
    # non-empty string that still contains no keywords.
    if not (primary_keyword or "").strip() or not keyword_list:
        st.error("Please provide both the primary keyword and keywords to compare.")
    else:
        embeddings = _compute_embeddings(model_name, primary_keyword, keyword_list, openai_api_key)

    # Only continue when embeddings were produced; any error has already been
    # shown, and skipping (rather than stopping the script) lets the rest of
    # the page render.
    if embeddings is not None:
        primary_embedding, keyword_embeddings = embeddings

        # Calculate cosine similarities
        st.info("Calculating cosine similarities...")
        similarities = cosine_similarity([primary_embedding], keyword_embeddings)[0]

        # Sort results by cosine similarity, most similar first
        st.info("Sorting results...")
        results = [
            {"Keyword": kw, "Cosine Similarity": sim}
            for kw, sim in zip(keyword_list, similarities)
        ]
        sorted_results = sorted(results, key=lambda row: row["Cosine Similarity"], reverse=True)
        df_results = pd.DataFrame(sorted_results)

        # Display results
        st.header("Results")

        # Download results as CSV - placed right after the Results title.
        # NOTE(review): clicking this button triggers a rerun in which
        # st.button presumably returns False, so the results may disappear;
        # confirm and consider persisting them in st.session_state.
        st.download_button(
            label="📥 Download Results as CSV",
            data=df_results.to_csv(index=False),
            file_name="cosine_similarity_results.csv",
            mime="text/csv",
        )

        # Display the results table
        st.table(df_results)

        # Debugging/Intermediate Data - shown below the results
        with st.expander("🔧 Technical Details (click to expand)"):
            st.write("Primary Embedding:", primary_embedding)
            st.write("Keyword Embeddings:", keyword_embeddings)

# Footer: a horizontal rule followed by the author credit and contact link,
# each rendered as its own markdown element in this order.
footer_sections = (
    "---",
    "Created by [Ryland Bacorn](https://huggingface.co/ReithBjarkan). Report a [bug or make a suggestion](mailto:rybacorn@gmail.com)",
)
for footer_section in footer_sections:
    st.markdown(footer_section)