Spaces:
Runtime error
Runtime error
File size: 3,673 Bytes
514343b 4fd42f1 514343b 59e519b 9983408 843aeb0 44264ed 418bd7c 88993fe 44264ed e838b9b b3c3404 1716434 b3c3404 418bd7c b3c3404 20efea7 b3c3404 20efea7 b3c3404 56c44d6 15e7e9a ea052a5 15e7e9a b0f213e 15e7e9a b1d589a 15e7e9a 56c44d6 08a6457 534b3f6 747ffcd 308457d c7626f8 08a6457 fd7311e 08a6457 fc99537 b85f19f 82206e8 fc99537 82206e8 fd7311e b0f213e a1229f1 fd7311e b0f213e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 |
''' To-do
Create a sidebar to choose between comparing two sentences and uploading a CSV
In the second tab, allow them to compare all CSV files
'''
import streamlit as st
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
def _load_model():
    """Load the multilingual paraphrase sentence-embedding model."""
    return SentenceTransformer('paraphrase-xlm-r-multilingual-v1')


# Streamlit re-executes this script from the top on every widget interaction,
# so the model load is wrapped in a resource cache instead of being repeated
# on each rerun. `st.cache_resource` is used when the installed Streamlit
# provides it; older releases fall back to the legacy `st.cache` decorator.
_cache_model = getattr(st, "cache_resource", None)
if _cache_model is None:
    _cache_model = st.cache(allow_output_mutation=True)
model = _cache_model(_load_model)()

# Streamlit interface
st.title("Sentence Similarity")
sidebar_selectbox = st.sidebar.selectbox(
    "What would you like to work with?",
    ("Compare two sentences", "Bulk upload and mark"),
)
# "Compare two sentences" page (the first, and therefore default, sidebar option).
if sidebar_selectbox == "Compare two sentences":
    st.subheader("Compare the similarity between two sentences")
    with st.form("submission_form", clear_on_submit=False):
        sentence_1 = st.text_input("Sentence 1 input")
        sentence_2 = st.text_input("Sentence 2 input")
        submit_button_compare = st.form_submit_button("Compare Sentences")

    # Only compute once the form has been submitted.
    if submit_button_compare:
        if not sentence_1.strip() or not sentence_2.strip():
            # A blank sentence would still be embedded and scored, producing a
            # number that means nothing to the user.
            st.warning("Please enter both sentences before comparing.")
        else:
            # Encode both sentences in a single call; row 0 is sentence 1 and
            # row 1 is sentence 2.
            sentence_embeddings = model.encode([sentence_1, sentence_2])
            cos_sim = cosine_similarity(
                sentence_embeddings[0].reshape(1, -1),
                sentence_embeddings[1].reshape(1, -1),
            )[0][0]
            # Convert to a whole-number percentage. int() makes the value a
            # plain Python int whichever type round() returns for the
            # similarity scalar.
            cos_sim = int(round(cos_sim * 100))
            st.write('Similarity between {} and {} is {}%'.format(sentence_1,
                                                                  sentence_2, cos_sim))
# "Bulk upload and mark" page: score every row of an uploaded CSV against a
# single reference sentence and show the result as a new 'Similarity' column.
if sidebar_selectbox == "Bulk upload and mark":
    st.subheader("Bulk compare similarity of sentences")
    sentence_reference = st.text_input("Reference sentence input")
    # Only allow user to upload CSV files
    data_file = st.file_uploader("Upload CSV", type=["csv"])
    if data_file is not None:
        file_details = {"filename": data_file.name, "filetype": data_file.type, "filesize": data_file.size}
        st.write(file_details)
        df = pd.read_csv(data_file)

        if 'Sentences' not in df.columns:
            # Without this check the lookup below fails with a bare KeyError.
            st.error("The uploaded CSV must contain a 'Sentences' column.")
        elif not sentence_reference.strip():
            # Scoring against a blank reference yields meaningless numbers.
            st.warning("Please enter a reference sentence to compare against.")
        else:
            # Blank cells are read by pandas as NaN (a float); turn them into
            # empty strings so every item handed to the encoder is text.
            comparison_sentences = df['Sentences'].fillna("").astype(str).tolist()

            similarity_scores = []
            if comparison_sentences:
                # Encode the reference once together with all rows, rather
                # than re-encoding it for every row. Row 0 is the reference;
                # the remaining rows line up with the dataframe rows.
                sentence_embeddings = model.encode([sentence_reference] + comparison_sentences)
                cos_sims = cosine_similarity(sentence_embeddings[:1], sentence_embeddings[1:])[0]
                # Convert each score to a whole-number percentage.
                similarity_scores = [int(round(cos_sim * 100)) for cos_sim in cos_sims]

            # Append new column to dataframe
            df['Similarity'] = similarity_scores
            st.dataframe(df)