File size: 1,913 Bytes
514343b
4fd42f1
 
 
 
 
 
 
 
 
514343b
843aeb0
 
9983408
 
843aeb0
44264ed
 
88993fe
9983408
418bd7c
 
88993fe
44264ed
e838b9b
 
 
 
 
 
418bd7c
20efea7
88993fe
20efea7
 
 
 
e838b9b
 
 
 
 
 
cd6e5f0
 
 
 
 
e838b9b
88993fe
e838b9b
 
418bd7c
 
 
fee5ced
88993fe
 
c996dc1
fee5ced
 
ea052a5
5241bce
a4967e1
fee5ced
b1d589a
20efea7
68b6bdf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69

''' To-do

Create a sidebar option to either compare two sentences or upload a CSV file

In the second tab, allow users to compare all CSV files


'''

import streamlit as st
from transformers import pipeline
from textblob import TextBlob
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

MODEL_NAME = 'paraphrase-xlm-r-multilingual-v1'

# Streamlit re-executes this script from the top on every user interaction, so
# the model is loaded through a cached factory instead of being rebuilt on each
# rerun. Newer Streamlit releases expose `st.cache_resource`; older ones only
# provide `st.cache`, which needs `allow_output_mutation=True` for model objects.
_cache_model = getattr(st, "cache_resource", None)
if _cache_model is None:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def _load_model():
    """Build the sentence-embedding model once and reuse it across reruns."""
    return SentenceTransformer(MODEL_NAME)


model = _load_model()

# Holds the sentences to encode; starts empty on every script run.
sentences = []
     
# Page header and sidebar mode picker

mode_options = ("Compare two sentences", "Bulk upload and mark")

st.title("Sentence Similarity")

# The selected option is compared against the mode labels further down
# to decide which workflow to render.
sidebar = st.sidebar
sidebar_selectbox = sidebar.selectbox(
    "What would you like to work with?",
    mode_options,
)

# Streamlit form elements
#
# Exactly one form is rendered per script run. Form keys must be unique on a
# page, so creating a second form with the key "submission_form" in bulk mode
# raised a duplicate-form error. Each mode now gets its own key instead.
# TODO: the bulk mode still shows the two-sentence inputs until the CSV upload
# workflow is implemented.

if sidebar_selectbox == "Bulk upload and mark":
    form_key = "bulk_submission_form"
else:
    form_key = "submission_form"

with st.form(form_key, clear_on_submit=False):

    sentence_1 = st.text_input("Sentence 1 input")

    sentence_2 = st.text_input("Sentence 2 input")

    # True only on the rerun triggered by pressing the button.
    submit_button_compare = st.form_submit_button("Compare Sentences")


# Run the comparison only after the form has been submitted
if submit_button_compare:

    # Collect both inputs in the shared 'sentences' list
    sentences.extend((sentence_1, sentence_2))

    # Encode the collected sentences and shape the first two embeddings as
    # single-row matrices, the layout passed to cosine_similarity below
    embeddings = model.encode(sentences)
    first_vec = embeddings[0].reshape(1, -1)
    second_vec = embeddings[1].reshape(1, -1)

    # Two single-row inputs: the score is read from position [0][0] of the result
    similarity = cosine_similarity(first_vec, second_vec)
    percent = round(similarity[0][0] * 100)  # Convert to percentage and round-off

    message_template = 'Similarity between {} and {} is {}%'
    st.write(message_template.format(sentence_1, sentence_2, percent))