File size: 1,364 Bytes
514343b
4fd42f1
 
 
 
 
 
 
 
 
514343b
843aeb0
 
9983408
 
843aeb0
44264ed
 
88993fe
9983408
418bd7c
 
88993fe
44264ed
20efea7
418bd7c
20efea7
88993fe
20efea7
 
 
 
 
88993fe
3a19224
418bd7c
 
 
fee5ced
88993fe
 
c996dc1
fee5ced
 
ea052a5
5241bce
a4967e1
fee5ced
b1d589a
20efea7
68b6bdf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52

''' To-do

Create a sidebar to compare two sentences or upload a CSV

In the second tab, allow users to compare all CSV files


'''

import streamlit as st
from transformers import pipeline
from textblob import TextBlob
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity

MODEL_NAME = 'paraphrase-xlm-r-multilingual-v1'

# Streamlit re-executes this whole script on every rerun (for example each
# form submission), so the model is loaded through a resource cache instead of
# being rebuilt each time. `st.cache_resource` is only available in newer
# Streamlit releases; fall back to the legacy `st.cache` API when it is absent.
if hasattr(st, "cache_resource"):
    _cache_model = st.cache_resource
else:
    _cache_model = st.cache(allow_output_mutation=True)


@_cache_model
def _load_model():
    """Build the sentence-embedding model once and reuse it across reruns."""
    return SentenceTransformer(MODEL_NAME)


def _similarity_percent(texts):
    """Return the cosine similarity of the first two texts as a whole percent.

    Args:
        texts: A sequence whose first two items are the sentences to compare.

    Returns:
        The cosine similarity of the two sentence embeddings, multiplied by
        100 and rounded to the nearest integer.
    """
    embeddings = model.encode(texts)
    cos_sim = cosine_similarity(
        embeddings[0].reshape(1, -1),
        embeddings[1].reshape(1, -1),
    )[0][0]
    # Convert to a built-in float first so round() yields a plain Python int
    # rather than a NumPy scalar.
    return round(float(cos_sim) * 100)


model = _load_model()

sentences = []

# Streamlit interface

st.title("Sentence Similarity")

# Streamlit form elements

with st.form("submission_form", clear_on_submit=False):
    sentence_1 = st.text_input("Sentence 1 input")
    sentence_2 = st.text_input("Sentence 2 input")
    submit_button = st.form_submit_button("Compare Sentences")

if submit_button:
    if not sentence_1.strip() or not sentence_2.strip():
        # Comparing blank text would report a meaningless score.
        st.warning("Please enter both sentences before comparing.")
    else:
        # Collect the two inputs in the module-level 'sentences' list
        sentences.extend((sentence_1, sentence_2))

        # Embed both sentences and express their similarity as a percentage
        cos_sim = _similarity_percent(sentences)

        st.write(f'Similarity between {sentence_1} and {sentence_2} is {cos_sim}%')