"""Streamlit demo for sentence embeddings and topic/sentence similarity.

The page has two parts:

1. Encode a single text input and display one component of its embedding,
   selected with a slider.
2. Upload two CSV files -- one with a ``sentence`` column and one with a
   ``topic`` column -- and append one cosine-similarity column per topic to
   the sentence table.
"""

import pandas as pd
import streamlit as st
import torch  # noqa: F401  (kept from the original script; unused in the visible code)
from sentence_transformers import SentenceTransformer, util

# --- Part 1: inspect a single embedding component ---------------------------

model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

input_sentence = st.text_input('Movie title', 'Life of Brian')

# encode() receives a one-element list, so the result is indexed as
# embedding[0][position] below.
embedding = model.encode([input_sentence])

x = st.slider('Select a value')
st.write(
    'The embedding of',
    '"' + input_sentence + '"',
    'at position',
    x,
    'is',
    embedding[0][int(x)],
)

# --- Part 2: score uploaded sentences against uploaded topics ---------------

uploaded_file1 = st.file_uploader("Choose a file: sentence list")
if uploaded_file1 is not None:
    df1 = pd.read_csv(uploaded_file1)
    st.write(df1.head())

uploaded_file2 = st.file_uploader("Choose a file: topic list")
if uploaded_file2 is not None:
    df2 = pd.read_csv(uploaded_file2)
    st.write(df2.head())

if uploaded_file1 is not None and uploaded_file2 is not None:
    if 'sentence' not in df1.columns:
        # Report the problem in the page instead of crashing with a KeyError.
        st.error('The sentence list must contain a "sentence" column.')
    elif 'topic' not in df2.columns:
        st.error('The topic list must contain a "topic" column.')
    else:
        embedder = SentenceTransformer('all-MiniLM-L6-v2')

        # Plain lists avoid label-based Series indexing inside encode().
        corpus = df1['sentence'].tolist()
        topics = df2['topic'].tolist()

        # The corpus is encoded once and reused for every topic.
        corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)

        for topic in topics:
            topic_embedding = embedder.encode(topic, convert_to_tensor=True)
            # Fix: the original referenced an undefined `query_embedding`
            # here, which raised NameError; the topic embedding is the query.
            cos_scores = util.cos_sim(topic_embedding, corpus_embeddings)[0]
            # Move to CPU / NumPy so the column assignment also works when
            # the model runs on an accelerator.
            df1[str(topic)] = cos_scores.cpu().numpy()

        st.write(df1)