# Streamlit app: finds the best-matching XPath description for a user prompt
# using an SBERT CrossEncoder similarity model.
# let's import the libraries we need
import io
import netrc
import pickle
import sys

import numpy as np
import pandas as pd
import spacy
import streamlit as st
import torch
from datasets import load_dataset
from sentence_transformers import CrossEncoder
from sklearn.metrics.pairwise import cosine_similarity
from tqdm import tqdm

tqdm.pandas()
# Load the English portion of the multilingual STS-B benchmark and expose the
# train/test splits as DataFrames.
# NOTE(review): stsb_train/stsb_test do not appear to be used later in this
# file — confirm before removing.
stsb_dataset = load_dataset('stsb_multi_mt', 'en')
stsb_train = pd.DataFrame(stsb_dataset['train'])
stsb_test = pd.DataFrame(stsb_dataset['test'])
# let's create helper functions
# Small English spaCy pipeline; used by text_processing() for lemmatization
# and stop-word filtering.
nlp = spacy.load("en_core_web_sm")
def text_processing(sentence):
    """Tokenize *sentence* with spaCy and return lowercase lemmas.

    Only alphabetic, non-stopword tokens are kept; punctuation, numbers
    and stop words are discarded.
    """
    lemmas = []
    for token in nlp(sentence):
        if not token.is_alpha or token.is_stop:
            continue
        lemmas.append(token.lemma_.lower())
    return lemmas
def cos_sim(sentence1_emb, sentence2_emb):
    """Row-wise cosine similarity between two embedding matrices.

    Parameters
    ----------
    sentence1_emb, sentence2_emb : array-like of shape (n, d)
        Paired embeddings; row i of each matrix is compared with row i of
        the other.

    Returns
    -------
    numpy.ndarray of shape (n,)
        cos(sentence1_emb[i], sentence2_emb[i]) for each row i.

    Notes
    -----
    The original computed the full (n, n) similarity matrix with
    sklearn's cosine_similarity and kept only its diagonal — O(n^2) work
    for an O(n) result. This version computes only the paired
    similarities. Zero-norm rows yield 0.0, matching sklearn's
    convention of treating zero vectors as having unit norm.
    """
    a = np.asarray(sentence1_emb, dtype=float)
    b = np.asarray(sentence2_emb, dtype=float)
    dots = np.einsum('ij,ij->i', a, b)
    norms = np.linalg.norm(a, axis=1) * np.linalg.norm(b, axis=1)
    # Avoid divide-by-zero: a zero vector has similarity 0 with anything.
    norms = np.where(norms == 0.0, 1.0, norms)
    return dots / norms
# Read the candidate xpath descriptions and build (sentence1, sentence2)
# pairs where sentence1 is a fixed probe prompt.
data = pd.read_csv("SBERT_data.csv").drop(['Unnamed: 0'], axis=1)
prompt = "charles"
data['prompt'] = prompt
data.rename(columns={'target_text': 'sentence2',
                     'prompt': 'sentence1'}, inplace=True)
data['sentence2'] = data['sentence2'].astype('str')
data['sentence1'] = data['sentence1'].astype('str')
# CrossEncoder scores each sentence pair directly (no separate embeddings).
# BUG FIX: the import of CrossEncoder was commented out at the top of the
# file, so this line raised NameError in the original.
XpathFinder = CrossEncoder("cross-encoder/stsb-roberta-base")
sentence_pairs = [[s1, s2]
                  for s1, s2 in zip(data['sentence1'], data['sentence2'])]
data['SBERT CrossEncoder_Score'] = XpathFinder.predict(
    sentence_pairs, show_progress_bar=True)
# BUG FIX: sort_values returns a new frame; the original discarded the
# result, so the data was never actually sorted.
data.sort_values(by=['SBERT CrossEncoder_Score'],
                 ascending=False, inplace=True)
loaded_model = XpathFinder
# Containers: top-level Streamlit layout — one container for the page
# header, one for the model interaction below.
header_container = st.container()
mod_container = st.container()
# Header
with header_container:
    # different levels of text you can include in your app
    st.title("Xpath Finder App")
# model container: collect a free-text description from the user, score it
# against every candidate xpath description, and show the top matches.
with mod_container:
    # collecting input from user
    prompt = st.text_input("Enter your description below ...")
    # Load the candidate data fresh for this interaction.
    # BUG FIX: the original read the Colab-only path
    # "/content/SBERT_data.csv", inconsistent with the relative path used
    # earlier in this file.
    data = pd.read_csv("SBERT_data.csv").drop(['Unnamed: 0'], axis=1)
    data['prompt'] = prompt
    data.rename(columns={'target_text': 'sentence2',
                         'prompt': 'sentence1'}, inplace=True)
    data['sentence2'] = data['sentence2'].astype('str')
    data['sentence1'] = data['sentence1'].astype('str')
    if prompt:
        # Score every (prompt, candidate) pair with the CrossEncoder that
        # was already loaded above.
        # BUG FIX: the original called predict([prompt]) — a bare string,
        # not a sentence pair — and then re-imported and reloaded the
        # model AFTER that call, discarding the instance it had just used.
        sentence_pairs = [[s1, s2]
                          for s1, s2 in zip(data['sentence1'],
                                            data['sentence2'])]
        data['SBERT CrossEncoder_Score'] = loaded_model.predict(
            sentence_pairs)
        # BUG FIX: the original commented "sorting the df" but never
        # sorted, so head(5) returned the first five rows of the file
        # rather than the five best matches.
        data.sort_values(by=['SBERT CrossEncoder_Score'],
                         ascending=False, inplace=True)
        most_acc = data.head(5)
        # Best single score, for display alongside the candidates.
        simscore = data['SBERT CrossEncoder_Score'].max()
        # predictions
        st.write("Highest Similarity score: ", simscore)
        st.text("Is this one of these the Xpath you're looking for?")
        # BUG FIX: the original wrapped st.write in another st.write,
        # which additionally rendered the inner call's None return value.
        st.write(most_acc["input_text"])