Spaces:

awacke1
/

SentenceSimilarityDemo

Runtime error

App Files Files Community

SentenceSimilarityDemo / app.py

awacke1

Update app.py

1e01e10 verified over 1 year ago

raw

history blame contribute delete

3.08 kB

	import streamlit as st
	import nltk
	#from transformers import pipeline
	import transformers
	from sentence_transformers import SentenceTransformer
	from scipy.spatial.distance import cosine
	import numpy as np
	import seaborn as sns
	import matplotlib.pyplot as plt
	from sklearn.cluster import KMeans
	import tensorflow as tf
	import tensorflow_hub as hub


	def cluster_examples(messages, embed, nc=3):
	    """Group messages into K-Means clusters of their embeddings.

	    Args:
	        messages: Sequence of items to group (the caller passes sentences).
	        embed: Embeddings handed to ``KMeans.fit_predict``; assumed to hold
	            one row per entry of ``messages``, in the same order.
	        nc: Requested number of clusters. It is capped at
	            ``len(messages)`` because K-Means cannot form more clusters
	            than there are samples.

	    Returns:
	        A list of lists: entry ``k`` holds the messages assigned to cluster
	        ``k``. The outer list has ``min(nc, len(messages))`` entries and is
	        empty when there are no messages or ``nc`` is below 1.
	    """
	    n_clusters = min(nc, len(messages))
	    if n_clusters < 1:
	        # Nothing to cluster; avoid calling KMeans with invalid arguments.
	        return []

	    km = KMeans(
	        n_clusters=n_clusters, init='random',
	        n_init=10, max_iter=300,
	        tol=1e-04, random_state=0
	    )
	    assignments = km.fit_predict(embed)

	    # Single pass over (message, label) pairs instead of rescanning all
	    # labels once per cluster.
	    cluster_list = [[] for _ in range(n_clusters)]
	    for message, label in zip(messages, assignments):
	        cluster_list[label].append(message)
	    return cluster_list


	def plot_heatmap(labels, heatmap, rotation=90):
	    """Draw a labelled heatmap and render it in the Streamlit page.

	    Args:
	        labels: Tick labels applied to both the x and y axes.
	        heatmap: Matrix of values passed to ``seaborn.heatmap``; the colour
	            scale is fixed to the range [-1, 1].
	        rotation: Rotation, in degrees, of the x-axis tick labels.
	    """
	    sns.set(font_scale=1.2)
	    fig, ax = plt.subplots()
	    # Draw on the axes created above rather than relying on matplotlib's
	    # implicit "current axes" state.
	    sns.heatmap(
	        heatmap,
	        xticklabels=labels,
	        yticklabels=labels,
	        vmin=-1,
	        vmax=1,
	        cmap="coolwarm",
	        ax=ax)
	    ax.set_xticklabels(labels, rotation=rotation)
	    ax.set_title("Textual Similarity")

	    st.pyplot(fig)
	    # pyplot keeps every figure alive until it is closed; without this the
	    # figures pile up as the script is re-run.
	    plt.close(fig)

	# Page configuration and sidebar controls.
	# Example sentences pre-filled in the text area, one per line.
	DEFAULT_SENTENCES = (
	    "Self confidence in outcomes helps us win and to make us successful.",
	    "She has a seriously impressive intellect and mind.",
	    "Stimulating and deep conversation helps us develop and grow.",
	    "From basic quantum particles we get aerodynamics, friction, surface tension, weather, electromagnetism.",
	    "If she actively engages and comments positively, her anger disappears adapting into win-win's favor.",
	    "I love interesting topics of conversation and the understanding and exploration of thoughts.",
	    "There is the ability to manipulate things the way you want in your mind to go how you want when you are self confident, that we don’t understand yet.",
	)
	# Labels offered by the model selector; later code compares against them.
	MODEL_CHOICES = ('Sentence Transformer', 'Universal Sentence Encoder')

	st.set_page_config(page_title="Sentence Similarity Demo")

	sidebar = st.sidebar
	sidebar.title("Sentence Similarity Demo")

	# Text to analyse.
	text = sidebar.text_area('Enter sentences:', value="\n".join(DEFAULT_SENTENCES))

	# Number of K-Means clusters to request.
	nc = sidebar.slider('Select a number of clusters:', min_value=1, max_value=15, value=3)

	# Embedding backend; the first choice is selected by default.
	model_type = sidebar.radio("Choose model:", MODEL_CHOICES, index=0)

	# Model setup
	def _load_model(name):
	    """Load and return the embedding model for the given sidebar choice.

	    Args:
	        name: One of the labels offered by the model selector.

	    Raises:
	        ValueError: If ``name`` is not a known model label.
	    """
	    if name == "Sentence Transformer":
	        return SentenceTransformer('paraphrase-distilroberta-base-v1')
	    if name == "Universal Sentence Encoder":
	        model_url = "https://tfhub.dev/google/universal-sentence-encoder-large/5"
	        return hub.load(model_url)
	    raise ValueError(f"Unknown model type: {name!r}")


	def _ensure_nltk_resource(name):
	    """Download the NLTK tokenizer resource ``name`` unless already present."""
	    try:
	        nltk.data.find(f"tokenizers/{name}")
	    except LookupError:
	        nltk.download(name)


	# Streamlit re-executes this script on every widget interaction, so cache
	# the loaded model instead of loading it again on each run. Streamlit
	# releases that lack ``st.cache_resource`` keep the uncached loader.
	_cache_resource = getattr(st, "cache_resource", None)
	if _cache_resource is not None:
	    _load_model = _cache_resource(_load_model)

	model = _load_model(model_type)

	# NOTE(review): recent NLTK releases appear to read the sentence tokenizer
	# from 'punkt_tab' while older ones read 'punkt' -- fetch both; confirm
	# against the NLTK version pinned for this app.
	for _resource in ('punkt', 'punkt_tab'):
	    _ensure_nltk_resource(_resource)

	# Run model
	# Split the input into sentences; empty input yields no sentences.
	sentences = nltk.tokenize.sent_tokenize(text) if text else []

	# Skip everything when there is nothing to embed (e.g. blank input):
	# the heatmap and clustering below need at least one sentence.
	if sentences:
	    if model_type == "Sentence Transformer":
	        embed = model.encode(sentences)
	    elif model_type == "Universal Sentence Encoder":
	        embed = model(sentences).numpy()

	    # Pairwise cosine similarity: scale each embedding to unit length, then
	    # take all dot products at once instead of looping over every pair.
	    vectors = np.asarray(embed, dtype=np.float64)
	    unit_vectors = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
	    sim = unit_vectors @ unit_vectors.T

	    st.subheader("Similarity Heatmap")
	    plot_heatmap(sentences, sim)

	    cluster_list = cluster_examples(sentences, embed, nc)
	    st.subheader("Results from K-Means Clustering")
	    st.table(cluster_list)