Spaces:
Runtime error
Runtime error
Hugging Face's logo | |
Hugging Face | |
Search models, datasets, users... | |
Models | |
Datasets | |
Spaces | |
Posts | |
Docs | |
Pricing | |
Spaces: | |
awacke1 | |
/ | |
NLPSentenceSimilarityHeatmap | |
like | |
3 | |
App | |
Files | |
Community | |
Settings | |
NLPSentenceSimilarityHeatmap | |
/ | |
app.py | |
awacke1's picture | |
awacke1 | |
Update app.py | |
c4d6857 | |
12 months ago | |
raw | |
history | |
blame | |
edit | |
delete | |
No virus | |
3.06 kB | |
import streamlit as st | |
import nltk | |
from transformers import pipeline | |
from sentence_transformers import SentenceTransformer | |
from scipy.spatial.distance import cosine | |
import numpy as np | |
import seaborn as sns | |
import matplotlib.pyplot as plt | |
from sklearn.cluster import KMeans | |
import tensorflow as tf | |
import tensorflow_hub as hub | |
def cluster_examples(messages, embed, nc=3):
    """Group messages into K-Means clusters of their embeddings.

    Args:
        messages: Sequence of texts, aligned index-for-index with ``embed``.
        embed: Embedding vectors, one row per message; passed unchanged to
            ``KMeans.fit_predict``.
        nc: Requested number of clusters. It is capped at the number of
            embeddings, because K-Means rejects a cluster count larger than
            the sample count.

    Returns:
        A list with one entry per fitted cluster; each entry is the list of
        messages assigned to that cluster, in input order. The list is empty
        when ``embed`` is empty.
    """
    n_samples = len(embed)
    if n_samples == 0:
        # Nothing to fit; avoid handing an empty matrix to KMeans.
        return []

    # Asking for more clusters than samples makes KMeans raise, so clamp.
    n_clusters = min(nc, n_samples)
    estimator = KMeans(
        n_clusters=n_clusters, init='random',
        n_init=10, max_iter=300,
        tol=1e-04, random_state=0
    )
    assignments = estimator.fit_predict(embed)

    # Single pass over the labels instead of rescanning them per cluster.
    cluster_list = [[] for _ in range(n_clusters)]
    for message, cluster_id in zip(messages, assignments):
        cluster_list[cluster_id].append(message)
    return cluster_list
def plot_heatmap(labels, heatmap, rotation=90):
    """Draw a labelled similarity heatmap and render it in the Streamlit page.

    Args:
        labels: Tick labels used for both the x and y axes.
        heatmap: 2-D matrix of values to plot; the colour scale is fixed to
            the range [-1, 1].
        rotation: Rotation, in degrees, applied to the x-axis tick labels.
    """
    sns.set(font_scale=1.2)
    fig, ax = plt.subplots()
    # Draw on the axes we just created rather than the implicit current axes.
    sns.heatmap(
        heatmap,
        xticklabels=labels,
        yticklabels=labels,
        vmin=-1,
        vmax=1,
        cmap="coolwarm",
        ax=ax)
    ax.set_xticklabels(labels, rotation=rotation)
    ax.set_title("Textual Similarity")
    try:
        st.pyplot(fig)
    finally:
        # pyplot keeps a reference to every figure it creates; close it so
        # repeated Streamlit reruns do not accumulate figures in memory.
        plt.close(fig)
# Streamlit app setup: page chrome and the sidebar controls.
st.set_page_config(page_title="Sentence Similarity Demo")
st.sidebar.title("Sentence Similarity Demo")
text = st.sidebar.text_area('Enter sentences:', value="Self confidence in outcomes helps us win and to make us successful.\nShe has a seriously impressive intellect and mind.\nStimulating and deep conversation helps us develop and grow.\nFrom basic quantum particles we get aerodynamics, friction, surface tension, weather, electromagnetism.\nIf she actively engages and comments positively, her anger disappears adapting into win-win's favor.\nI love interesting topics of conversation and the understanding and exploration of thoughts.\nThere is the ability to manipulate things the way you want in your mind to go how you want when you are self confident, that we don’t understand yet.")
nc = st.sidebar.slider('Select a number of clusters:', min_value=1, max_value=15, value=3)
model_type = st.sidebar.radio("Choose model:", ('Sentence Transformer', 'Universal Sentence Encoder'), index=0)

# Model setup: load the encoder selected in the sidebar.
if model_type == "Sentence Transformer":
    model = SentenceTransformer('paraphrase-distilroberta-base-v1')
elif model_type == "Universal Sentence Encoder":
    model_url = "https://tfhub.dev/google/universal-sentence-encoder-large/5"
    model = hub.load(model_url)

# Tokenizer data required by nltk.tokenize.sent_tokenize below.
nltk.download('punkt')

# Run model: split the input into sentences, embed, compare, and cluster.
# Whitespace-only input is truthy but yields no sentences, so the guard is on
# the tokenized list rather than on the raw text.
sentences = nltk.tokenize.sent_tokenize(text) if text else []
if sentences:
    if model_type == "Sentence Transformer":
        embed = model.encode(sentences)
    elif model_type == "Universal Sentence Encoder":
        embed = model(sentences).numpy()

    # Pairwise cosine similarity (1 - cosine distance) between embeddings.
    sim = np.zeros([len(embed), len(embed)])
    for i, em in enumerate(embed):
        for j, ea in enumerate(embed):
            sim[i][j] = 1.0 - cosine(em, ea)

    st.subheader("Similarity Heatmap")
    plot_heatmap(sentences, sim)

    # The slider allows up to 15 clusters, but K-Means cannot fit more
    # clusters than there are sentences; clamp to avoid a runtime error.
    n_clusters = min(nc, len(sentences))
    cluster_list = cluster_examples(sentences, embed, n_clusters)
    st.subheader("Results from K-Means Clustering")
    st.table(cluster_list)