awacke1's picture
Update app.py
1e01e10 verified
import streamlit as st
import nltk
#from transformers import pipeline
import transformers
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import tensorflow as tf
import tensorflow_hub as hub
def cluster_examples(messages, embed, nc=3):
    """Group messages into ``nc`` lists by K-Means clustering of their embeddings.

    Args:
        messages: Sequence of items to group; ``messages[i]`` is described by
            ``embed[i]``.
        embed: Feature rows handed to ``KMeans.fit_predict``, one per message.
        nc: Number of lists to return.

    Returns:
        A list of ``nc`` lists. List ``n`` holds the messages whose K-Means
        label is ``n``, in input order. When there are fewer messages than
        ``nc``, only that many clusters are fitted and the remaining lists
        stay empty.
    """
    cluster_list = [[] for _ in range(nc)]

    # KMeans raises ValueError when asked for more clusters than samples, so
    # fit at most one cluster per message (and skip fitting with no input).
    n_fit = min(nc, len(messages))
    if n_fit < 1:
        return cluster_list

    km = KMeans(
        n_clusters=n_fit, init='random',
        n_init=10, max_iter=300,
        tol=1e-04, random_state=0
    )
    labels = km.fit_predict(embed)

    for label, message in zip(labels, messages):
        cluster_list[label].append(message)
    return cluster_list
def plot_heatmap(labels, heatmap, rotation=90):
    """Render a labelled heatmap titled "Textual Similarity" in the Streamlit app.

    Args:
        labels: Tick labels used for both the x and y axes.
        heatmap: Matrix-like data passed to ``sns.heatmap``; colours are
            scaled over the fixed range [-1, 1].
        rotation: Rotation applied to the x tick labels.
    """
    sns.set(font_scale=1.2)
    fig, ax = plt.subplots()
    try:
        # Draw on the axes created above rather than on pyplot's implicit
        # "current axes".
        sns.heatmap(
            heatmap,
            xticklabels=labels,
            yticklabels=labels,
            vmin=-1,
            vmax=1,
            cmap="coolwarm",
            ax=ax)
        ax.set_xticklabels(labels, rotation=rotation)
        ax.set_title("Textual Similarity")
        st.pyplot(fig)
    finally:
        # The script reruns on every widget interaction; without closing,
        # each rerun would leave another open figure registered in pyplot.
        plt.close(fig)
# Streamlit app setup
# Default sentences shown in the text area, one per line.
_DEFAULT_SENTENCES = (
    "Self confidence in outcomes helps us win and to make us successful.",
    "She has a seriously impressive intellect and mind.",
    "Stimulating and deep conversation helps us develop and grow.",
    "From basic quantum particles we get aerodynamics, friction, surface tension, weather, electromagnetism.",
    "If she actively engages and comments positively, her anger disappears adapting into win-win's favor.",
    "I love interesting topics of conversation and the understanding and exploration of thoughts.",
    "There is the ability to manipulate things the way you want in your mind to go how you want when you are self confident, that we don’t understand yet.",
)
_MODEL_CHOICES = ('Sentence Transformer', 'Universal Sentence Encoder')

st.set_page_config(page_title="Sentence Similarity Demo")

# Sidebar widgets, in display order: title, input text, cluster count, model.
_sidebar = st.sidebar
_sidebar.title("Sentence Similarity Demo")
text = _sidebar.text_area('Enter sentences:', value="\n".join(_DEFAULT_SENTENCES))
nc = _sidebar.slider('Select a number of clusters:', min_value=1, max_value=15, value=3)
model_type = _sidebar.radio("Choose model:", _MODEL_CHOICES, index=0)
# Model setup
# The script is re-executed on every widget interaction, so loading the model
# inline would repeat the load each time. Cache the loaded objects instead.
# Where st.cache_resource is unavailable, fall back to a pass-through
# decorator, which loads on every run exactly as the uncached code did.
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_model(kind):
    """Return the embedding model for the given "Choose model:" selection."""
    if kind == "Sentence Transformer":
        return SentenceTransformer('paraphrase-distilroberta-base-v1')
    if kind == "Universal Sentence Encoder":
        return hub.load("https://tfhub.dev/google/universal-sentence-encoder-large/5")
    raise ValueError(f"Unknown model type: {kind!r}")


@_cache_resource
def _download_punkt():
    """Fetch the NLTK 'punkt' data used by the sentence tokenizer."""
    # NOTE(review): newer NLTK releases may also need the 'punkt_tab'
    # resource for sent_tokenize - confirm against the deployed NLTK version.
    return nltk.download('punkt')


model = _load_model(model_type)
_download_punkt()
# Run model
sentences = nltk.tokenize.sent_tokenize(text) if text else []

# Input can be non-empty yet yield no sentences (e.g. whitespace only); the
# heatmap and clustering below need at least one sentence, so skip them then.
if sentences:
    if model_type == "Sentence Transformer":
        embed = model.encode(sentences)
    elif model_type == "Universal Sentence Encoder":
        embed = model(sentences).numpy()

    # Pairwise cosine similarity: scale each embedding row to unit length,
    # then one matrix product gives every dot product at once. Clipping
    # removes rounding overshoot outside [-1, 1].
    vectors = np.asarray(embed, dtype=float)
    unit = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
    sim = np.clip(unit @ unit.T, -1.0, 1.0)

    st.subheader("Similarity Heatmap")
    plot_heatmap(sentences, sim)

    # K-Means cannot fit more clusters than there are sentences, so cap the
    # slider value at the sentence count.
    cluster_list = cluster_examples(sentences, embed, min(nc, len(sentences)))
    st.subheader("Results from K-Means Clustering")
    cluster_table = st.table(cluster_list)