File size: 3,402 Bytes
e88272a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
Hugging Face's logo
Hugging Face
Search models, datasets, users...
Models
Datasets
Spaces
Posts
Docs
Pricing



Spaces:

awacke1
/
NLPSentenceSimilarityHeatmap 

like
3

App
Files
Community
Settings
NLPSentenceSimilarityHeatmap
/
app.py
awacke1's picture
awacke1
Update app.py
c4d6857
12 months ago
raw
history
blame
edit
delete
No virus
3.06 kB
import streamlit as st
import nltk
from transformers import pipeline
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans
import tensorflow as tf
import tensorflow_hub as hub


def cluster_examples(messages, embed, nc=3):
    """Group messages by the K-Means cluster assigned to their embeddings.

    Args:
        messages: Sequence of texts; ``messages[i]`` is paired with the
            i-th row of ``embed``.
        embed: Embeddings passed to ``KMeans.fit_predict``, one row per
            message.
        nc: Requested number of clusters. It is capped at ``len(messages)``
            because K-Means rejects a cluster count larger than the number
            of samples.

    Returns:
        A list of ``min(nc, len(messages))`` clusters, each a list of the
        messages assigned to it, in their original order. The list is empty
        when there are no messages or ``nc`` is less than 1.
    """
    n_clusters = min(nc, len(messages))
    if n_clusters < 1:
        # Nothing to cluster; avoid handing KMeans an invalid configuration.
        return []

    km = KMeans(
        n_clusters=n_clusters, init='random',
        n_init=10, max_iter=300,
        tol=1e-04, random_state=0,  # fixed seed keeps the grouping reproducible
    )
    labels = km.fit_predict(embed)

    # Single pass over (message, label) pairs instead of rescanning every
    # label once per cluster.
    cluster_list = [[] for _ in range(n_clusters)]
    for message, label in zip(messages, labels):
        cluster_list[label].append(message)
    return cluster_list


def plot_heatmap(labels, heatmap, rotation=90):
    """Draw a labelled heatmap and render it in the Streamlit app.

    Args:
        labels: Tick labels applied to both the x and y axes.
        heatmap: 2-D array of values to plot; the colour scale is fixed to
            the range [-1, 1].
        rotation: Rotation, in degrees, of the x-axis tick labels.
    """
    sns.set(font_scale=1.2)
    fig, ax = plt.subplots()
    # Draw on the axes created above rather than relying on pyplot's
    # implicit "current axes".
    sns.heatmap(
        heatmap,
        xticklabels=labels,
        yticklabels=labels,
        vmin=-1,
        vmax=1,
        cmap="coolwarm",
        ax=ax,
    )
    ax.set_xticklabels(labels, rotation=rotation)
    ax.set_title("Textual Similarity")

    try:
        st.pyplot(fig)
    finally:
        # pyplot keeps every figure it creates alive until it is closed;
        # without this each call would leave another figure in memory.
        plt.close(fig)

# Streamlit app setup
_APP_TITLE = "Sentence Similarity Demo"

# Example sentences pre-filled in the sidebar text area, one per line.
_DEFAULT_SENTENCES = (
    "Self confidence in outcomes helps us win and to make us successful.",
    "She has a seriously impressive intellect and mind.",
    "Stimulating and deep conversation helps us develop and grow.",
    "From basic quantum particles we get aerodynamics, friction, surface tension, weather, electromagnetism.",
    "If she actively engages and comments positively, her anger disappears adapting into win-win's favor.",
    "I love interesting topics of conversation and the understanding and exploration of thoughts.",
    "There is the ability to manipulate things the way you want in your mind to go how you want when you are self confident, that we don’t understand yet.",
)
_MODEL_CHOICES = ('Sentence Transformer', 'Universal Sentence Encoder')

st.set_page_config(page_title=_APP_TITLE)

_sidebar = st.sidebar
_sidebar.title(_APP_TITLE)

# Free-text input: the sentences to compare.
text = _sidebar.text_area(
    'Enter sentences:',
    value="\n".join(_DEFAULT_SENTENCES),
)

# Number of K-Means clusters requested by the user.
nc = _sidebar.slider(
    'Select a number of clusters:',
    min_value=1,
    max_value=15,
    value=3,
)

# Which embedding model to use; the first choice is selected by default.
model_type = _sidebar.radio("Choose model:", _MODEL_CHOICES, index=0)

# Model setup
# Streamlit re-executes this script on every widget interaction, so the
# loaded model is cached instead of being rebuilt on each rerun.
# `st.cache_resource` is used when the installed Streamlit provides it;
# otherwise fall back to the older `st.cache` API.
_cache_resource = getattr(st, "cache_resource", None)
if _cache_resource is None:
    _cache_resource = st.cache(allow_output_mutation=True)


@_cache_resource
def _load_model(model_type):
    """Load and return the embedding model selected in the sidebar.

    Args:
        model_type: Either "Sentence Transformer" or
            "Universal Sentence Encoder".

    Returns:
        A ``SentenceTransformer`` instance or the object returned by
        ``hub.load`` for the Universal Sentence Encoder.

    Raises:
        ValueError: If ``model_type`` is not one of the supported choices.
    """
    if model_type == "Sentence Transformer":
        return SentenceTransformer('paraphrase-distilroberta-base-v1')
    if model_type == "Universal Sentence Encoder":
        model_url = "https://tfhub.dev/google/universal-sentence-encoder-large/5"
        return hub.load(model_url)
    raise ValueError(f"Unknown model type: {model_type!r}")


model = _load_model(model_type)

# Only download the punkt tokenizer data when it is not already installed,
# so reruns do not contact the NLTK download server.
try:
    nltk.data.find('tokenizers/punkt')
except LookupError:
    nltk.download('punkt')

# Run model
# Split the input into sentences; blank input yields no sentences, in which
# case nothing is rendered (an empty embedding set cannot be plotted or
# clustered).
sentences = nltk.tokenize.sent_tokenize(text) if text else []
if sentences:
    # Embed every sentence with the selected model (one row per sentence).
    if model_type == "Sentence Transformer":
        embed = model.encode(sentences)
    elif model_type == "Universal Sentence Encoder":
        embed = model(sentences).numpy()

    # Pairwise cosine similarity: normalise each row to unit length, then a
    # single matrix product gives every dot product at once. Clipping keeps
    # floating-point round-off inside the valid [-1, 1] range.
    vectors = np.asarray(embed, dtype=np.float64)
    unit_vectors = vectors / np.linalg.norm(vectors, axis=1, keepdims=True)
    sim = np.clip(unit_vectors @ unit_vectors.T, -1.0, 1.0)

    st.subheader("Similarity Heatmap")
    plot_heatmap(sentences, sim)

    # K-Means cannot produce more clusters than there are sentences, so cap
    # the slider value at the sentence count.
    cluster_list = cluster_examples(sentences, embed, min(nc, len(sentences)))
    st.subheader("Results from K-Means Clustering")
    cluster_table = st.table(cluster_list)