Spaces:
Build error
Build error
feat: Switching to L6 version of the model to improve times of encoding
Browse files
app.py
CHANGED
|
@@ -22,7 +22,7 @@ from sentence_transformers import SentenceTransformer, util
|
|
| 22 |
|
| 23 |
client = tweepy.Client(bearer_token=st.secrets["tw_bearer_token"])
|
| 24 |
model_to_use = {
|
| 25 |
-
"English": "all-MiniLM-
|
| 26 |
"Use all the ones you know (~15 lang)": "paraphrase-multilingual-MiniLM-L12-v2"
|
| 27 |
}
|
| 28 |
|
|
@@ -135,12 +135,12 @@ def generate_plot(
|
|
| 135 |
) -> Figure:
|
| 136 |
with st.spinner(text=f"Trying to understand '{tw_user}' tweets... 🤔"):
|
| 137 |
embeddings = embed_text(tws_cleaned, model)
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
encoded_labels = cluster.labels_
|
| 145 |
cluster_keyword = {}
|
| 146 |
with st.spinner("Now trying to express them with my own words... 💬"):
|
|
|
|
| 22 |
|
| 23 |
client = tweepy.Client(bearer_token=st.secrets["tw_bearer_token"])
|
| 24 |
model_to_use = {
|
| 25 |
+
"English": "all-MiniLM-L6-v2",
|
| 26 |
"Use all the ones you know (~15 lang)": "paraphrase-multilingual-MiniLM-L12-v2"
|
| 27 |
}
|
| 28 |
|
|
|
|
| 135 |
) -> Figure:
|
| 136 |
with st.spinner(text=f"Trying to understand '{tw_user}' tweets... 🤔"):
|
| 137 |
embeddings = embed_text(tws_cleaned, model)
|
| 138 |
+
# encoded_labels = encode_labels(labels)
|
| 139 |
+
cluster = hdbscan.HDBSCAN(
|
| 140 |
+
min_cluster_size=3,
|
| 141 |
+
metric='euclidean',
|
| 142 |
+
cluster_selection_method='eom'
|
| 143 |
+
).fit(embeddings)
|
| 144 |
encoded_labels = cluster.labels_
|
| 145 |
cluster_keyword = {}
|
| 146 |
with st.spinner("Now trying to express them with my own words... 💬"):
|