Spaces:
Running
Running
Update model.py
Browse files
model.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import os
|
2 |
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"
|
3 |
os.environ["HF_HOME"] = "/tmp/hf-home"
|
@@ -42,10 +43,9 @@ def smart_summarize(text, n_clusters=1):
|
|
42 |
if not len(idx):
|
43 |
continue
|
44 |
# Average vector from cluster, converted to ndarray
|
45 |
-
avg_vector =
|
46 |
-
#
|
47 |
sim = cosine_similarity(avg_vector, tfidf_matrix[idx].toarray())
|
48 |
-
# Select sentence with highest similarity to avg
|
49 |
most_representative = sentences[idx[np.argmax(sim)]]
|
50 |
summary_sentences.append(most_representative)
|
51 |
|
|
|
1 |
+
|
2 |
import os
|
3 |
os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf-cache"
|
4 |
os.environ["HF_HOME"] = "/tmp/hf-home"
|
|
|
43 |
if not len(idx):
|
44 |
continue
|
45 |
# Average vector from cluster, converted to ndarray
|
46 |
+
avg_vector = tfidf_matrix[idx].mean(axis=0).A # Convert matrix to ndarray
|
47 |
+
# Compute cosine similarity between avg_vector and tfidf vectors in cluster
|
48 |
sim = cosine_similarity(avg_vector, tfidf_matrix[idx].toarray())
|
|
|
49 |
most_representative = sentences[idx[np.argmax(sim)]]
|
50 |
summary_sentences.append(most_representative)
|
51 |
|