Spaces:
Sleeping
Sleeping
Ryan
committed on
Commit
·
b559aef
1
Parent(s):
1b72959
update
Browse files
processors/topic_modeling.py
CHANGED
@@ -117,7 +117,7 @@ def extract_topics(texts, n_topics=3, n_top_words=10, method="lda"):
|
|
117 |
|
118 |
return result
|
119 |
|
120 |
-
def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method="lda"):
|
121 |
"""
|
122 |
Compare topics between two sets of texts
|
123 |
|
@@ -127,10 +127,15 @@ def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method=
|
|
127 |
n_topics (int): Number of topics to extract
|
128 |
n_top_words (int): Number of top words per topic
|
129 |
method (str): Topic modeling method ('lda' or 'nmf')
|
|
|
130 |
|
131 |
Returns:
|
132 |
dict: Comparison results with topics from both sets and similarity metrics
|
133 |
"""
|
|
|
|
|
|
|
|
|
134 |
# Extract topics for each set
|
135 |
topics_set_1 = extract_topics(texts_set_1, n_topics, n_top_words, method)
|
136 |
topics_set_2 = extract_topics(texts_set_2, n_topics, n_top_words, method)
|
@@ -169,7 +174,8 @@ def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method=
|
|
169 |
"set2_topics": topics_set_2["topics"],
|
170 |
"similarity_matrix": similarity_matrix,
|
171 |
"matched_topics": matched_topics,
|
172 |
-
"average_similarity": np.mean([match["similarity"] for match in matched_topics])
|
|
|
173 |
}
|
174 |
|
175 |
return result
|
|
|
117 |
|
118 |
return result
|
119 |
|
120 |
+
def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method="lda", model_names=None):
|
121 |
"""
|
122 |
Compare topics between two sets of texts
|
123 |
|
|
|
127 |
n_topics (int): Number of topics to extract
|
128 |
n_top_words (int): Number of top words per topic
|
129 |
method (str): Topic modeling method ('lda' or 'nmf')
|
130 |
+
model_names (list, optional): Names of the models being compared
|
131 |
|
132 |
Returns:
|
133 |
dict: Comparison results with topics from both sets and similarity metrics
|
134 |
"""
|
135 |
+
# Set default model names if not provided
|
136 |
+
if model_names is None:
|
137 |
+
model_names = ["Model 1", "Model 2"]
|
138 |
+
|
139 |
# Extract topics for each set
|
140 |
topics_set_1 = extract_topics(texts_set_1, n_topics, n_top_words, method)
|
141 |
topics_set_2 = extract_topics(texts_set_2, n_topics, n_top_words, method)
|
|
|
174 |
"set2_topics": topics_set_2["topics"],
|
175 |
"similarity_matrix": similarity_matrix,
|
176 |
"matched_topics": matched_topics,
|
177 |
+
"average_similarity": np.mean([match["similarity"] for match in matched_topics]),
|
178 |
+
"models": model_names # Add model names to result
|
179 |
}
|
180 |
|
181 |
return result
|