Ryan committed on
Commit
b559aef
·
1 Parent(s): 1b72959
Files changed (1) hide show
  1. processors/topic_modeling.py +8 -2
processors/topic_modeling.py CHANGED
@@ -117,7 +117,7 @@ def extract_topics(texts, n_topics=3, n_top_words=10, method="lda"):
117
 
118
  return result
119
 
120
- def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method="lda"):
121
  """
122
  Compare topics between two sets of texts
123
 
@@ -127,10 +127,15 @@ def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method=
127
  n_topics (int): Number of topics to extract
128
  n_top_words (int): Number of top words per topic
129
  method (str): Topic modeling method ('lda' or 'nmf')
 
130
 
131
  Returns:
132
  dict: Comparison results with topics from both sets and similarity metrics
133
  """
 
 
 
 
134
  # Extract topics for each set
135
  topics_set_1 = extract_topics(texts_set_1, n_topics, n_top_words, method)
136
  topics_set_2 = extract_topics(texts_set_2, n_topics, n_top_words, method)
@@ -169,7 +174,8 @@ def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method=
169
  "set2_topics": topics_set_2["topics"],
170
  "similarity_matrix": similarity_matrix,
171
  "matched_topics": matched_topics,
172
- "average_similarity": np.mean([match["similarity"] for match in matched_topics])
 
173
  }
174
 
175
  return result
 
117
 
118
  return result
119
 
120
+ def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method="lda", model_names=None):
121
  """
122
  Compare topics between two sets of texts
123
 
 
127
  n_topics (int): Number of topics to extract
128
  n_top_words (int): Number of top words per topic
129
  method (str): Topic modeling method ('lda' or 'nmf')
130
+ model_names (list, optional): Names of the models being compared
131
 
132
  Returns:
133
  dict: Comparison results with topics from both sets and similarity metrics
134
  """
135
+ # Set default model names if not provided
136
+ if model_names is None:
137
+ model_names = ["Model 1", "Model 2"]
138
+
139
  # Extract topics for each set
140
  topics_set_1 = extract_topics(texts_set_1, n_topics, n_top_words, method)
141
  topics_set_2 = extract_topics(texts_set_2, n_topics, n_top_words, method)
 
174
  "set2_topics": topics_set_2["topics"],
175
  "similarity_matrix": similarity_matrix,
176
  "matched_topics": matched_topics,
177
+ "average_similarity": np.mean([match["similarity"] for match in matched_topics]),
178
+ "models": model_names # Add model names to result
179
  }
180
 
181
  return result