Spaces:

RyanS974
/

525GradioApp

Sleeping

Ryan committed on Apr 21

Commit

b559aef

1 Parent(s): 1b72959

update

Files changed (1) hide show

processors/topic_modeling.py CHANGED Viewed

@@ -117,7 +117,7 @@ def extract_topics(texts, n_topics=3, n_top_words=10, method="lda"):
     return result
-def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method="lda"):
     """
     Compare topics between two sets of texts
@@ -127,10 +127,15 @@ def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method=
         n_topics (int): Number of topics to extract
         n_top_words (int): Number of top words per topic
         method (str): Topic modeling method ('lda' or 'nmf')
     Returns:
         dict: Comparison results with topics from both sets and similarity metrics
     """
     # Extract topics for each set
     topics_set_1 = extract_topics(texts_set_1, n_topics, n_top_words, method)
     topics_set_2 = extract_topics(texts_set_2, n_topics, n_top_words, method)
@@ -169,7 +174,8 @@ def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method=
         "set2_topics": topics_set_2["topics"],
         "similarity_matrix": similarity_matrix,
         "matched_topics": matched_topics,
-        "average_similarity": np.mean([match["similarity"] for match in matched_topics])
     }
     return result

     return result
+def compare_topics(texts_set_1, texts_set_2, n_topics=3, n_top_words=10, method="lda", model_names=None):
     """
     Compare topics between two sets of texts
         n_topics (int): Number of topics to extract
         n_top_words (int): Number of top words per topic
         method (str): Topic modeling method ('lda' or 'nmf')
+        model_names (list, optional): Names of the models being compared
     Returns:
         dict: Comparison results with topics from both sets and similarity metrics
     """
+    # Set default model names if not provided
+    if model_names is None:
+        model_names = ["Model 1", "Model 2"]
     # Extract topics for each set
     topics_set_1 = extract_topics(texts_set_1, n_topics, n_top_words, method)
     topics_set_2 = extract_topics(texts_set_2, n_topics, n_top_words, method)
         "set2_topics": topics_set_2["topics"],
         "similarity_matrix": similarity_matrix,
         "matched_topics": matched_topics,
+        "average_similarity": np.mean([match["similarity"] for match in matched_topics]),
+        "models": model_names  # Add model names to result
     }
     return result