Spaces:

RyanS974
/

525GradioApp

Sleeping

Ryan committed on Apr 21

Commit

41e3754

1 Parent(s): 6d70959

update

Files changed (1) hide show

processors/ngram_analysis.py CHANGED Viewed

@@ -9,6 +9,22 @@ from nltk.util import ngrams
 from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords
 def compare_ngrams(texts, model_names, n=2, top_n=25):
     """
@@ -52,23 +68,16 @@ def compare_ngrams(texts, model_names, n=2, top_n=25):
             stop_words='english'
         )
-        # Make sure texts are strings before processing
         processed_texts = []
         for text in texts:
-            # If text is not a string (e.g., it's a list), convert it to a string
-            if not isinstance(text, str):
-                if isinstance(text, list):
-                    # Handle potentially nested lists by flattening
-                    flat_text = []
-                    for item in text:
-                        if isinstance(item, list):
-                            flat_text.extend([str(subitem) for subitem in item])
-                        else:
-                            flat_text.append(str(item))
-                    text = ' '.join(flat_text)
-                else:
-                    text = str(text)  # Convert to string if it's another type
-            processed_texts.append(text)
         X = vectorizer.fit_transform(processed_texts)

 from nltk.tokenize import word_tokenize
 from nltk.corpus import stopwords
+# Helper function to flatten nested lists
+def flatten_list(nested_list):
+    """
+    Lazily flatten an arbitrarily nested list, depth-first and in order.
+
+    This is a generator function: calling it produces a generator, not a
+    list. Wrap the call in list(...) when a concrete list is needed.
+
+    Args:
+        nested_list (list): A potentially nested list. Only items that are
+            `list` instances are descended into; every other item
+            (including tuples and strings) is yielded unchanged.
+
+    Yields:
+        Each non-list item found in nested_list or in any list nested
+        inside it.
+    """
+    for item in nested_list:
+        if isinstance(item, list):
+            # Recurse into the sub-list and re-yield its items in place.
+            yield from flatten_list(item)
+        else:
+            yield item
 def compare_ngrams(texts, model_names, n=2, top_n=25):
     """
             stop_words='english'
         )
+        # Ensure all texts are strings and handle nested lists
         processed_texts = []
         for text in texts:
+            if isinstance(text, list):
+                # Flatten nested lists and join into a single string
+                flat_text = ' '.join(map(str, flatten_list(text)))
+                processed_texts.append(flat_text)
+            else:
+                # Convert non-string objects to strings
+                processed_texts.append(str(text))
         X = vectorizer.fit_transform(processed_texts)