Spaces:

RyanS974
/

525GradioApp

Sleeping

Ryan committed on Apr 21

Commit

c0e1d59

1 Parent(s): 1f1253e

update

Files changed (1) hide show

processors/ngram_analysis.py CHANGED Viewed

@@ -67,29 +67,10 @@ def compare_ngrams(texts, model_names, n=2, top_n=25):
             max_features=1000,
             stop_words='english'
         )
-        # Ensure all texts are strings and handle nested lists
-        processed_texts = []
-        for text in texts:
-            try:
-                if isinstance(text, list):
-                    # More thoroughly flatten and ensure we have a string
-                    flat_items = list(flatten_list(text))
-                    # Convert each item to string and join
-                    flat_text = ' '.join([str(item) for item in flat_items])
-                    processed_texts.append(flat_text)
-                else:
-                    # Convert non-string objects to strings
-                    processed_texts.append(str(text))
-                # Verify we have a valid string
-                if not isinstance(processed_texts[-1], str):
-                    processed_texts[-1] = str(processed_texts[-1])
-            except Exception as e:
-                # Handle problematic text by adding empty string
-                print(f"Warning: Error processing text: {e}")
-                processed_texts.append("")
         X = vectorizer.fit_transform(processed_texts)
         # Get feature names (n-grams)

             max_features=1000,
             stop_words='english'
         )
+        # Ensure each text is a string, without attempting complex preprocessing
+        processed_texts = [str(text) if not isinstance(text, str) else text for text in texts]
         X = vectorizer.fit_transform(processed_texts)
         # Get feature names (n-grams)