Spaces:
Sleeping
Sleeping
Ryan
committed on
Commit
·
1f1253e
1
Parent(s):
41e3754
update
Browse files - processors/ngram_analysis.py +18 -7
processors/ngram_analysis.py
CHANGED
@@ -71,13 +71,24 @@ def compare_ngrams(texts, model_names, n=2, top_n=25):
|
|
71 |
# Ensure all texts are strings and handle nested lists
|
72 |
processed_texts = []
|
73 |
for text in texts:
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
81 |
|
82 |
X = vectorizer.fit_transform(processed_texts)
|
83 |
|
|
|
71 |
# Ensure all texts are strings and handle nested lists
|
72 |
processed_texts = []
|
73 |
for text in texts:
|
74 |
+
try:
|
75 |
+
if isinstance(text, list):
|
76 |
+
# More thoroughly flatten and ensure we have a string
|
77 |
+
flat_items = list(flatten_list(text))
|
78 |
+
# Convert each item to string and join
|
79 |
+
flat_text = ' '.join([str(item) for item in flat_items])
|
80 |
+
processed_texts.append(flat_text)
|
81 |
+
else:
|
82 |
+
# Convert non-string objects to strings
|
83 |
+
processed_texts.append(str(text))
|
84 |
+
|
85 |
+
# Verify we have a valid string
|
86 |
+
if not isinstance(processed_texts[-1], str):
|
87 |
+
processed_texts[-1] = str(processed_texts[-1])
|
88 |
+
except Exception as e:
|
89 |
+
# Handle problematic text by adding empty string
|
90 |
+
print(f"Warning: Error processing text: {e}")
|
91 |
+
processed_texts.append("")
|
92 |
|
93 |
X = vectorizer.fit_transform(processed_texts)
|
94 |
|