Spaces:
Sleeping
Sleeping
Ryan
committed on
Commit
·
c0e1d59
1
Parent(s):
1f1253e
update
Browse files
- processors/ngram_analysis.py +4 -23
processors/ngram_analysis.py
CHANGED
@@ -67,29 +67,10 @@ def compare_ngrams(texts, model_names, n=2, top_n=25):
|
|
67 |
max_features=1000,
|
68 |
stop_words='english'
|
69 |
)
|
70 |
-
|
71 |
-
# Ensure
|
72 |
-
processed_texts = []
|
73 |
-
|
74 |
-
try:
|
75 |
-
if isinstance(text, list):
|
76 |
-
# More thoroughly flatten and ensure we have a string
|
77 |
-
flat_items = list(flatten_list(text))
|
78 |
-
# Convert each item to string and join
|
79 |
-
flat_text = ' '.join([str(item) for item in flat_items])
|
80 |
-
processed_texts.append(flat_text)
|
81 |
-
else:
|
82 |
-
# Convert non-string objects to strings
|
83 |
-
processed_texts.append(str(text))
|
84 |
-
|
85 |
-
# Verify we have a valid string
|
86 |
-
if not isinstance(processed_texts[-1], str):
|
87 |
-
processed_texts[-1] = str(processed_texts[-1])
|
88 |
-
except Exception as e:
|
89 |
-
# Handle problematic text by adding empty string
|
90 |
-
print(f"Warning: Error processing text: {e}")
|
91 |
-
processed_texts.append("")
|
92 |
-
|
93 |
X = vectorizer.fit_transform(processed_texts)
|
94 |
|
95 |
# Get feature names (n-grams)
|
|
|
67 |
max_features=1000,
|
68 |
stop_words='english'
|
69 |
)
|
70 |
+
|
71 |
+
# Ensure each text is a string, without attempting complex preprocessing
|
72 |
+
processed_texts = [str(text) if not isinstance(text, str) else text for text in texts]
|
73 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
74 |
X = vectorizer.fit_transform(processed_texts)
|
75 |
|
76 |
# Get feature names (n-grams)
|