Ryan committed on
Commit
1f1253e
·
1 Parent(s): 41e3754
Files changed (1) hide show
  1. processors/ngram_analysis.py +18 -7
processors/ngram_analysis.py CHANGED
@@ -71,13 +71,24 @@ def compare_ngrams(texts, model_names, n=2, top_n=25):
71
  # Ensure all texts are strings and handle nested lists
72
  processed_texts = []
73
  for text in texts:
74
- if isinstance(text, list):
75
- # Flatten nested lists and join into a single string
76
- flat_text = ' '.join(map(str, flatten_list(text)))
77
- processed_texts.append(flat_text)
78
- else:
79
- # Convert non-string objects to strings
80
- processed_texts.append(str(text))
 
 
 
 
 
 
 
 
 
 
 
81
 
82
  X = vectorizer.fit_transform(processed_texts)
83
 
 
71
  # Ensure all texts are strings and handle nested lists
72
  processed_texts = []
73
  for text in texts:
74
+ try:
75
+ if isinstance(text, list):
76
+ # More thoroughly flatten and ensure we have a string
77
+ flat_items = list(flatten_list(text))
78
+ # Convert each item to string and join
79
+ flat_text = ' '.join([str(item) for item in flat_items])
80
+ processed_texts.append(flat_text)
81
+ else:
82
+ # Convert non-string objects to strings
83
+ processed_texts.append(str(text))
84
+
85
+ # Verify we have a valid string
86
+ if not isinstance(processed_texts[-1], str):
87
+ processed_texts[-1] = str(processed_texts[-1])
88
+ except Exception as e:
89
+ # Handle problematic text by adding empty string
90
+ print(f"Warning: Error processing text: {e}")
91
+ processed_texts.append("")
92
 
93
  X = vectorizer.fit_transform(processed_texts)
94