Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -162,41 +162,63 @@ def normalize(d, target=1.0):
|
|
162 |
return {key: value * factor for key, value in d.items()}
|
163 |
|
164 |
# --- Visualization Functions with Error Handling ---
|
|
|
165 |
def safe_plot(func, *args, **kwargs):
|
|
|
|
|
166 |
try:
|
167 |
-
|
|
|
168 |
func(*args, **kwargs)
|
169 |
buf = BytesIO()
|
170 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
buf.seek(0)
|
172 |
img = Image.open(buf)
|
173 |
-
plt.close() #
|
174 |
return img
|
175 |
except Exception as e:
|
176 |
print(f"Plotting error in safe_plot: {e}")
|
177 |
-
|
178 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
179 |
|
180 |
def fDistancePlot(text2Party):
|
|
|
181 |
def plot_func():
|
182 |
tokens = word_tokenize(text2Party)
|
183 |
if not tokens:
|
184 |
-
|
185 |
-
|
186 |
fdist = FreqDist(tokens)
|
187 |
fdist.plot(15, title='Frequency Distribution')
|
188 |
-
plt.xticks(rotation=45, ha='right')
|
189 |
plt.tight_layout()
|
190 |
return safe_plot(plot_func)
|
191 |
|
|
|
192 |
def DispersionPlot(textParty):
|
|
|
193 |
try:
|
194 |
word_tokens_party = word_tokenize(textParty)
|
195 |
if not word_tokens_party:
|
196 |
-
|
197 |
moby = Text(word_tokens_party)
|
198 |
fdistance = FreqDist(word_tokens_party)
|
199 |
-
# Get top 5 words, handle potential IndexError if less than 5 unique words
|
200 |
common_words = fdistance.most_common(6)
|
201 |
if len(common_words) < 5:
|
202 |
word_Lst = [word for word, _ in common_words]
|
@@ -204,31 +226,44 @@ def DispersionPlot(textParty):
|
|
204 |
word_Lst = [common_words[x][0] for x in range(5)]
|
205 |
|
206 |
if not word_Lst:
|
207 |
-
|
208 |
|
209 |
-
plt.
|
210 |
-
|
211 |
-
moby.dispersion_plot(word_Lst)
|
212 |
-
|
213 |
buf = BytesIO()
|
214 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
buf.seek(0)
|
216 |
img = Image.open(buf)
|
217 |
-
plt.close() # Close the figure
|
218 |
return img
|
219 |
except Exception as e:
|
220 |
print(f"Dispersion plot error: {e}")
|
221 |
traceback.print_exc()
|
|
|
222 |
return None
|
223 |
|
|
|
224 |
def word_cloud_generator(parsed_text_name, text_Party):
|
|
|
|
|
225 |
try:
|
226 |
-
# Handle case where parsed_text_name might not have .name
|
227 |
filename_lower = ""
|
228 |
if hasattr(parsed_text_name, 'name') and parsed_text_name.name:
|
229 |
filename_lower = parsed_text_name.name.lower()
|
230 |
elif isinstance(parsed_text_name, str):
|
231 |
-
|
232 |
|
233 |
mask_path = None
|
234 |
if 'bjp' in filename_lower:
|
@@ -238,35 +273,51 @@ def word_cloud_generator(parsed_text_name, text_Party):
|
|
238 |
elif 'aap' in filename_lower:
|
239 |
mask_path = 'aapMain2.jpg'
|
240 |
|
241 |
-
# Generate word cloud
|
242 |
if text_Party.strip() == "":
|
243 |
-
|
244 |
|
|
|
245 |
if mask_path and os.path.exists(mask_path):
|
246 |
orgImg = Image.open(mask_path)
|
247 |
-
# Ensure mask is in the right format (e.g., uint8)
|
248 |
if orgImg.mode != 'RGB':
|
249 |
orgImg = orgImg.convert('RGB')
|
250 |
mask = np.array(orgImg)
|
251 |
-
wordcloud = WordCloud(max_words=3000, mask=mask, background_color='white').generate(text_Party)
|
252 |
else:
|
253 |
-
wordcloud = WordCloud(max_words=2000, background_color='white').generate(text_Party)
|
|
|
|
|
|
|
|
|
|
|
|
|
254 |
|
255 |
-
plt.figure(figsize=(8, 6)) # Set figure size
|
256 |
-
plt.imshow(wordcloud, interpolation='bilinear') # Use bilinear interpolation
|
257 |
-
plt.axis("off")
|
258 |
-
plt.tight_layout()
|
259 |
buf = BytesIO()
|
260 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
261 |
buf.seek(0)
|
262 |
img = Image.open(buf)
|
263 |
-
plt.close() # Close the figure
|
264 |
return img
|
|
|
265 |
except Exception as e:
|
266 |
print(f"Word cloud error: {e}")
|
|
|
|
|
267 |
traceback.print_exc()
|
|
|
268 |
return None # Return None on error
|
269 |
|
|
|
270 |
# Initial design for concordance based search
|
271 |
def get_all_phases_containing_tar_wrd(target_word, tar_passage, left_margin=10, right_margin=10, numLins=4):
|
272 |
"""
|
|
|
162 |
return {key: value * factor for key, value in d.items()}
|
163 |
|
164 |
# --- Visualization Functions with Error Handling ---
|
165 |
+
|
166 |
def safe_plot(func, *args, **kwargs):
    """Run a plotting callable on a new figure and return the render as an image.

    A figure is created with ``plt.figure()``, ``func(*args, **kwargs)`` is
    invoked, and the current pyplot figure is written as PNG into an in-memory
    buffer that is then opened with ``Image.open``.

    Args:
        func: Callable that draws with pyplot; its return value is ignored.
        *args: Positional arguments forwarded to ``func``.
        **kwargs: Keyword arguments forwarded to ``func``.

    Returns:
        The image opened from the PNG buffer, or ``None`` if any step raised.
        Failures are printed together with a traceback, never propagated.
    """
    png_stream = None  # stays None until a buffer exists, so the handler knows whether to close it
    try:
        # Start from a dedicated figure so the callable draws on a clean state.
        figure = plt.figure()
        func(*args, **kwargs)
        png_stream = BytesIO()
        try:
            plt.savefig(png_stream, format='png', bbox_inches='tight')
        except AttributeError as attr_err:
            # Only the known "apply_aspect" failure is recoverable; any other
            # AttributeError goes to the outer handler.
            if "apply_aspect" not in str(attr_err):
                raise
            print(f"Warning: bbox_inches='tight' failed ({attr_err}), saving without it.")
            # The first buffer may hold a partial write; start over with a new one.
            png_stream = BytesIO()
            plt.savefig(png_stream, format='png')
        png_stream.seek(0)
        rendered = Image.open(png_stream)
        plt.close(figure)
        return rendered
    except Exception as exc:
        print(f"Plotting error in safe_plot: {exc}")
        if png_stream is not None:
            png_stream.close()
        traceback.print_exc()
        # The failing step is unknown, so every open figure is discarded.
        plt.close('all')
        return None
|
197 |
+
|
198 |
+
|
199 |
|
200 |
def fDistancePlot(text2Party):
    """Build the token frequency-distribution plot for a text.

    The text is tokenized with ``word_tokenize`` and the drawing is delegated
    to ``safe_plot``, which handles rendering and error reporting.

    Args:
        text2Party: Text whose tokens are counted.

    Returns:
        Whatever ``safe_plot`` returns for the drawing callable.
    """
    def _draw_frequencies():
        words = word_tokenize(text2Party)
        if words:
            FreqDist(words).plot(15, title='Frequency Distribution')
            # Slanted labels keep neighbouring tick texts from overlapping.
            plt.xticks(rotation=45, ha='right')
            plt.tight_layout()
        else:
            # Nothing to count: leave a centred notice (axes coordinates) instead of a plot.
            plt.text(0.5, 0.5, "No data to plot", ha='center', va='center', transform=plt.gca().transAxes)

    return safe_plot(_draw_frequencies)
|
212 |
|
213 |
+
|
214 |
def DispersionPlot(textParty):
    """Render a dispersion plot for the most frequent tokens of a text.

    The text is tokenized with ``word_tokenize``; up to five of the most
    common tokens (per ``FreqDist``) are handed to NLTK's
    ``Text.dispersion_plot`` and the resulting figure is saved as PNG into an
    in-memory buffer.

    Args:
        textParty: Text to tokenize and plot.

    Returns:
        The image opened from the PNG buffer, or ``None`` when the text yields
        no tokens or any step raises. Errors are printed with a traceback and
        never propagated.
    """
    try:
        word_tokens_party = word_tokenize(textParty)
        if not word_tokens_party:
            return None
        moby = Text(word_tokens_party)
        fdistance = FreqDist(word_tokens_party)

        # Up to five most common tokens; a text with fewer distinct tokens
        # simply contributes all of them.
        word_Lst = [word for word, _ in fdistance.most_common(5)]
        if not word_Lst:
            return None

        # Text.dispersion_plot() takes only the word list. Passing ax= raised
        # a TypeError that the handler below swallowed, so no plot was ever
        # produced. Let NLTK draw first, then adopt whichever figure is
        # current afterwards (NLTK may create its own).
        moby.dispersion_plot(word_Lst)
        fig = plt.gcf()
        fig.set_size_inches(10, 5)
        plt.gca().set_title('Dispersion Plot')
        fig.tight_layout()

        buf = BytesIO()
        try:
            fig.savefig(buf, format='png', bbox_inches='tight')
        except AttributeError as ae:
            # Only the known "apply_aspect" failure is recoverable.
            if "apply_aspect" not in str(ae):
                raise
            print(f"Warning: bbox_inches='tight' failed for Dispersion Plot ({ae}), saving without it.")
            # The first buffer may hold a partial write; start over with a new one.
            buf = BytesIO()
            fig.savefig(buf, format='png')
        buf.seek(0)
        img = Image.open(buf)
        plt.close(fig)
        return img
    except Exception as e:
        print(f"Dispersion plot error: {e}")
        traceback.print_exc()
        # The failing step is unknown, so every open figure is discarded.
        plt.close('all')
        return None
|
256 |
|
257 |
+
|
258 |
def word_cloud_generator(parsed_text_name, text_Party):
|
259 |
+
"""Generates the word cloud image."""
|
260 |
+
buf = None # Initialize buffer
|
261 |
try:
|
|
|
262 |
filename_lower = ""
|
263 |
if hasattr(parsed_text_name, 'name') and parsed_text_name.name:
|
264 |
filename_lower = parsed_text_name.name.lower()
|
265 |
elif isinstance(parsed_text_name, str):
|
266 |
+
filename_lower = parsed_text_name.lower()
|
267 |
|
268 |
mask_path = None
|
269 |
if 'bjp' in filename_lower:
|
|
|
273 |
elif 'aap' in filename_lower:
|
274 |
mask_path = 'aapMain2.jpg'
|
275 |
|
|
|
276 |
if text_Party.strip() == "":
|
277 |
+
raise ValueError("Text for word cloud is empty")
|
278 |
|
279 |
+
# Generate word cloud object
|
280 |
if mask_path and os.path.exists(mask_path):
|
281 |
orgImg = Image.open(mask_path)
|
|
|
282 |
if orgImg.mode != 'RGB':
|
283 |
orgImg = orgImg.convert('RGB')
|
284 |
mask = np.array(orgImg)
|
285 |
+
wordcloud = WordCloud(max_words=3000, mask=mask, background_color='white', mode='RGBA').generate(text_Party)
|
286 |
else:
|
287 |
+
wordcloud = WordCloud(max_words=2000, background_color='white', mode='RGBA').generate(text_Party)
|
288 |
+
|
289 |
+
# --- Key Fix: Explicitly manage figure and axes for word cloud ---
|
290 |
+
fig, ax = plt.subplots(figsize=(8, 6)) # Create new figure and axes
|
291 |
+
ax.imshow(wordcloud, interpolation='bilinear')
|
292 |
+
ax.axis("off")
|
293 |
+
fig.tight_layout(pad=0) # Remove padding
|
294 |
|
|
|
|
|
|
|
|
|
295 |
buf = BytesIO()
|
296 |
+
# Handle potential apply_aspect error for word cloud too
|
297 |
+
try:
|
298 |
+
fig.savefig(buf, format='png', bbox_inches='tight', dpi=300, facecolor='white') # Added dpi and facecolor
|
299 |
+
except AttributeError as ae:
|
300 |
+
if "apply_aspect" in str(ae):
|
301 |
+
print(f"Warning: bbox_inches='tight' failed for Word Cloud ({ae}), saving without it.")
|
302 |
+
buf.seek(0)
|
303 |
+
buf = BytesIO()
|
304 |
+
fig.savefig(buf, format='png', dpi=300, facecolor='white')
|
305 |
+
else:
|
306 |
+
raise
|
307 |
buf.seek(0)
|
308 |
img = Image.open(buf)
|
309 |
+
plt.close(fig) # Close the specific figure
|
310 |
return img
|
311 |
+
|
312 |
except Exception as e:
|
313 |
print(f"Word cloud error: {e}")
|
314 |
+
if buf:
|
315 |
+
buf.close() # Ensure buffer is closed on error
|
316 |
traceback.print_exc()
|
317 |
+
plt.close('all') # Aggressive close on error
|
318 |
return None # Return None on error
|
319 |
|
320 |
+
|
321 |
# Initial design for concordance based search
|
322 |
def get_all_phases_containing_tar_wrd(target_word, tar_passage, left_margin=10, right_margin=10, numLins=4):
|
323 |
"""
|