Sa-m committed on
Commit
140eb89
·
verified ·
1 Parent(s): 756209b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -37
app.py CHANGED
@@ -288,77 +288,107 @@ def fDistancePlot(text2Party):
288
  return safe_plot(plot_func)
289
 
290
  def DispersionPlot(textParty):
291
- """Generates the word dispersion plot."""
292
- buf = None # Initialize buffer
 
 
 
293
  try:
294
- word_tokens_party = word_tokenize(textParty)
295
- print(f"Debug DispersionPlot: Total tokens: {len(word_tokens_party)}") # Debug print
296
  if not word_tokens_party:
297
  print("Warning: No tokens found for dispersion plot.")
298
  return None
299
 
300
- moby = Text(word_tokens_party)
301
  fdistance = FreqDist(word_tokens_party)
302
- print(f"Debug DispersionPlot: FreqDist sample: {list(fdistance.most_common(10))}") # Debug print
303
 
304
  # --- Improved word selection logic ---
305
- # Get common words, handle potential IndexError if less than 5 unique words
306
- common_words_raw = fdistance.most_common(15) # Check a few more common words
307
- # Filter: length > 2, isalpha (to avoid punctuation/non-informative), not just digits
308
- common_words_filtered = [(word, freq) for word, freq in common_words_raw if len(word) > 2 and word.isalpha() and not word.isdigit()]
309
- print(f"Debug DispersionPlot: Filtered common words: {common_words_filtered}") # Debug print
 
 
310
 
311
  # Select top 5 from filtered list
312
- if len(common_words_filtered) < 5:
313
- word_Lst = [word for word, _ in common_words_filtered]
314
- else:
315
- word_Lst = [common_words_filtered[x][0] for x in range(5)]
316
-
317
- # Final check: Ensure words are present in the Text object (moby)
318
- final_word_list = [word for word in word_Lst if word in moby] # Check membership in the Text object
319
- print(f"Debug DispersionPlot: Final word list for plot: {final_word_list}") # Debug print
320
 
321
  if not final_word_list:
322
- print("Warning: No suitable words found for dispersion plot after filtering and checking membership.")
323
  # Create a simple plot indicating no data
324
  fig, ax = plt.subplots(figsize=(8, 3))
325
  ax.text(0.5, 0.5, "No suitable words found for dispersion plot", ha='center', va='center', transform=ax.transAxes)
326
  ax.set_xlim(0, 1)
327
  ax.set_ylim(0, 1)
328
- ax.axis('off') # Hide axes for the message
329
  fig.suptitle('Dispersion Plot')
330
  else:
331
- # --- Manage figure explicitly without passing 'ax' ---
332
- fig = plt.figure(figsize=(10, 5)) # Create figure explicitly
333
- plt.title('Dispersion Plot')
334
- # Call dispersion_plot with the verified word list
335
- moby.dispersion_plot(final_word_list)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
336
  plt.tight_layout()
337
 
338
  buf = BytesIO()
339
- # Handle potential apply_aspect error for dispersion plot
340
  try:
341
- fig.savefig(buf, format='png', bbox_inches='tight')
342
  except AttributeError as ae:
343
  if "apply_aspect" in str(ae):
344
- print(f"Warning: bbox_inches='tight' failed for Dispersion Plot ({ae}), saving without it.")
345
- buf.seek(0)
346
- buf = BytesIO() # Get a fresh buffer
347
- fig.savefig(buf, format='png')
348
  else:
349
- raise # Re-raise if it's a different AttributeError
350
  buf.seek(0)
351
  img = Image.open(buf)
352
- plt.close(fig) # Close the specific figure created
353
  return img
354
 
355
  except Exception as e:
356
  print(f"Dispersion plot error: {e}")
357
  if buf:
358
- buf.close() # Ensure buffer is closed on error
359
  traceback.print_exc()
360
- plt.close('all') # Aggressive close on error
361
- return None # Return None on error
 
362
 
363
  def word_cloud_generator(parsed_text_name, text_Party):
364
  """Generates the word cloud image."""
 
288
  return safe_plot(plot_func)
289
 
290
  def DispersionPlot(textParty):
291
+ """
292
+ Generates a dispersion plot using Matplotlib.
293
+ Shows the positions of the most common words along the text.
294
+ """
295
+ buf = None
296
  try:
297
+ word_tokens_party = word_tokenize(textParty.lower()) # Lowercase for matching
298
+ print(f"Debug DispersionPlot: Total tokens: {len(word_tokens_party)}")
299
  if not word_tokens_party:
300
  print("Warning: No tokens found for dispersion plot.")
301
  return None
302
 
 
303
  fdistance = FreqDist(word_tokens_party)
304
+ print(f"Debug DispersionPlot: FreqDist sample: {list(fdistance.most_common(10))}")
305
 
306
  # --- Improved word selection logic ---
307
+ common_words_raw = fdistance.most_common(15)
308
+ # Filter words: length > 2, alphabetic, not just digits
309
+ common_words_filtered = [
310
+ (word, freq) for word, freq in common_words_raw
311
+ if len(word) > 2 and word.isalpha() and not word.isdigit()
312
+ ]
313
+ print(f"Debug DispersionPlot: Filtered common words: {common_words_filtered}")
314
 
315
  # Select top 5 from filtered list
316
+ final_word_list = [word for word, _ in common_words_filtered[:5]]
317
+ print(f"Debug DispersionPlot: Final word list for plot: {final_word_list}")
 
 
 
 
 
 
318
 
319
  if not final_word_list:
320
+ print("Warning: No suitable words found for dispersion plot.")
321
  # Create a simple plot indicating no data
322
  fig, ax = plt.subplots(figsize=(8, 3))
323
  ax.text(0.5, 0.5, "No suitable words found for dispersion plot", ha='center', va='center', transform=ax.transAxes)
324
  ax.set_xlim(0, 1)
325
  ax.set_ylim(0, 1)
326
+ ax.axis('off')
327
  fig.suptitle('Dispersion Plot')
328
  else:
329
+ # --- Create the dispersion plot manually ---
330
+ fig, ax = plt.subplots(figsize=(12, 6))
331
+
332
+ # X-axis: position in the text (token index)
333
+ x = list(range(len(word_tokens_party)))
334
+
335
+ # Y-axis: will be offset for each word for visualization
336
+ # We'll plot a scatter point for each occurrence of the target words
337
+ colors = plt.cm.get_cmap('tab10', len(final_word_list))
338
+
339
+ for i, word in enumerate(final_word_list):
340
+ # Find all indices where the word occurs
341
+ offsets = [j for j, token in enumerate(word_tokens_party) if token == word]
342
+ y_positions = [i + 1] * len(offsets) # Offset y-position for each word
343
+ ax.scatter(offsets, y_positions, label=word, color=colors(i), alpha=0.7, s=30) # s is marker size
344
+
345
+ ax.set_xlabel("Position in Text (Token Index)")
346
+ ax.set_ylabel("Words")
347
+ ax.set_title("Dispersion Plot")
348
+
349
+ # Set y-ticks to correspond to the words
350
+ ax.set_yticks(range(1, len(final_word_list) + 1))
351
+ ax.set_yticklabels(final_word_list)
352
+
353
+ # Invert y-axis so the first word in the list is at the top
354
+ ax.invert_yaxis()
355
+
356
+ # Add grid for better readability
357
+ ax.grid(True, axis='x', linestyle='--', alpha=0.5)
358
+
359
+ # Add legend
360
+ # ax.legend(title="Words", bbox_to_anchor=(1.05, 1), loc='upper left') # Place legend outside plot
361
+ # Or, include legend inside if space allows and it's not too cluttered
362
+ # For simplicity inside the plot area (adjust if needed)
363
+ # ax.legend(title="Words")
364
+
365
  plt.tight_layout()
366
 
367
  buf = BytesIO()
368
+ # Handle potential apply_aspect error
369
  try:
370
+ fig.savefig(buf, format='png', bbox_inches='tight', dpi=150) # Added dpi for clarity
371
  except AttributeError as ae:
372
  if "apply_aspect" in str(ae):
373
+ print(f"Warning: bbox_inches='tight' failed for Dispersion Plot ({ae}), saving without it.")
374
+ buf.seek(0)
375
+ buf = BytesIO()
376
+ fig.savefig(buf, format='png', dpi=150)
377
  else:
378
+ raise
379
  buf.seek(0)
380
  img = Image.open(buf)
381
+ plt.close(fig)
382
  return img
383
 
384
  except Exception as e:
385
  print(f"Dispersion plot error: {e}")
386
  if buf:
387
+ buf.close()
388
  traceback.print_exc()
389
+ plt.close('all')
390
+ return None
391
+
392
 
393
  def word_cloud_generator(parsed_text_name, text_Party):
394
  """Generates the word cloud image."""