PuristanLabs1 committed on
Commit
3a2ae2d
·
verified ·
1 Parent(s): 4634485

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +37 -1
app.py CHANGED
@@ -15,6 +15,10 @@ import nltk
15
  import stanza
16
  from transformers import BartForConditionalGeneration, BartTokenizer
17
  from nltk.tokenize import sent_tokenize
 
 
 
 
18
 
19
  nltk.download("punkt")
20
  nltk.download("punkt_tab")
@@ -171,6 +175,29 @@ def extract_entities_with_stanza(text, chunk_size=1000):
171
  return formatted_entities
172
  return entities
173
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  ### 4️⃣ TTS Functionality (KokoroTTS)
175
  @spaces.GPU(duration=1000)
176
  def generate_audio_kokoro(text, lang, selected_voice):
@@ -267,7 +294,8 @@ with gr.Blocks() as demo:
267
  detected_lang = gr.Textbox(label="Detected Language", visible=False)
268
  summary_output = gr.Textbox(label="Summary", visible=True, interactive=False)
269
  full_audio_output = gr.Audio(label="Generated Audio", visible=True)
270
- ner_output = gr.Textbox(label="Extracted Entities", visible=True, interactive=False) # ✅ New output for NER
 
271
 
272
  # Step 1: Fetch Text & Detect Language First
273
  process_text_button.click(
@@ -276,6 +304,14 @@ with gr.Blocks() as demo:
276
 
277
  outputs=[extracted_text, metadata_output, detected_lang, process_summary_button, process_audio_button,process_ner_button, extracted_text, metadata_output]
278
  )
 
 
 
 
 
 
 
 
279
 
280
  process_summary_button.click(hierarchical_summarization, inputs=[extracted_text], outputs=[summary_output])
281
 
 
15
  import stanza
16
  from transformers import BartForConditionalGeneration, BartTokenizer
17
  from nltk.tokenize import sent_tokenize
18
+ from wordcloud import WordCloud
19
+ import matplotlib.pyplot as plt
20
+ from PIL import Image
21
+ import io
22
 
23
  nltk.download("punkt")
24
  nltk.download("punkt_tab")
 
175
  return formatted_entities
176
  return entities
177
 
178
def generate_wordcloud(text):
    """Generate a word cloud from the given text.

    Args:
        text: The text to visualise. ``None``, an empty string, or a
            whitespace-only string yields no image.

    Returns:
        A PIL image containing the rendered word cloud, or ``None`` when
        there is nothing to draw.
    """
    # Nothing to draw for None, "" or whitespace-only input.
    if not text or not text.strip():
        return None

    try:
        wordcloud = WordCloud(
            width=800, height=400, background_color='white'
        ).generate(text)
    except ValueError:
        # WordCloud raises ValueError when no usable words remain (for
        # example text made only of stopwords or punctuation). Treat that
        # as "no image" rather than surfacing an error from the UI handler.
        return None

    # Use an explicit figure/axes pair instead of pyplot's implicit
    # "current figure" so overlapping calls cannot draw on each other's plot.
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.imshow(wordcloud, interpolation='bilinear')
        ax.axis('off')

        # Save the plot to an in-memory PNG.
        buf = io.BytesIO()
        fig.savefig(buf, format='png', bbox_inches='tight', pad_inches=0)
    finally:
        # Always release the figure, even if rendering failed.
        plt.close(fig)

    # Convert to a PIL image. Image.open is lazy, so force the pixel data
    # to be decoded now while the buffer is known to be intact.
    buf.seek(0)
    image = Image.open(buf)
    image.load()
    return image
200
+
201
  ### 4️⃣ TTS Functionality (KokoroTTS)
202
  @spaces.GPU(duration=1000)
203
  def generate_audio_kokoro(text, lang, selected_voice):
 
294
  detected_lang = gr.Textbox(label="Detected Language", visible=False)
295
  summary_output = gr.Textbox(label="Summary", visible=True, interactive=False)
296
  full_audio_output = gr.Audio(label="Generated Audio", visible=True)
297
+ ner_output = gr.Textbox(label="Extracted Entities", visible=False, interactive=False)
298
+ wordcloud_output = gr.Image(label="Word Cloud", visible=False)
299
 
300
  # Step 1: Fetch Text & Detect Language First
301
  process_text_button.click(
 
304
 
305
  outputs=[extracted_text, metadata_output, detected_lang, process_summary_button, process_audio_button,process_ner_button, extracted_text, metadata_output]
306
  )
307
+
308
+ # Automatically generate word cloud when extracted_text changes
309
+ extracted_text.change(
310
+ generate_wordcloud,
311
+ inputs=[extracted_text],
312
+ outputs=[wordcloud_output],
313
+ show_progress=False
314
+ )
315
 
316
  process_summary_button.click(hierarchical_summarization, inputs=[extracted_text], outputs=[summary_output])
317