Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -15,6 +15,10 @@ import nltk
|
|
| 15 |
import stanza
|
| 16 |
from transformers import BartForConditionalGeneration, BartTokenizer
|
| 17 |
from nltk.tokenize import sent_tokenize
|
|
|
|
|
|
|
|
|
|
|
|
|
| 18 |
|
| 19 |
nltk.download("punkt")
|
| 20 |
nltk.download("punkt_tab")
|
|
@@ -171,6 +175,29 @@ def extract_entities_with_stanza(text, chunk_size=1000):
|
|
| 171 |
return formatted_entities
|
| 172 |
return entities
|
| 173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 174 |
### 4️⃣ TTS Functionality (KokoroTTS)
|
| 175 |
@spaces.GPU(duration=1000)
|
| 176 |
def generate_audio_kokoro(text, lang, selected_voice):
|
|
@@ -267,7 +294,8 @@ with gr.Blocks() as demo:
|
|
| 267 |
detected_lang = gr.Textbox(label="Detected Language", visible=False)
|
| 268 |
summary_output = gr.Textbox(label="Summary", visible=True, interactive=False)
|
| 269 |
full_audio_output = gr.Audio(label="Generated Audio", visible=True)
|
| 270 |
-
ner_output = gr.Textbox(label="Extracted Entities", visible=
|
|
|
|
| 271 |
|
| 272 |
# Step 1: Fetch Text & Detect Language First
|
| 273 |
process_text_button.click(
|
|
@@ -276,6 +304,14 @@ with gr.Blocks() as demo:
|
|
| 276 |
|
| 277 |
outputs=[extracted_text, metadata_output, detected_lang, process_summary_button, process_audio_button,process_ner_button, extracted_text, metadata_output]
|
| 278 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
process_summary_button.click(hierarchical_summarization, inputs=[extracted_text], outputs=[summary_output])
|
| 281 |
|
|
|
|
| 15 |
import stanza
|
| 16 |
from transformers import BartForConditionalGeneration, BartTokenizer
|
| 17 |
from nltk.tokenize import sent_tokenize
|
| 18 |
+
from wordcloud import WordCloud
|
| 19 |
+
import matplotlib.pyplot as plt
|
| 20 |
+
from PIL import Image
|
| 21 |
+
import io
|
| 22 |
|
| 23 |
nltk.download("punkt")
|
| 24 |
nltk.download("punkt_tab")
|
|
|
|
| 175 |
return formatted_entities
|
| 176 |
return entities
|
| 177 |
|
| 178 |
+
def generate_wordcloud(text):
    """Generate a word cloud image from the given text.

    Args:
        text: Text to visualise. May be ``None`` or empty.

    Returns:
        A PIL image of the word cloud (800x400, white background), or
        ``None`` when ``text`` is missing, blank, or contains no words
        that WordCloud can plot.
    """
    # Nothing to draw for missing or whitespace-only input.
    if not text or not text.strip():
        return None

    try:
        wordcloud = WordCloud(
            width=800,
            height=400,
            background_color='white',
        ).generate(text)
    except ValueError:
        # WordCloud raises ValueError when no words are left after its own
        # tokenisation and stop-word filtering. This function runs on every
        # change of the text box, so treat that as "no image" rather than
        # letting the UI callback fail.
        return None

    # Render directly to a PIL image. Going through pyplot would rely on
    # matplotlib's global figure state, which is not safe to share between
    # concurrent requests and leaks the figure if saving fails.
    return wordcloud.to_image()
|
| 200 |
+
|
| 201 |
### 4️⃣ TTS Functionality (KokoroTTS)
|
| 202 |
@spaces.GPU(duration=1000)
|
| 203 |
def generate_audio_kokoro(text, lang, selected_voice):
|
|
|
|
| 294 |
detected_lang = gr.Textbox(label="Detected Language", visible=False)
|
| 295 |
summary_output = gr.Textbox(label="Summary", visible=True, interactive=False)
|
| 296 |
full_audio_output = gr.Audio(label="Generated Audio", visible=True)
|
| 297 |
+
ner_output = gr.Textbox(label="Extracted Entities", visible=False, interactive=False)
|
| 298 |
+
wordcloud_output = gr.Image(label="Word Cloud", visible=False)
|
| 299 |
|
| 300 |
# Step 1: Fetch Text & Detect Language First
|
| 301 |
process_text_button.click(
|
|
|
|
| 304 |
|
| 305 |
outputs=[extracted_text, metadata_output, detected_lang, process_summary_button, process_audio_button,process_ner_button, extracted_text, metadata_output]
|
| 306 |
)
|
| 307 |
+
|
| 308 |
+
# Automatically generate word cloud when extracted_text changes
|
| 309 |
+
extracted_text.change(
|
| 310 |
+
generate_wordcloud,
|
| 311 |
+
inputs=[extracted_text],
|
| 312 |
+
outputs=[wordcloud_output],
|
| 313 |
+
show_progress=False
|
| 314 |
+
)
|
| 315 |
|
| 316 |
process_summary_button.click(hierarchical_summarization, inputs=[extracted_text], outputs=[summary_output])
|
| 317 |
|