Update app.py
app.py CHANGED
@@ -12,12 +12,17 @@ from kokoro import KPipeline
 import re
 import json
 import nltk
+import stanza
 from transformers import BartForConditionalGeneration, BartTokenizer
 from nltk.tokenize import sent_tokenize
 
 nltk.download("punkt")
 nltk.download("punkt_tab")
 
+# Load Stanza's NER model
+stanza.download("en")  # Load English pipeline (can be changed for other languages)
+nlp = stanza.Pipeline("en", processors="tokenize,ner", use_gpu=False)  # Disable GPU for Hugging Face Spaces
+
 # Initialize KokoroTTS with default English
 kokoro_tts = KPipeline(lang_code='a', device="cpu")  # Load initially on CPU
 
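Note on the new Stanza setup above: the model download and pipeline construction happen once at import time, on CPU. A minimal standalone sketch of the same pipeline (the sample sentence and printed entity types are illustrative):

import stanza

stanza.download("en")  # one-time English model download
nlp = stanza.Pipeline("en", processors="tokenize,ner", use_gpu=False)

doc = nlp("Barack Obama was born in Hawaii.")
for ent in doc.ents:
    print(ent.text, ent.type)  # e.g. "Barack Obama PERSON", then "Hawaii GPE"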
@@ -136,6 +141,36 @@ def detect_language(text):
     except:
         return "en"  # Default to English if detection fails
 
+### 2️⃣ Named Entity Recognition (NER) Using Stanza
+def extract_entities_with_stanza(text, chunk_size=1000):
+    """Splits text into chunks, runs Stanza NER, and combines results."""
+    sentences = sent_tokenize(text)
+    chunks = []
+    current_chunk = []
+    current_length = 0
+
+    # Split text into manageable chunks
+    for sentence in sentences:
+        if current_length + len(sentence) > chunk_size:
+            chunks.append(" ".join(current_chunk))
+            current_chunk = [sentence]
+            current_length = len(sentence)
+        else:
+            current_chunk.append(sentence)
+            current_length += len(sentence)
+
+    if current_chunk:
+        chunks.append(" ".join(current_chunk))
+
+    # Process each chunk separately with Stanza
+    entities = []
+    for chunk in chunks:
+        doc = nlp(chunk)
+        for ent in doc.ents:
+            entities.append({"text": ent.text, "type": ent.type})
+
+    return entities
+
 ### 4️⃣ TTS Functionality (KokoroTTS)
 @spaces.GPU(duration=1000)
 def generate_audio_kokoro(text, lang, selected_voice):
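For reference, a sketch of how the new helper is meant to be called; the sample text and the returned entity list are illustrative, and exact results depend on the Stanza model. One behavioral note: a single sentence longer than chunk_size is flushed into its own chunk, so oversized sentences are still processed whole.

sample = ("Apple was founded by Steve Jobs in Cupertino. "
          "Tim Cook runs the company today.")
entities = extract_entities_with_stanza(sample, chunk_size=100)
# Plausible output:
# [{"text": "Apple", "type": "ORG"},
#  {"text": "Steve Jobs", "type": "PERSON"},
#  {"text": "Cupertino", "type": "GPE"},
#  {"text": "Tim Cook", "type": "PERSON"}]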
@@ -221,6 +256,7 @@ with gr.Blocks() as demo:
     process_text_button = gr.Button("Fetch Text & Detect Language")
     process_summary_button = gr.Button("Summarize Text", visible=False)
     process_audio_button = gr.Button("Generate Audio", visible=False)
+    process_ner_button = gr.Button("Extract Entities", visible=True)  # ✅ New button for NER
 
     # Layout: Two adjacent columns (Text and Metadata)
     with gr.Row():
@@ -233,6 +269,7 @@ with gr.Blocks() as demo:
         detected_lang = gr.Textbox(label="Detected Language", visible=False)
         summary_output = gr.Textbox(label="Summary", visible=True, interactive=False)
         full_audio_output = gr.Audio(label="Generated Audio", visible=True)
+        ner_output = gr.JSON(label="Extracted Entities", visible=True)  # ✅ New output for NER
 
     # Step 1: Fetch Text & Detect Language First
     process_text_button.click(
@@ -254,6 +291,12 @@ with gr.Blocks() as demo:
         outputs=[full_audio_output]
     )
 
+    process_ner_button.click(
+        extract_entities_with_stanza,
+        inputs=[extracted_text],
+        outputs=[ner_output]
+    )
+
     #process_button.click(process_url, inputs=[url_input], outputs=[extracted_text, detected_language, full_audio_output])
 
 demo.launch()
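The new click handler wires the fetched article text directly into the NER helper, and gr.JSON renders the returned list of dicts. A self-contained sketch of the same wiring pattern, using a stand-in function and illustrative component names rather than the app's real ones:

import gradio as gr

def fake_ner(text):
    # Stand-in for extract_entities_with_stanza
    return [{"text": "Example Corp", "type": "ORG"}]

with gr.Blocks() as demo:
    text_box = gr.Textbox(label="Text")
    ner_button = gr.Button("Extract Entities")
    ner_json = gr.JSON(label="Extracted Entities")
    ner_button.click(fake_ner, inputs=[text_box], outputs=[ner_json])

demo.launch()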