VocalWeb / app.py
PuristanLabs1's picture
Create app.py
183aa58 verified
raw
history blame
3.31 kB
import gradio as gr
import trafilatura
import docling
import torch
import soundfile as sf
import numpy as np
from langdetect import detect
from kokoro import KPipeline
import re
import json
import nltk
nltk.download("punkt")
# Shared Kokoro pipeline, constructed once at import time.
# Language code "a" selects American English.
kokoro_tts = KPipeline(lang_code="a")
# Languages the app can speak: detected language code -> Kokoro language code.
# Anything detected outside this table is treated as English by the callers.
SUPPORTED_TTS_LANGUAGES = dict(
    en="a",  # English (default)
    fr="f",  # French
    hi="h",  # Hindi
    it="i",  # Italian
    pt="p",  # Brazilian Portuguese
)
### 1️⃣ Fetch and Extract Content
def fetch_content(url):
    """Fetch a URL and return its main content as Markdown text.

    URLs that look like PDFs are converted with Docling; every other URL is
    downloaded and extracted with Trafilatura.

    Args:
        url: Address of the web page or PDF document to read.

    Returns:
        The extracted Markdown text, or ``None`` when the page could not be
        downloaded or Trafilatura found no main content.
    """
    # Case-insensitive substring heuristic: matches ".pdf"/".PDF" suffixes as
    # well as extension-less PDF links (e.g. ".../pdf/<id>").
    # NOTE(review): this can also match ordinary pages whose URL merely
    # contains "pdf"; Docling is expected to handle HTML input too - confirm.
    if "pdf" in url.lower():
        # The top-level ``docling`` package has no ``extract_text`` function;
        # conversion goes through ``DocumentConverter``. Imported locally so
        # the heavy converter is only loaded when a PDF is actually requested.
        from docling.document_converter import DocumentConverter

        result = DocumentConverter().convert(url)
        return result.document.export_to_markdown()

    downloaded = trafilatura.fetch_url(url)
    if downloaded is None:
        # A failed download leaves nothing to extract.
        return None
    return trafilatura.extract(downloaded, output_format="markdown", with_metadata=False)
### 2️⃣ Cleaning Function
def extract_and_clean_text(data):
    """Strip citations, links and inline Markdown markers from extracted text.

    Args:
        data: Raw extracted text. May be ``None`` or empty when extraction
            produced nothing.

    Returns:
        The cleaned text with surrounding whitespace removed; an empty string
        when ``data`` is ``None`` or empty.
    """
    if not data:
        return ""

    # Unwrap Markdown links/images to their visible text *before* URLs are
    # stripped; otherwise "[text](https://x)" degrades to the residue "[text](".
    text = re.sub(r'!?\[([^\]]*)\]\([^)]*\)', r'\1', data)
    text = re.sub(r'\[\d+\]', '', text)  # Numeric citations like [2][4]
    text = re.sub(r'http[s]?://\S+', '', text)  # Bare URLs
    text = re.sub(r'[*_`]', '', text)  # Inline emphasis / code markers
    # Collapse any run of blank lines into a single paragraph break.
    return re.sub(r'\n\s*\n+', '\n\n', text).strip()
### 3️⃣ Language Detection
def detect_language(text):
    """Detect the language of *text*, restricted to languages the TTS supports.

    Args:
        text: The cleaned text to classify.

    Returns:
        A key of ``SUPPORTED_TTS_LANGUAGES``. Falls back to ``"en"`` when the
        text is empty, detection fails, or the detected language is not
        supported.
    """
    # Nothing to classify: skip the detector entirely.
    if not isinstance(text, str) or not text.strip():
        return "en"

    try:
        lang = detect(text)
    except Exception:
        # Detection is best-effort. Unlike a bare ``except:``, this still lets
        # KeyboardInterrupt / SystemExit propagate.
        return "en"

    return lang if lang in SUPPORTED_TTS_LANGUAGES else "en"
### 4️⃣ TTS Functionality (KokoroTTS)
# Default voice per Kokoro language code; voice names start with the language
# letter, so each pipeline gets a voice from its own language.
_KOKORO_VOICES = {
    "a": "af_bella",  # American English
    "f": "ff_siwis",  # French
    "h": "hf_alpha",  # Hindi
    "i": "if_sara",   # Italian
    "p": "pf_dora",   # Brazilian Portuguese
}

# Pipelines created so far, keyed by Kokoro language code.
_KOKORO_PIPELINES = {}


def _get_kokoro_pipeline(lang_code):
    """Return the pipeline for *lang_code*, creating and caching it on first use."""
    if lang_code not in _KOKORO_PIPELINES:
        # Reuse the module-level English pipeline instead of building a second one.
        _KOKORO_PIPELINES[lang_code] = (
            kokoro_tts if lang_code == "a" else KPipeline(lang_code=lang_code)
        )
    return _KOKORO_PIPELINES[lang_code]


def generate_audio_kokoro(text, lang):
    """Synthesize *text* with Kokoro and write the result to a WAV file.

    Args:
        text: Text to speak. It is split into segments on newlines.
        lang: Language key as used in ``SUPPORTED_TTS_LANGUAGES``; unknown
            values fall back to American English.

    Returns:
        Path of the written file, ``audio_<lang>.wav`` in the working directory.

    Raises:
        ValueError: If the pipeline produced no audio (e.g. empty text).
    """
    lang_code = SUPPORTED_TTS_LANGUAGES.get(lang, "a")  # Default to English
    # Previously ``lang_code`` was computed but ignored, so every language was
    # read by the American-English pipeline and voice.
    pipeline = _get_kokoro_pipeline(lang_code)
    voice = _KOKORO_VOICES.get(lang_code, "af_bella")

    generator = pipeline(text, voice=voice, speed=1, split_pattern=r'\n+')
    # Each item is (graphemes, phonemes, audio); only the audio is needed.
    # NOTE(review): segments without audio are skipped - confirm whether the
    # installed Kokoro version can yield ``None`` here.
    segments = [audio for _, _, audio in generator if audio is not None]
    if not segments:
        raise ValueError("No audio was generated: the text to speak is empty.")

    audio_data = np.concatenate(segments)
    output_file = f"audio_{lang}.wav"
    sf.write(output_file, audio_data, 24000)  # Written at 24 kHz
    return output_file
### 5️⃣ Main Processing Function
def process_url(url):
    """Run the full pipeline for one URL: fetch, clean, detect language, speak.

    Args:
        url: Address entered by the user.

    Returns:
        A ``(cleaned_text, detected_lang, audio_file)`` tuple, matching the
        three Gradio outputs wired to this handler.

    Raises:
        gr.Error: If the URL is empty or no readable text could be extracted,
            so the UI shows a readable message instead of an internal error.
    """
    url = (url or "").strip()
    if not url:
        raise gr.Error("Please enter a URL.")

    content = fetch_content(url)
    # Extraction can yield nothing (failed download, page without main
    # content); stop here rather than failing inside the regex cleanup.
    if not content or not content.strip():
        raise gr.Error("Could not extract any readable text from this URL.")

    cleaned_text = extract_and_clean_text(content)
    if not cleaned_text:
        raise gr.Error("The page contained no readable text after cleaning.")

    detected_lang = detect_language(cleaned_text)
    audio_file = generate_audio_kokoro(cleaned_text, detected_lang)
    return cleaned_text, detected_lang, audio_file
### 6️⃣ Gradio Interface
with gr.Blocks() as demo:
    # Page title.
    gr.Markdown("# 🌍 Web-to-Audio Converter 🎙️")

    # Input row: the URL to convert and the button that starts the pipeline.
    url_input = gr.Textbox(
        label="Enter URL",
        placeholder="https://example.com/article",
    )
    process_button = gr.Button("Generate Audio")

    # Outputs, in the same order as the tuple returned by process_url.
    extracted_text = gr.Markdown(label="Extracted Content")
    detected_language = gr.Textbox(label="Detected Language")
    full_audio_output = gr.Audio(label="Generated Audio")

    process_button.click(
        process_url,
        inputs=[url_input],
        outputs=[extracted_text, detected_language, full_audio_output],
    )

# Start the web server as soon as the module is executed.
demo.launch()