Spaces:

PuristanLabs1
/

VocalWeb

Running on Zero

App Files Files Community

VocalWeb / app.py

PuristanLabs1

Create app.py

183aa58 verified 6 months ago

raw

history blame

3.31 kB

	import gradio as gr
	import trafilatura
	import docling
	import torch
	import soundfile as sf
	import numpy as np
	from langdetect import detect
	from kokoro import KPipeline
	import re
	import json
	import nltk

	# Fetch the NLTK "punkt" data as soon as the module is loaded.
	nltk.download("punkt")

	# Module-wide Kokoro pipeline; lang_code "a" selects American English.
	kokoro_tts = KPipeline(lang_code="a")

	# Languages the app can speak. Keys are the language codes compared against
	# the language detector's result; values are the single-letter codes used
	# for KPipeline's ``lang_code``.
	SUPPORTED_TTS_LANGUAGES = {
	    # English doubles as the default.
	    "en": "a",
	    # French
	    "fr": "f",
	    # Hindi
	    "hi": "h",
	    # Italian
	    "it": "i",
	    # Brazilian Portuguese
	    "pt": "p",
	}

	### 1️⃣ Fetch and Extract Content
	def fetch_content(url):
	    """Fetch a URL and return its main text as Markdown.

	    PDF documents are converted with docling; every other URL is downloaded
	    and extracted with trafilatura.

	    Args:
	        url: Address of the HTML page or PDF document to read.

	    Returns:
	        The extracted text as a Markdown string (never empty).

	    Raises:
	        ValueError: If ``url`` is empty, the page cannot be downloaded, or
	            no text can be extracted from it.
	    """
	    from urllib.parse import urlparse

	    url = (url or "").strip()
	    if not url:
	        raise ValueError("No URL provided.")

	    # Decide on the URL *path* only: a bare substring test would also match
	    # ordinary articles such as ".../how-to-edit-pdf-files". The "/pdf/"
	    # segment keeps extension-less PDF links (arXiv style) working.
	    path = urlparse(url).path.lower()
	    if path.endswith(".pdf") or "/pdf/" in path:
	        # docling has no module-level extract_text(); conversion goes
	        # through DocumentConverter, which accepts a URL as its source.
	        from docling.document_converter import DocumentConverter

	        result = DocumentConverter().convert(url)
	        text = result.document.export_to_markdown()
	    else:
	        downloaded = trafilatura.fetch_url(url)
	        # fetch_url signals a failed download by returning None.
	        if downloaded is None:
	            raise ValueError(f"Could not download content from {url!r}.")
	        text = trafilatura.extract(
	            downloaded,
	            output_format="markdown",
	            with_metadata=False,
	        )

	    # Fail here with a clear message instead of handing None/"" to callers
	    # that expect a string.
	    if not text:
	        raise ValueError(f"No readable text could be extracted from {url!r}.")
	    return text

	### 2️⃣ Cleaning Function
	def extract_and_clean_text(data):
	    """Strip citations, links and Markdown markup so the text reads well aloud.

	    Args:
	        data: Raw extracted text (Markdown). ``None`` or an empty string is
	            accepted.

	    Returns:
	        The cleaned text, or ``""`` when there is nothing to clean.
	    """
	    if not data:
	        return ""

	    text = data
	    # Numeric citations such as [2][4], including linked ones like [2](url).
	    text = re.sub(r'\[\d+\](?:\([^)\s]*\))?', '', text)
	    # Markdown links/images: keep the visible text, drop the target. This
	    # must run before the bare-URL pass, which would otherwise swallow the
	    # closing parenthesis and leave "[text](" behind.
	    text = re.sub(r'!?\[([^\]]*)\]\([^)]*\)', r'\1', text)
	    # Remaining bare URLs.
	    text = re.sub(r'http[s]?://\S+', '', text)
	    # Markdown emphasis / inline-code markers.
	    text = re.sub(r'[*_`]', '', text)
	    # Collapse runs of blank lines into a single paragraph break.
	    text = re.sub(r'\n\s*\n+', '\n\n', text).strip()
	    return text

	### 3️⃣ Language Detection
	def detect_language(text):
	    """Detect the language of ``text``, restricted to the TTS-supported set.

	    Args:
	        text: Text to analyse.

	    Returns:
	        The detected language code when it is a key of
	        ``SUPPORTED_TTS_LANGUAGES``; otherwise ``"en"``. ``"en"`` is also
	        returned when the text is empty or no language can be detected.
	    """
	    # Nothing to analyse: fall back to English without calling the detector.
	    if not isinstance(text, str) or not text.strip():
	        return "en"

	    from langdetect import LangDetectException

	    try:
	        lang = detect(text)
	    except LangDetectException:
	        # Raised when the text has no usable features (e.g. digits only).
	        # Catching only this keeps KeyboardInterrupt/SystemExit and real
	        # bugs visible, unlike a bare ``except``.
	        return "en"
	    return lang if lang in SUPPORTED_TTS_LANGUAGES else "en"

	### 4️⃣ TTS Functionality (KokoroTTS)
	# Default voice for each Kokoro language code; a voice should match the
	# pipeline language it is used with.
	_KOKORO_VOICES = {
	    "a": "af_bella",
	    "f": "ff_siwis",
	    "h": "hf_alpha",
	    "i": "if_sara",
	    "p": "pf_dora",
	}

	# One pipeline per language code, created on first use. The module-level
	# American English pipeline is reused instead of being built twice.
	_KOKORO_PIPELINES = {"a": kokoro_tts}


	def _get_kokoro_pipeline(lang_code):
	    """Return the cached KPipeline for ``lang_code``, creating it if needed."""
	    if lang_code not in _KOKORO_PIPELINES:
	        _KOKORO_PIPELINES[lang_code] = KPipeline(lang_code=lang_code)
	    return _KOKORO_PIPELINES[lang_code]


	def generate_audio_kokoro(text, lang):
	    """Synthesise ``text`` with Kokoro and save it as a WAV file.

	    Args:
	        text: Text to speak; it is split into segments on newlines.
	        lang: Language code, a key of ``SUPPORTED_TTS_LANGUAGES``. Unknown
	            codes fall back to American English.

	    Returns:
	        Path of the generated WAV file (written at a 24000 Hz sample rate).

	    Raises:
	        ValueError: If the pipeline produces no audio, e.g. for empty text.
	    """
	    import tempfile

	    lang_code = SUPPORTED_TTS_LANGUAGES.get(lang, "a")  # Default to English
	    # Use the pipeline and voice that belong to the requested language;
	    # previously lang_code was computed but every text went through the
	    # American English pipeline.
	    pipeline = _get_kokoro_pipeline(lang_code)
	    voice = _KOKORO_VOICES.get(lang_code, "af_bella")
	    generator = pipeline(text, voice=voice, speed=1, split_pattern=r'\n+')

	    segments = []
	    for _graphemes, _phonemes, audio in generator:
	        if audio is None:
	            continue
	        # Segments may arrive as torch tensors; move them to host memory
	        # before handing them to NumPy.
	        if isinstance(audio, torch.Tensor):
	            audio = audio.detach().cpu().numpy()
	        segments.append(np.asarray(audio))
	    if not segments:
	        raise ValueError("No audio was generated: the text to synthesise is empty.")

	    audio_data = np.concatenate(segments)

	    # A unique file per call: a fixed name would be overwritten when two
	    # requests for the same language run at the same time.
	    with tempfile.NamedTemporaryFile(
	        prefix=f"audio_{lang}_", suffix=".wav", delete=False
	    ) as handle:
	        output_file = handle.name
	    sf.write(output_file, audio_data, 24000)  # Save as WAV file
	    return output_file

	### 5️⃣ Main Processing Function
	def process_url(url):
	    """Turn the page at ``url`` into cleaned text, a language code and audio.

	    Args:
	        url: Address entered by the user.

	    Returns:
	        A ``(cleaned_text, detected_lang, audio_file)`` tuple: the cleaned
	        article text, its language code, and the path of the generated
	        audio file.

	    Raises:
	        gr.Error: If the URL is empty, cannot be fetched, or yields no
	            readable text. Gradio shows the message to the user.
	    """
	    url = (url or "").strip()
	    if not url:
	        raise gr.Error("Please enter a URL.")

	    try:
	        content = fetch_content(url)
	    except Exception as err:
	        # UI boundary: report fetch/conversion failures to the user instead
	        # of letting an unrelated-looking traceback surface.
	        raise gr.Error(f"Could not fetch content from the URL: {err}") from err
	    if not content:
	        raise gr.Error("No readable text could be extracted from this URL.")

	    cleaned_text = extract_and_clean_text(content)
	    if not cleaned_text:
	        raise gr.Error("The page contains no text that can be read aloud.")

	    detected_lang = detect_language(cleaned_text)
	    audio_file = generate_audio_kokoro(cleaned_text, detected_lang)

	    return cleaned_text, detected_lang, audio_file

	### 6️⃣ Gradio Interface
	# Components are declared in the order they are meant to appear on the page.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🌍 Web-to-Audio Converter 🎙️")

	    # --- Input: the page to read, and the button that starts the work ---
	    url_input = gr.Textbox(
	        label="Enter URL",
	        placeholder="https://example.com/article",
	    )
	    process_button = gr.Button("Generate Audio")

	    # --- Outputs: one component per value returned by process_url ---
	    extracted_text = gr.Markdown(label="Extracted Content")
	    detected_language = gr.Textbox(label="Detected Language")
	    full_audio_output = gr.Audio(label="Generated Audio")

	    # Clicking the button runs the whole pipeline on the entered URL.
	    process_button.click(
	        process_url,
	        inputs=[url_input],
	        outputs=[extracted_text, detected_language, full_audio_output],
	    )

	# Start the web server.
	demo.launch()