# speechtranslate / app.py
# NOTE: the lines above/below this header in the original web export were
# Hugging Face page chrome ("ghostai1's picture", "Update app.py",
# commit c0f9fa0, "raw / history / blame", 3.3 kB) — kept here only as a
# comment so the file remains valid Python.
# Text → Translate → Speech | CPU-only Hugging-Face Space
import tempfile
from pathlib import Path
import torch
import TTS.utils.radam as tts_radam
from transformers import pipeline
from TTS.api import TTS
import gradio as gr
# ─────────────────────────────
# 1. Allow-list Coqui's custom RAdam class (Torch ≥ 2.6 weights-only loading)
# ─────────────────────────────
# BUG FIX: add_safe_globals expects a *list* of classes (or (class, name)
# tuples), not a dict.  Passing {"name": cls} iterates the string keys, so
# RAdam was never actually allow-listed and torch.load of the Coqui
# checkpoint would still be rejected under the weights-only unpickler.
torch.serialization.add_safe_globals([tts_radam.RAdam])
# ─────────────────────────────
# 2. Translation pipelines (device=-1 pins them to CPU)
# ─────────────────────────────
PIPE_EN_ES = pipeline("translation", model="Helsinki-NLP/opus-mt-en-es", device=-1)
PIPE_ES_EN = pipeline("translation", model="Helsinki-NLP/opus-mt-es-en", device=-1)
# ─────────────────────────────
# 3. TTS models (Coqui TTS) — one voice per output language
# ─────────────────────────────
TTS_EN = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False)
TTS_ES = TTS(model_name="tts_models/es/mai/tacotron2-DDC", progress_bar=False)
# ─────────────────────────────
# 4. Helper: synthesize WAV to temp file
# ─────────────────────────────
def synthesize(tts_model: TTS, text: str) -> str:
    """Render *text* to speech with *tts_model*; return the WAV file path.

    A NamedTemporaryFile with delete=False is used purely to reserve a
    unique path — the handle is closed immediately and Coqui writes the
    audio into that path itself.
    """
    tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
    tmp.close()  # only the name is needed from here on
    out_path = tmp.name
    tts_model.tts_to_file(text=text, file_path=out_path)
    return out_path
# ─────────────────────────────
# 5. Core translate-and-speak function
# ─────────────────────────────
def translate_and_speak(txt: str, target_lang: str):
    """Translate *txt* toward *target_lang* and voice the result.

    Returns (translated_text, wav_path); ("", None) when the input is blank.
    """
    if not txt.strip():
        return "", None
    # Select the matching (translation pipeline, voice) pair.
    if target_lang == "Spanish":
        translator, voice = PIPE_EN_ES, TTS_ES
    else:  # English
        translator, voice = PIPE_ES_EN, TTS_EN
    translated = translator(txt)[0]["translation_text"]
    return translated, synthesize(voice, translated)
# ─────────────────────────────
# 6. Gradio UI
# ─────────────────────────────
with gr.Blocks(title="Translator & TTS") as demo:
    gr.Markdown(
        "# 🌐💬 Text → Translate → Speech\n"
        "Type a sentence, choose target language, and hear it spoken."
    )
    # Inputs: free-form sentence plus the target-language selector.
    text_in = gr.Textbox(label="Sentence (English or Spanish)", lines=2)
    lang = gr.Radio(["Spanish", "English"], value="Spanish", label="Translate to")
    run_btn = gr.Button("Translate & Speak", variant="primary")
    # Outputs: translated text and a playable WAV file path.
    text_out = gr.Textbox(label="Translated text", interactive=False)
    wav_out = gr.Audio(label="Speech output", type="filepath")
    run_btn.click(
        translate_and_speak,
        inputs=[text_in, lang],
        outputs=[text_out, wav_out],
    )

if __name__ == "__main__":
    # Bind to all interfaces so the Space container can expose the app.
    demo.launch(server_name="0.0.0.0")