import gradio as gr
from faster_whisper import WhisperModel
from transformers import MarianMTModel, MarianTokenizer
from TTS.api import TTS
# Load Whisper for Hindi ASR
whisper_model = WhisperModel("medium", compute_type="float32", download_root="./models")

# Load Helsinki-NLP Hindi-to-English translator
translator_name = "Helsinki-NLP/opus-mt-hi-en"
translator_tokenizer = MarianTokenizer.from_pretrained(translator_name)
translator_model = MarianMTModel.from_pretrained(translator_name)

# Load TTS model for English voice
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=True, gpu=False)
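
# Note: model weights are fetched on first launch (Whisper is cached under
# ./models via download_root). The Coqui TTS model is pinned to CPU (gpu=False);
# faster-whisper selects its device automatically unless one is specified.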
def translate_hi_to_en(text):
    inputs = translator_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = translator_model.generate(**inputs)
    return translator_tokenizer.decode(translated[0], skip_special_tokens=True)
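
# Example (hypothetical input/output; exact phrasing depends on the
# opus-mt-hi-en checkpoint):
#   translate_hi_to_en("नमस्ते, आप कैसे हैं?")  # -> roughly "Hello, how are you?"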
def transcribe_and_translate(audio_path):
    # Step 1: Transcribe Hindi audio
    segments, _ = whisper_model.transcribe(audio_path, language="hi")
    hindi_text = " ".join(segment.text for segment in segments)

    # Step 2: Translate to English
    english_text = translate_hi_to_en(hindi_text)

    # Step 3: Convert English text to speech
    output_audio_path = "output.wav"
    tts.tts_to_file(text=english_text, file_path=output_audio_path)

    return english_text, output_audio_path
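
# Note: output.wav is written to the working directory and overwritten on every
# request, so concurrent users would clobber each other's result; a per-request
# temporary file (e.g. via the tempfile module) would avoid that.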
# Gradio Interface
iface = gr.Interface(
    fn=transcribe_and_translate,
    inputs=gr.Audio(type="filepath", label="Speak in Hindi"),
    outputs=[
        gr.Textbox(label="Translated English Text"),
        gr.Audio(type="filepath", label="English Speech"),
    ],
    title="Hindi to English Speech Translator",
    description="🎤 Speak Hindi → 🌐 Translate to English → 🔊 English Speech",
)

iface.launch()
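
# To run locally (assuming this file is saved as app.py, the Hugging Face
# Spaces convention): python app.py
# Requires the gradio, faster-whisper, transformers, and TTS packages.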