"""Hindi-to-English speech translation app.

Pipeline: Hindi audio -> faster-whisper ASR -> MarianMT (hi->en) translation
-> Tacotron2 English TTS, served through a Gradio web interface.
"""

import gradio as gr
from faster_whisper import WhisperModel
from transformers import MarianMTModel, MarianTokenizer
from TTS.api import TTS
import os

# Load Whisper "medium" model for Hindi automatic speech recognition.
# float32 keeps inference CPU-friendly; weights cached under ./models.
whisper_model = WhisperModel("medium", compute_type="float32", download_root="./models")

# Load Helsinki-NLP MarianMT model for Hindi -> English translation.
translator_name = "Helsinki-NLP/opus-mt-hi-en"
translator_tokenizer = MarianTokenizer.from_pretrained(translator_name)
translator_model = MarianMTModel.from_pretrained(translator_name)

# Load Tacotron2 TTS model for the English voice (CPU inference).
tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=True, gpu=False)


def translate_hi_to_en(text):
    """Translate Hindi text to English using the MarianMT model.

    Args:
        text: Hindi source text.

    Returns:
        The English translation as a plain string.
    """
    inputs = translator_tokenizer(text, return_tensors="pt", padding=True, truncation=True)
    translated = translator_model.generate(**inputs)
    return translator_tokenizer.decode(translated[0], skip_special_tokens=True)


def transcribe_and_translate(audio_path):
    """Run the full speech-translation pipeline on a recorded audio file.

    Args:
        audio_path: Filesystem path to the recorded Hindi audio, or None
            when the user submitted without recording anything.

    Returns:
        A (english_text, output_audio_path) tuple for the Gradio outputs.
        Both are empty/None when there is no usable input audio or the
        transcription came back empty.
    """
    # Gradio passes None when no recording was made — bail out early
    # instead of crashing inside the ASR model.
    if not audio_path:
        return "", None

    # Step 1: Transcribe Hindi audio
    segments, _ = whisper_model.transcribe(audio_path, language="hi")
    hindi_text = " ".join(segment.text for segment in segments).strip()

    # Nothing intelligible was transcribed — skip translation and TTS.
    if not hindi_text:
        return "", None

    # Step 2: Translate to English
    english_text = translate_hi_to_en(hindi_text)

    # Step 3: Convert English text to speech
    output_audio_path = "output.wav"
    tts.tts_to_file(text=english_text, file_path=output_audio_path)

    return english_text, output_audio_path


# Gradio Interface
iface = gr.Interface(
    fn=transcribe_and_translate,
    inputs=gr.Audio(type="filepath", label="Speak in Hindi"),
    outputs=[
        gr.Textbox(label="Translated English Text"),
        gr.Audio(type="filepath", label="English Speech")
    ],
    title="Hindi to English Speech Translator",
    description="🎤 Speak Hindi → 📘 Translate to English → 🔊 English Speech"
)

# Launch only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()