Mahwishsada committed on
Commit
694474b
·
verified ·
1 Parent(s): 04d8ef1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -29
app.py CHANGED
@@ -1,46 +1,61 @@
1
  import gradio as gr
2
  import whisper
3
- from transformers import MarianMTModel, MarianTokenizer
 
4
  from TTS.api import TTS
5
 
6
- # Load models
7
- print("Loading Whisper (for Hindi STT)...")
8
- whisper_model = whisper.load_model("small")
9
 
10
- print("Loading MarianMT (for Hindi to English)...")
11
- translator_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-hi-en")
12
- translator_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-hi-en")
 
13
 
14
- print("Loading TTS model (Tacotron2 + HiFi-GAN)...")
15
  tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
16
 
17
- # Pipeline function
18
- def hindi_speech_to_english_audio(audio):
19
- # Step 1: Hindi Speech to Text
20
- result = whisper_model.transcribe(audio, language="hi")
21
- hindi_text = result["text"]
22
 
23
- # Step 2: Hindi to English Translation
24
- inputs = translator_tokenizer(hindi_text, return_tensors="pt", padding=True)
25
- translated_tokens = translator_model.generate(**inputs)
26
- english_text = translator_tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
 
 
27
 
28
- # Step 3: English Text to Speech
29
- tts.tts_to_file(text=english_text, file_path="output.wav")
 
 
 
 
 
30
 
31
- return english_text, "output.wav"
 
32
 
33
- # Gradio Interface
 
 
 
 
 
 
34
  interface = gr.Interface(
35
- fn=hindi_speech_to_english_audio,
36
- inputs=gr.Audio(sources=["microphone"], type="filepath", label="🎙️ Speak in Hindi"),
37
  outputs=[
38
- gr.Textbox(label="🔤 Translated English Text"),
39
- gr.Audio(type="filepath", label="🗣️ English Audio Output")
 
40
  ],
41
- title="Hindi Speech to English Audio Translator",
42
- description="🎧 Speak in Hindi and hear it back in English!",
43
  )
44
 
45
- # Run app
46
- interface.launch()
 
1
  import gradio as gr
2
  import whisper
3
+ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
+ import torch
5
  from TTS.api import TTS
6
 
7
+ # Load Whisper model (better accuracy with 'medium')
8
+ asr_model = whisper.load_model("medium")
 
9
 
10
+ # Load NLLB Hindi to English translator
11
+ translation_model_name = "facebook/nllb-200-distilled-600M"
12
+ translator_tokenizer = AutoTokenizer.from_pretrained(translation_model_name)
13
+ translator_model = AutoModelForSeq2SeqLM.from_pretrained(translation_model_name)
14
 
15
+ # Load English TTS model
16
  tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
17
 
18
+ # Utility: Get Hindi text from audio
19
+ def speech_to_text(audio_path):
20
+ result = asr_model.transcribe(audio_path, language="hi")
21
+ return result["text"]
 
22
 
23
+ # Utility: Translate Hindi to English
24
+ def translate_hi_to_en(text_hi):
25
+ inputs = translator_tokenizer(text_hi, return_tensors="pt")
26
+ translated_tokens = translator_model.generate(**inputs, forced_bos_token_id=translator_tokenizer.lang_code_to_id["eng_Latn"])
27
+ translated_text = translator_tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
28
+ return translated_text
29
 
30
+ # Main app logic
31
+ def translate_audio(audio):
32
+ if audio is None:
33
+ return "No audio input", "", None
34
+
35
+ # Step 1: Convert Hindi speech to Hindi text
36
+ hindi_text = speech_to_text(audio)
37
 
38
+ # Step 2: Translate to English
39
+ english_text = translate_hi_to_en(hindi_text)
40
 
41
+ # Step 3: Generate English speech
42
+ english_audio_path = "output.wav"
43
+ tts.tts_to_file(text=english_text, file_path=english_audio_path)
44
+
45
+ return hindi_text, english_text, english_audio_path
46
+
47
+ # Gradio UI
48
  interface = gr.Interface(
49
+ fn=translate_audio,
50
+ inputs=gr.Audio(source="microphone", type="filepath"),
51
  outputs=[
52
+ gr.Textbox(label="Hindi Transcript"),
53
+ gr.Textbox(label="English Translation"),
54
+ gr.Audio(label="English Speech")
55
  ],
56
+ title="Hindi to English Speech Translator",
57
+ description="🎤 Speak in Hindi → 📄 Translated English Text → 🔊 Spoken English Output"
58
  )
59
 
60
+ if __name__ == "__main__":
61
+ interface.launch()