Mahwishsada committed on
Commit 72e711f · verified · 1 Parent(s): 777ac53

Update app.py

Files changed (1)
  1. app.py +37 -26
app.py CHANGED
@@ -1,35 +1,46 @@
  import gradio as gr
- import torch
- from transformers import pipeline
+ import whisper
+ from transformers import MarianMTModel, MarianTokenizer
+ from TTS.api import TTS

  # Load models
- speech_to_text = pipeline("automatic-speech-recognition", model="openai/whisper-small")
- translation = pipeline("translation", model="Helsinki-NLP/opus-mt-hi-en")
- text_to_speech = pipeline("text-to-speech", model="espnet/kan-bayashi_ljspeech_vits")
-
- def translate_speech(audio):
-     # Step 1: Hindi speech to Hindi text
-     hindi_text = speech_to_text(audio)["text"]
-
-     # Step 2: Hindi text to English translation
-     english_text = translation(hindi_text)[0]["translation_text"]
-
-     # Step 3: English text to speech
-     english_speech = text_to_speech(english_text)["audio"]
-
-     return hindi_text, english_text, (48000, english_speech)
+ print("Loading Whisper (for Hindi STT)...")
+ whisper_model = whisper.load_model("small")
+
+ print("Loading MarianMT (for Hindi to English)...")
+ translator_tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-hi-en")
+ translator_model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-hi-en")
+
+ print("Loading TTS model (Tacotron2 + HiFi-GAN)...")
+ tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
+
+ # Pipeline function
+ def hindi_speech_to_english_audio(audio):
+     # Step 1: Hindi Speech to Text
+     result = whisper_model.transcribe(audio, language="hi")
+     hindi_text = result["text"]
+
+     # Step 2: Hindi to English Translation
+     inputs = translator_tokenizer(hindi_text, return_tensors="pt", padding=True)
+     translated_tokens = translator_model.generate(**inputs)
+     english_text = translator_tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
+
+     # Step 3: English Text to Speech
+     tts.tts_to_file(text=english_text, file_path="output.wav")
+
+     return english_text, "output.wav"

  # Gradio Interface
- demo = gr.Interface(
-     fn=translate_speech,
-     inputs=gr.Audio(source="microphone", type="filepath"),
+ interface = gr.Interface(
+     fn=hindi_speech_to_english_audio,
+     inputs=gr.Audio(sources=["microphone"], type="filepath", label="🎙️ Speak in Hindi"),
      outputs=[
-         gr.Textbox(label="Hindi Text"),
-         gr.Textbox(label="English Translation"),
-         gr.Audio(label="English Audio")
+         gr.Textbox(label="🔤 Translated English Text"),
+         gr.Audio(type="filepath", label="🗣️ English Audio Output")
      ],
-     title="Hindi to English Speech Translator",
-     description="🎤 Speak in Hindi and get the English translation spoken aloud"
+     title="Hindi Speech to English Audio Translator",
+     description="🎧 Speak in Hindi and hear it back in English!",
  )

- demo.launch()
+ # Run app
+ interface.launch()
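
Not part of the commit: a minimal sketch for sanity-checking the Hindi-to-English translation step that the updated app.py depends on, without launching the Gradio interface. It reuses the same Helsinki-NLP/opus-mt-hi-en checkpoint and MarianMT calls shown in the diff; the Hindi sample sentence is a hypothetical test input, and the script assumes transformers, sentencepiece, and torch are installed.

```python
# Standalone check of the Hindi -> English step used in app.py.
# Assumes the same Helsinki-NLP/opus-mt-hi-en model as the commit;
# the sample sentence below is a hypothetical test input.
from transformers import MarianMTModel, MarianTokenizer

tokenizer = MarianTokenizer.from_pretrained("Helsinki-NLP/opus-mt-hi-en")
model = MarianMTModel.from_pretrained("Helsinki-NLP/opus-mt-hi-en")

hindi_text = "नमस्ते, आप कैसे हैं?"  # hypothetical sample: "Hello, how are you?"
inputs = tokenizer(hindi_text, return_tensors="pt", padding=True)
translated_tokens = model.generate(**inputs)
print(tokenizer.decode(translated_tokens[0], skip_special_tokens=True))
```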