Mahwishsada committed on
Commit
aaa6f27
·
verified ·
1 Parent(s): 694474b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +32 -44
app.py CHANGED
@@ -1,61 +1,49 @@
1
  import gradio as gr
2
- import whisper
3
- from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
4
  import torch
 
 
5
  from TTS.api import TTS
6
 
7
- # Load Whisper model (better accuracy with 'medium')
8
- asr_model = whisper.load_model("medium")
9
 
10
- # Load NLLB Hindi to English translator
11
- translation_model_name = "facebook/nllb-200-distilled-600M"
12
- translator_tokenizer = AutoTokenizer.from_pretrained(translation_model_name)
13
- translator_model = AutoModelForSeq2SeqLM.from_pretrained(translation_model_name)
14
 
15
- # Load English TTS model
16
- tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
17
 
18
- # Utility: Get Hindi text from audio
19
- def speech_to_text(audio_path):
20
  result = asr_model.transcribe(audio_path, language="hi")
21
- return result["text"]
22
-
23
- # Utility: Translate Hindi to English
24
- def translate_hi_to_en(text_hi):
25
- inputs = translator_tokenizer(text_hi, return_tensors="pt")
26
- translated_tokens = translator_model.generate(**inputs, forced_bos_token_id=translator_tokenizer.lang_code_to_id["eng_Latn"])
27
- translated_text = translator_tokenizer.decode(translated_tokens[0], skip_special_tokens=True)
28
- return translated_text
29
-
30
- # Main app logic
31
- def translate_audio(audio):
32
- if audio is None:
33
- return "No audio input", "", None
34
-
35
- # Step 1: Convert Hindi speech to Hindi text
36
- hindi_text = speech_to_text(audio)
37
 
38
- # Step 2: Translate to English
39
- english_text = translate_hi_to_en(hindi_text)
 
 
 
40
 
41
- # Step 3: Generate English speech
42
- english_audio_path = "output.wav"
43
- tts.tts_to_file(text=english_text, file_path=english_audio_path)
44
 
45
- return hindi_text, english_text, english_audio_path
46
 
47
- # Gradio UI
48
  interface = gr.Interface(
49
- fn=translate_audio,
50
- inputs=gr.Audio(source="microphone", type="filepath"),
51
  outputs=[
52
- gr.Textbox(label="Hindi Transcript"),
53
- gr.Textbox(label="English Translation"),
54
- gr.Audio(label="English Speech")
55
  ],
56
- title="Hindi to English Speech Translator",
57
- description="🎀 Speak in Hindi → 📄 Translated English Text → 🔊 Spoken English Output"
 
58
  )
59
 
60
- if __name__ == "__main__":
61
- interface.launch()
 
1
  import gradio as gr
 
 
2
  import torch
3
+ import whisper
4
+ from transformers import M2M100ForConditionalGeneration, M2M100Tokenizer
5
  from TTS.api import TTS
6
 
7
+ # Load Whisper for Hindi speech recognition
8
+ asr_model = whisper.load_model("base")
9
 
10
+ # Load translation model and tokenizer
11
+ translation_model_name = "facebook/m2m100_418M"
12
+ translator_tokenizer = M2M100Tokenizer.from_pretrained(translation_model_name)
13
+ translator_model = M2M100ForConditionalGeneration.from_pretrained(translation_model_name)
14
 
15
+ # Load TTS model for English speech
16
+ tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=torch.cuda.is_available())
17
 
18
+ def translate_hindi_speech(audio_path):
19
+ # Transcribe Hindi speech
20
  result = asr_model.transcribe(audio_path, language="hi")
21
+ hindi_text = result["text"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
+ # Translate Hindi to English
24
+ translator_tokenizer.src_lang = "hi"
25
+ encoded = translator_tokenizer(hindi_text, return_tensors="pt")
26
+ generated_tokens = translator_model.generate(**encoded, forced_bos_token_id=translator_tokenizer.get_lang_id("en"))
27
+ english_text = translator_tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
28
 
29
+ # Convert English text to speech
30
+ output_audio_path = "output_en.wav"
31
+ tts.tts_to_file(text=english_text, file_path=output_audio_path)
32
 
33
+ return english_text, output_audio_path
34
 
35
+ # Gradio interface
36
  interface = gr.Interface(
37
+ fn=translate_hindi_speech,
38
+ inputs=gr.Audio(type="filepath", label="Speak in Hindi"),
39
  outputs=[
40
+ gr.Text(label="English Translation"),
41
+ gr.Audio(type="filepath", label="English Speech Output")
 
42
  ],
43
+ title="🎙️ Hindi to English Speech Translator",
44
+ description="Speak in Hindi → See English translation + hear English output.",
45
+ live=False,
46
  )
47
 
48
+ # Launch the app
49
+ interface.launch()