argodinho committed on
Commit
5fc45d8
·
1 Parent(s): fc8c0c3

Updated app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -25
app.py CHANGED
@@ -1,32 +1,34 @@
 
1
  import torch
2
  from parler_tts import ParlerTTSForConditionalGeneration
3
  from transformers import AutoTokenizer
4
  import soundfile as sf
5
- from google.generativeai import GenerativeModel
6
  import gradio as gr
7
 
8
- # Initialize models
9
- device = "cuda" if torch.cuda.is_available() else "cpu"
 
 
10
 
11
- # 1. Load Indic-TTS
 
12
  tts_model = ParlerTTSForConditionalGeneration.from_pretrained("ai4bharat/indic-parler-tts").to(device)
13
  tts_tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
14
  desc_tokenizer = AutoTokenizer.from_pretrained(tts_model.config.text_encoder._name_or_path)
15
 
16
- # 2. Load Gemini (API key via Secrets)
17
- gemini = GenerativeModel('gemini-pro')
18
-
19
  def hinglish_to_devnagri(text):
20
- response = gemini.generate_content(
21
- f"Convert this Hinglish to Devnagri Hindi you can retain complex words like compiler -> कंपाइलर:\n{text}\n\nOutput only the translation."
22
- )
23
- return response.text
 
 
 
24
 
25
  def generate_speech(text):
26
- # Convert to Devnagri
27
  hindi_text = hinglish_to_devnagri(text)
28
 
29
- # Generate speech
30
  desc = "एक महिला वक्ता स्पष्ट हिंदी में बोल रही हैं"
31
  desc_inputs = desc_tokenizer(desc, return_tensors="pt").to(device)
32
  text_inputs = tts_tokenizer(hindi_text, return_tensors="pt").to(device)
@@ -38,20 +40,19 @@ def generate_speech(text):
38
  prompt_attention_mask=text_inputs.attention_mask
39
  )
40
 
41
- # Save as WAV
42
  sf.write("output.wav", audio.cpu().numpy().squeeze(), tts_model.config.sampling_rate)
43
  return "output.wav", hindi_text
44
 
45
  # Gradio UI
46
- interface = gr.Interface(
47
- fn=generate_speech,
48
- inputs=gr.Textbox(label="Enter Hinglish Text"),
49
- outputs=[
50
- gr.Audio(label="Generated Speech"),
51
- gr.Textbox(label="Devnagri Translation")
52
- ],
53
- title="🚀 Hinglish-to-Speech",
54
- description="Gemini (Hinglish→Hindi) + Indic-TTS (Hindi→Speech)"
55
- )
56
 
57
- interface.launch()
 
1
+ import os
2
  import torch
3
  from parler_tts import ParlerTTSForConditionalGeneration
4
  from transformers import AutoTokenizer
5
  import soundfile as sf
6
+ from google.generativeai import GenerativeModel, configure
7
  import gradio as gr
8
 
9
+ # 1. Load Gemini (API key from environment variables)
10
+ GEMINI_KEY = os.environ.get('GEMINI_API_KEY') # Will read from HF Secrets
11
+ configure(api_key=GEMINI_KEY) # Initialize Gemini
12
+ gemini = GenerativeModel('gemini-pro')
13
 
14
+ # 2. Load Indic-TTS
15
+ device = "cuda" if torch.cuda.is_available() else "cpu"
16
  tts_model = ParlerTTSForConditionalGeneration.from_pretrained("ai4bharat/indic-parler-tts").to(device)
17
  tts_tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")
18
  desc_tokenizer = AutoTokenizer.from_pretrained(tts_model.config.text_encoder._name_or_path)
19
 
 
 
 
20
  def hinglish_to_devnagri(text):
21
+ try:
22
+ response = gemini.generate_content(
23
+ f"Convert this Hinglish to Devnagri Hindi while complex words are translated like (ex: compiler -> कंपाइलर) :\n{text}\n\nOutput only the translation."
24
+ )
25
+ return response.text
26
+ except Exception as e:
27
+ raise gr.Error(f"Gemini error: {str(e)}")
28
 
29
  def generate_speech(text):
 
30
  hindi_text = hinglish_to_devnagri(text)
31
 
 
32
  desc = "एक महिला वक्ता स्पष्ट हिंदी में बोल रही हैं"
33
  desc_inputs = desc_tokenizer(desc, return_tensors="pt").to(device)
34
  text_inputs = tts_tokenizer(hindi_text, return_tensors="pt").to(device)
 
40
  prompt_attention_mask=text_inputs.attention_mask
41
  )
42
 
 
43
  sf.write("output.wav", audio.cpu().numpy().squeeze(), tts_model.config.sampling_rate)
44
  return "output.wav", hindi_text
45
 
46
  # Gradio UI
47
+ with gr.Blocks() as app:
48
+ gr.Markdown("## 🚀 Hinglish-to-Speech (Gemini + Indic-TTS)")
49
+ with gr.Row():
50
+ inp = gr.Textbox(label="Enter Hinglish Text", placeholder="Aaj mood nahi hai...")
51
+ btn = gr.Button("Generate")
52
+ with gr.Row():
53
+ audio_out = gr.Audio(label="Speech Output")
54
+ text_out = gr.Textbox(label="Devnagri Translation")
55
+
56
+ btn.click(fn=generate_speech, inputs=inp, outputs=[audio_out, text_out])
57
 
58
+ app.launch()