argodinho committed on
Commit
fc8c0c3
·
1 Parent(s): de8a4ec

added app.py and requirements.txt

Browse files
Files changed (2) hide show
  1. app.py +57 -0
  2. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from parler_tts import ParlerTTSForConditionalGeneration
3
+ from transformers import AutoTokenizer
4
+ import soundfile as sf
5
+ from google.generativeai import GenerativeModel
6
+ import gradio as gr
7
+
8
# Initialize models
# Run on the GPU when torch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# 1. Load Indic-TTS
# The checkpoint is fetched first and then moved onto the selected device.
tts_model = ParlerTTSForConditionalGeneration.from_pretrained(
    "ai4bharat/indic-parler-tts"
)
tts_model = tts_model.to(device)

# Tokenizer shipped with the TTS checkpoint itself.
tts_tokenizer = AutoTokenizer.from_pretrained("ai4bharat/indic-parler-tts")

# Separate tokenizer looked up from the name recorded in the model's
# text-encoder config.
desc_tokenizer = AutoTokenizer.from_pretrained(
    tts_model.config.text_encoder._name_or_path
)

# 2. Load Gemini (API key via Secrets)
# NOTE(review): no key is passed here -- presumably the client reads it from
# the environment; confirm the secret name matches what the library expects.
gemini = GenerativeModel('gemini-pro')
18
+
19
def hinglish_to_devnagri(text):
    """Convert Hinglish text to Devanagari Hindi using the Gemini model.

    Args:
        text: Hinglish input string.

    Returns:
        The text returned by Gemini with leading/trailing whitespace removed,
        or an empty string when ``text`` is empty or whitespace-only (no
        request is sent in that case).

    NOTE(review): ``response.text`` is read without a guard; if the service
    returns a response with no text this will presumably raise -- confirm the
    desired handling with the caller.
    """
    # Nothing to convert: skip the remote call rather than sending a prompt
    # with an empty payload and returning whatever the model makes up.
    if not text or not text.strip():
        return ""

    response = gemini.generate_content(
        f"Convert this Hinglish to Devnagri Hindi you can retain complex words like compiler -> कंपाइलर:\n{text}\n\nOutput only the translation."
    )
    # Trim surrounding whitespace so stray newlines are not passed on to the
    # speech tokenizer or shown in the UI.
    return response.text.strip()
24
+
25
def generate_speech(text):
    """Convert Hinglish text to Devanagari and synthesise it as a WAV file.

    Args:
        text: Hinglish input string from the UI.

    Returns:
        A ``(wav_path, hindi_text)`` tuple: the path of the written WAV file
        and the Devanagari text that was spoken.
    """
    # Function-scope import keeps this block self-contained.
    import tempfile

    # Convert to Devnagri
    hindi_text = hinglish_to_devnagri(text)

    # Generate speech
    # NOTE(review): the voice description is written in Hindi; confirm the
    # description tokenizer/encoder handles Devanagari, otherwise an English
    # description may be required.
    desc = "एक महिला वक्ता स्पष्ट हिंदी में बोल रही हैं"
    desc_inputs = desc_tokenizer(desc, return_tensors="pt").to(device)
    text_inputs = tts_tokenizer(hindi_text, return_tensors="pt").to(device)

    # The description goes in as the main input; the text to speak is the prompt.
    audio = tts_model.generate(
        input_ids=desc_inputs.input_ids,
        attention_mask=desc_inputs.attention_mask,
        prompt_input_ids=text_inputs.input_ids,
        prompt_attention_mask=text_inputs.attention_mask,
    )
    waveform = audio.cpu().numpy().squeeze()

    # Save as WAV
    # Use a unique file per call: a fixed "output.wav" would be overwritten
    # when two requests are processed at the same time.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        wav_path = tmp.name
    sf.write(wav_path, waveform, tts_model.config.sampling_rate)
    return wav_path, hindi_text
44
+
45
# Gradio UI
# One text input feeds generate_speech; its two return values map, in order,
# onto the audio player and the transliteration text box.
interface = gr.Interface(
    fn=generate_speech,
    inputs=gr.Textbox(label="Enter Hinglish Text"),
    outputs=[
        gr.Audio(label="Generated Speech"),
        gr.Textbox(label="Devnagri Translation"),
    ],
    title="🚀 Hinglish-to-Speech",
    description="Gemini (Hinglish→Hindi) + Indic-TTS (Hindi→Speech)",
)

# Start the server only when executed as a script, so importing this module
# (tests, tooling) does not launch a web server as a side effect.
if __name__ == "__main__":
    interface.launch()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ torch>=2.0.0
2
+ transformers>=4.30.0
3
+ parler-tts>=0.1.0
4
+ soundfile>=0.12.0
5
+ google-generativeai>=0.3.0
6
+ gradio>=3.40.0
7
+
8
+