Spaces:

mails10
/

Expressive-TTS

Runtime error

App Files Files Community

mails10 committed on Apr 10

Commit

78dc3af

verified ·

1 Parent(s): e73342c

Create app.py

Browse files

Files changed (1) hide show

app.py +54 -0

app.py ADDED Viewed

	@@ -0,0 +1,54 @@

+import gradio as gr
+from transformers import AutoModel
+import numpy as np
+import soundfile as sf
+import tempfile
+import whisper
+# Load TTS model (IndicF5)
+tts_model = AutoModel.from_pretrained("ai4bharat/IndicF5", trust_remote_code=True)
+# Load ASR model (Whisper)
+asr_model = whisper.load_model("medium")
+def generate_tts_and_transcribe(text, ref_audio, ref_text):
+    # Save uploaded ref_audio to a path
+    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+        tmp.write(ref_audio.read())
+        ref_audio_path = tmp.name
+    # Generate speech using IndicF5
+    audio = tts_model(text, ref_audio_path=ref_audio_path, ref_text=ref_text)
+    # Normalize
+    if audio.dtype == np.int16:
+        audio = audio.astype(np.float32) / 32768.0
+    # Save TTS output
+    tts_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
+    sf.write(tts_path, np.array(audio, dtype=np.float32), samplerate=24000)
+    # Transcribe using Whisper
+    asr_result = asr_model.transcribe(tts_path, language="ta")
+    transcript = asr_result["text"]
+    return tts_path, transcript
+# Gradio Interface
+demo = gr.Interface(
+    fn=generate_tts_and_transcribe,
+    inputs=[
+        gr.Textbox(label="Text to Synthesize (Tamil)"),
+        gr.Audio(label="Reference Audio (.wav)", type="file"),
+        gr.Textbox(label="Reference Text (Tamil)")
+    ],
+    outputs=[
+        gr.Audio(label="Generated Audio", type="filepath"),
+        gr.Textbox(label="ASR Transcription (Whisper)")
+    ],
+    title="IndicF5 Tamil TTS + Whisper ASR",
+    description="Give a reference audio and text, synthesize Tamil speech using IndicF5, and transcribe it with Whisper."
+)
+# Start the Gradio app only when this file is run as a script, not on import.
+if __name__ == "__main__":
+    demo.launch()