shubham24 commited on
Commit
2f9ee0a
·
1 Parent(s): 3d72142

share true

Browse files
Files changed (1) hide show
  1. app.py +19 -35
app.py CHANGED
@@ -1,61 +1,45 @@
1
- import transformers
2
  import gradio as gr
3
  import librosa
4
  import torch
5
- import numpy as np
6
  import spaces
7
 
8
@spaces.GPU
def transcribe_and_respond(audio_file):
    """Transcribe/respond to an audio clip with SarvamAI's Shuka v1 model.

    Parameters
    ----------
    audio_file : str
        Filesystem path to the uploaded or recorded audio.

    Returns
    -------
    The raw pipeline output on success, or an ``"Error: ..."`` string if
    anything raises along the way.
    """
    try:
        # Decode the clip to a 16 kHz waveform (the rate the model expects).
        waveform, rate = librosa.load(audio_file, sr=16000)

        # Build the Shuka v1 pipeline; the repo ships custom code, hence
        # trust_remote_code. Device 0 = first GPU, -1 = CPU fallback.
        shuka = transformers.pipeline(
            model="sarvamai/shuka_v1",
            trust_remote_code=True,
            device=0 if torch.cuda.is_available() else -1,
        )

        # Chat-style turns; "<|audio|>" marks where the audio is injected.
        conversation = [
            {"role": "system", "content": "Respond naturally and informatively."},
            {"role": "user", "content": "<|audio|>"},
        ]

        return shuka(
            {"audio": waveform, "sampling_rate": rate, "turns": conversation},
            max_new_tokens=256,
        )

    except Exception as e:
        # Report the failure to the UI instead of crashing the Space.
        return f"Error: {str(e)}"
37
 
38
# Gradio UI: one row with an audio source on the left and the model's
# reply on the right; inference fires whenever the audio input changes.
with gr.Blocks(title="Live Transcription with Shuka v1") as iface:
    gr.Markdown("## 🎙️ Live Transcription with Shuka v1")
    gr.Markdown("Upload or speak, and the model will respond naturally using SarvamAI's voice foundation model.")

    with gr.Row():
        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="🎧 Audio Input")
        text_output = gr.Textbox(label="📝 Model Response", placeholder="Response will appear here...")

    # Wire the callback: new/changed audio -> transcription shown in the textbox.
    audio_input.change(fn=transcribe_and_respond, inputs=audio_input, outputs=text_output)

if __name__ == "__main__":
    # share=True exposes a public gradio.live URL in addition to localhost.
    iface.launch(share=True)
 
 
1
  import gradio as gr
2
  import librosa
3
  import torch
4
+ from transformers import pipeline
5
  import spaces
6
 
7
# Cache for the loaded pipeline. Building it downloads and materializes the
# whole model, so doing it once per process (instead of once per request, as
# before) removes the dominant per-call cost.
_PIPE = None


def _get_pipe():
    """Return the Shuka v1 pipeline, creating it lazily on first use."""
    global _PIPE
    if _PIPE is None:
        _PIPE = pipeline(
            model="sarvamai/shuka_v1",
            # The model repo ships custom pipeline code.
            trust_remote_code=True,
            # 0 = first GPU when available; -1 is transformers' CPU sentinel.
            device=0 if torch.cuda.is_available() else -1,
        )
    return _PIPE


@spaces.GPU
def transcribe_and_respond(audio_file):
    """Run Shuka v1 on an audio file and return its response.

    Parameters
    ----------
    audio_file : str
        Filesystem path to the uploaded or recorded clip (gradio ``filepath``).

    Returns
    -------
    The pipeline's raw output on success, or an ``"Error: ..."`` string on
    any failure (gradio renders either via ``str``).
    """
    try:
        # Resample to 16 kHz, the rate the model expects.
        audio, sr = librosa.load(audio_file, sr=16000)

        # Shuka's expected input: waveform + chat turns, with "<|audio|>"
        # marking where the audio is injected.
        # NOTE(review): the previous revision passed max_new_tokens=256 here;
        # it now falls back to the model's default generation length — confirm
        # this was intentional.
        output = _get_pipe()({
            "audio": audio,
            "sampling_rate": sr,
            "turns": [
                {"role": "system", "content": "Respond naturally and informatively."},
                {"role": "user", "content": "<|audio|>"},
            ],
        })

        return output

    except Exception as e:
        # Surface the failure in the UI instead of crashing the Space.
        return f"Error: {str(e)}"
34
 
35
# Gradio interface: audio in, model response out; inference runs whenever
# the audio input changes.
with gr.Blocks(title="Shuka v1 Transcription") as iface:
    gr.Markdown("## 🎙️ Shuka v1 - Voice Transcription")

    with gr.Row():
        audio_input = gr.Audio(
            sources=["microphone", "upload"],
            type="filepath",
            label="Audio",
        )
        text_output = gr.Textbox(label="Response")

    audio_input.change(
        fn=transcribe_and_respond,
        inputs=audio_input,
        outputs=text_output,
    )

if __name__ == "__main__":
    # share=True exposes a public gradio.live URL in addition to localhost.
    iface.launch(share=True)