share true
app.py CHANGED
```diff
@@ -1,61 +1,45 @@
-import transformers
 import gradio as gr
 import librosa
 import torch
-
+from transformers import pipeline
 import spaces
 
 @spaces.GPU
 def transcribe_and_respond(audio_file):
     try:
-        # Load audio
+        # Load audio
         audio, sr = librosa.load(audio_file, sr=16000)
 
-        #
-        pipe =
+        # Load Shuka model via pipeline
+        pipe = pipeline(
             model="sarvamai/shuka_v1",
             trust_remote_code=True,
             device=0 if torch.cuda.is_available() else -1
         )
 
-        #
-
-
-
-
-
-
-
-
-            max_new_tokens=256
-        )
+        # Use Shuka's expected format
+        output = pipe({
+            "audio": audio,
+            "sampling_rate": sr,
+            "turns": [
+                {"role": "system", "content": "Respond naturally and informatively."},
+                {"role": "user", "content": "<|audio|>"}
+            ]
+        })
 
         return output
 
     except Exception as e:
         return f"Error: {str(e)}"
 
-# Gradio
-with gr.Blocks(title="
-    gr.Markdown("## 🎙️
-    gr.Markdown("Upload or speak, and the model will respond naturally using SarvamAI's voice foundation model.")
-
+# Gradio interface
+with gr.Blocks(title="Shuka v1 Transcription") as iface:
+    gr.Markdown("## 🎙️ Shuka v1 - Voice Transcription")
     with gr.Row():
-        audio_input = gr.Audio(
-
-            type="filepath",
-            label="🎧 Audio Input"
-        )
-        text_output = gr.Textbox(
-            label="📝 Model Response",
-            placeholder="Response will appear here..."
-        )
+        audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Audio")
+        text_output = gr.Textbox(label="Response")
 
-    audio_input.change(
-        fn=transcribe_and_respond,
-        inputs=audio_input,
-        outputs=text_output
-    )
+    audio_input.change(fn=transcribe_and_respond, inputs=audio_input, outputs=text_output)
 
 if __name__ == "__main__":
     iface.launch(share=True)
```
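For quick local testing, the new pipeline call can also be exercised outside Gradio. The snippet below is a sketch rather than part of the commit: it reuses the `pipeline(...)` arguments and input dictionary from the updated app.py, and assumes a hypothetical local file `sample.wav` that librosa can decode.

```python
# Standalone sketch (not part of the commit): invoke the Shuka pipeline directly,
# mirroring the arguments and input format used in the updated app.py.
import librosa
import torch
from transformers import pipeline

AUDIO_PATH = "sample.wav"  # hypothetical test file; any format librosa can read

# Same constructor arguments as app.py; trust_remote_code pulls in the model's
# custom pipeline code, which consumes the dict passed below.
pipe = pipeline(
    model="sarvamai/shuka_v1",
    trust_remote_code=True,
    device=0 if torch.cuda.is_available() else -1,
)

# Resample to 16 kHz, matching the rate used in transcribe_and_respond.
audio, sr = librosa.load(AUDIO_PATH, sr=16000)

output = pipe({
    "audio": audio,
    "sampling_rate": sr,
    "turns": [
        {"role": "system", "content": "Respond naturally and informatively."},
        {"role": "user", "content": "<|audio|>"},
    ],
})
print(output)
```

One design note on the diff itself: because `transcribe_and_respond` constructs the pipeline inside the handler, the model is reloaded on every audio change event; hoisting the `pipeline(...)` call to module level, as in this sketch, is a common way to pay that cost only once per process.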