shubham24 committed on
Commit ed509ec · 1 Parent(s): b0a12ad
Files changed (4):
  1. .gitignore +53 -0
  2. .gradio/certificate.pem +31 -0
  3. app.py +44 -19
  4. requirements.txt +2 -1
.gitignore ADDED
@@ -0,0 +1,53 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual Environment
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+
+ # IDE
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+ .DS_Store
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # Hugging Face
+ .cache/
+ transformers/
+ *.safetensors
+ *.bin
+ *.pt
+ *.pth
+
+ # Project specific
+ *.wav
+ *.mp3
+ *.m4a
+ *.ogg
+ *.flac
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
app.py CHANGED
@@ -2,8 +2,8 @@ import transformers
  import gradio as gr
  import librosa
  import torch
- import spaces
  import numpy as np
+ from transformers import AutoModel, AutoTokenizer


  def transcribe_and_respond(audio_file):
@@ -12,32 +12,57 @@ def transcribe_and_respond(audio_file):
          audio, sr = librosa.load(audio_file, sr=16000)
          print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")

-         # Initialize the model pipeline with an appropriate task
-         pipe = transformers.pipeline(
-             task="automatic-speech-recognition",  # Change to "text2text-generation" if needed
-             model="sarvamai/shuka_v1",
+         # Initialize the model and tokenizer
+         device = 0 if torch.cuda.is_available() else -1
+         model = AutoModel.from_pretrained(
+             "sarvamai/shuka_v1",
              trust_remote_code=True,
-             device=0
+             device_map="auto" if device == 0 else None
+         )
+         tokenizer = AutoTokenizer.from_pretrained(
+             "sarvamai/shuka_v1",
+             trust_remote_code=True
          )

-         # Pass raw audio for transcription
-         output = pipe(audio_file)
+         # Process audio and generate response
+         inputs = tokenizer(audio_file, return_tensors="pt")
+         if device == 0:
+             inputs = {k: v.to(device) for k, v in inputs.items()}
+             model = model.to(device)
+
+         with torch.no_grad():
+             outputs = model(**inputs)
+             response = tokenizer.decode(outputs.last_hidden_state[0].argmax(dim=-1), skip_special_tokens=True)

-         print(f"Model output: {output}")
-         return output["text"] if isinstance(output, dict) and "text" in output else str(output)
+         print(f"Model output: {response}")
+         return response

      except Exception as e:
+         print(f"Error details: {str(e)}")
          return f"Error: {str(e)}"

- # Gradio interface
- iface = gr.Interface(
-     fn=transcribe_and_respond,
-     inputs=gr.Audio(sources="microphone", type="filepath"),
-     outputs="text",
-     title="Live Transcription with Shuka v1",
-     description="Speak into your microphone, and the model will transcribe or respond using SarvamAI's Shuka v1.",
-     live=True
- )
+ # Create Gradio blocks instead of Interface
+ with gr.Blocks(title="Live Transcription with Shuka v1") as iface:
+     gr.Markdown("# Live Transcription with Shuka v1")
+     gr.Markdown("Speak into your microphone or upload an audio file, and the model will transcribe it using SarvamAI's Shuka v1.")
+
+     with gr.Row():
+         audio_input = gr.Audio(
+             sources=["microphone", "upload"],
+             type="filepath",
+             label="Audio Input",
+             streaming=False
+         )
+         text_output = gr.Textbox(
+             label="Transcription",
+             placeholder="Transcription will appear here..."
+         )
+
+     audio_input.change(
+         fn=transcribe_and_respond,
+         inputs=audio_input,
+         outputs=text_output
+     )

  if __name__ == "__main__":
      iface.launch(share=True)
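A note on the rewritten model path: `AutoTokenizer` operates on text, so `tokenizer(audio_file, return_tensors="pt")` tokenizes the file *path string* rather than the waveform, and argmax-decoding `last_hidden_state` is feature extraction, not autoregressive generation, so this branch is unlikely to produce a real transcription. Below is a minimal sketch of the pipeline-based call the removed code used, with the decoded audio passed explicitly; the dict input is the standard `transformers` ASR-pipeline format, and whether `sarvamai/shuka_v1`'s remote code accepts it, as well as the `sample.wav` filename, are assumptions to verify against the model card.

```python
import librosa
import transformers

# Sketch, assuming shuka_v1 still loads through the ASR pipeline as in the
# pre-commit code; device=0 assumes a GPU (use device=-1 to fall back to CPU).
pipe = transformers.pipeline(
    task="automatic-speech-recognition",
    model="sarvamai/shuka_v1",
    trust_remote_code=True,
    device=0,
)

# "sample.wav" is a hypothetical input file.
audio, sr = librosa.load("sample.wav", sr=16000)

# Standard ASR-pipeline input: the raw waveform plus its sampling rate,
# rather than a filepath, so resampling stays under our control.
output = pipe({"raw": audio, "sampling_rate": sr})
print(output["text"] if isinstance(output, dict) and "text" in output else output)
```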
requirements.txt CHANGED
@@ -1,7 +1,8 @@
  transformers==4.41.2
  peft==0.11.1
  librosa==0.10.2
- gradio==4.44.1
+ gradio==5.23.2
+ pydantic==2.10.6
  huggingface-hub>=0.19
  torch==2.5.1
  spaces==0.37.0
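Since this commit jumps Gradio across a major version (4.44.1 → 5.23.2) and pins pydantic explicitly (likely to sidestep incompatibilities between Gradio 5 and newer pydantic releases, though the commit does not say so), a quick post-install check can confirm the resolver honored the pins. A convenience snippet, not part of the commit:

```python
# Print the versions that actually resolved after `pip install -r requirements.txt`.
import gradio
import pydantic
import torch
import transformers

for mod in (gradio, pydantic, torch, transformers):
    print(f"{mod.__name__}=={mod.__version__}")
```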