shubham24 committed on
Commit ed509ec · 1 Parent(s): b0a12ad
Files changed (4):
  1. .gitignore +53 -0
  2. .gradio/certificate.pem +31 -0
  3. app.py +44 -19
  4. requirements.txt +2 -1
.gitignore ADDED
@@ -0,0 +1,53 @@
+ # Python
+ __pycache__/
+ *.py[cod]
+ *$py.class
+ *.so
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+
+ # Virtual Environment
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+
+ # IDE
+ .idea/
+ .vscode/
+ *.swp
+ *.swo
+ .DS_Store
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # Hugging Face
+ .cache/
+ transformers/
+ *.safetensors
+ *.bin
+ *.pt
+ *.pth
+
+ # Project specific
+ *.wav
+ *.mp3
+ *.m4a
+ *.ogg
+ *.flac
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
+ -----BEGIN CERTIFICATE-----
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
+ -----END CERTIFICATE-----
app.py CHANGED
@@ -2,8 +2,8 @@ import transformers
  import gradio as gr
  import librosa
  import torch
- import spaces
  import numpy as np
+ from transformers import AutoModel, AutoTokenizer


  def transcribe_and_respond(audio_file):
@@ -12,32 +12,57 @@ def transcribe_and_respond(audio_file):
          audio, sr = librosa.load(audio_file, sr=16000)
          print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")

-         # Initialize the model pipeline with an appropriate task
-         pipe = transformers.pipeline(
-             task="automatic-speech-recognition",  # Change to "text2text-generation" if needed
-             model="sarvamai/shuka_v1",
+         # Initialize the model and tokenizer
+         device = 0 if torch.cuda.is_available() else -1
+         model = AutoModel.from_pretrained(
+             "sarvamai/shuka_v1",
              trust_remote_code=True,
-             device=0
+             device_map="auto" if device == 0 else None
+         )
+         tokenizer = AutoTokenizer.from_pretrained(
+             "sarvamai/shuka_v1",
+             trust_remote_code=True
          )

-         # Pass raw audio for transcription
-         output = pipe(audio_file)
+         # Process audio and generate response
+         inputs = tokenizer(audio_file, return_tensors="pt")
+         if device == 0:
+             inputs = {k: v.to(device) for k, v in inputs.items()}
+             model = model.to(device)
+
+         with torch.no_grad():
+             outputs = model(**inputs)
+             response = tokenizer.decode(outputs.last_hidden_state[0].argmax(dim=-1), skip_special_tokens=True)

-         print(f"Model output: {output}")
-         return output["text"] if isinstance(output, dict) and "text" in output else str(output)
+         print(f"Model output: {response}")
+         return response

      except Exception as e:
+         print(f"Error details: {str(e)}")
          return f"Error: {str(e)}"

- # Gradio interface
- iface = gr.Interface(
-     fn=transcribe_and_respond,
-     inputs=gr.Audio(sources="microphone", type="filepath"),
-     outputs="text",
-     title="Live Transcription with Shuka v1",
-     description="Speak into your microphone, and the model will transcribe or respond using SarvamAI's Shuka v1.",
-     live=True
- )
+ # Create Gradio blocks instead of Interface
+ with gr.Blocks(title="Live Transcription with Shuka v1") as iface:
+     gr.Markdown("# Live Transcription with Shuka v1")
+     gr.Markdown("Speak into your microphone or upload an audio file, and the model will transcribe it using SarvamAI's Shuka v1.")
+
+     with gr.Row():
+         audio_input = gr.Audio(
+             sources=["microphone", "upload"],
+             type="filepath",
+             label="Audio Input",
+             streaming=False
+         )
+         text_output = gr.Textbox(
+             label="Transcription",
+             placeholder="Transcription will appear here..."
+         )
+
+     audio_input.change(
+         fn=transcribe_and_respond,
+         inputs=audio_input,
+         outputs=text_output
+     )

  if __name__ == "__main__":
      iface.launch(share=True)
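A note on the rewritten model path: `AutoTokenizer` operates on text, so `tokenizer(audio_file, return_tensors="pt")` tokenizes the file *path string* rather than the waveform, and argmax-decoding `last_hidden_state` is feature extraction, not autoregressive generation, so this branch is unlikely to produce a real transcription. Below is a minimal sketch of the pipeline-based call the removed code used, with the decoded audio passed explicitly; the dict input is the standard `transformers` ASR-pipeline format, and whether `sarvamai/shuka_v1`'s remote code accepts it, as well as the `sample.wav` filename, are assumptions to verify against the model card.

```python
import librosa
import transformers

# Sketch, assuming shuka_v1 still loads through the ASR pipeline as in the
# pre-commit code; device=0 assumes a GPU (use device=-1 to fall back to CPU).
pipe = transformers.pipeline(
    task="automatic-speech-recognition",
    model="sarvamai/shuka_v1",
    trust_remote_code=True,
    device=0,
)

# "sample.wav" is a hypothetical input file.
audio, sr = librosa.load("sample.wav", sr=16000)

# Standard ASR-pipeline input: the raw waveform plus its sampling rate,
# rather than a filepath, so resampling stays under our control.
output = pipe({"raw": audio, "sampling_rate": sr})
print(output["text"] if isinstance(output, dict) and "text" in output else output)
```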
requirements.txt CHANGED
@@ -1,7 +1,8 @@
  transformers==4.41.2
  peft==0.11.1
  librosa==0.10.2
- gradio==4.44.1
+ gradio==5.23.2
+ pydantic==2.10.6
  huggingface-hub>=0.19
  torch==2.5.1
  spaces==0.37.0
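Since this commit jumps Gradio across a major version (4.44.1 → 5.23.2) and pins pydantic explicitly (likely to sidestep incompatibilities between Gradio 5 and newer pydantic releases, though the commit does not say so), a quick post-install check can confirm the resolver honored the pins. A convenience snippet, not part of the commit:

```python
# Print the versions that actually resolved after `pip install -r requirements.txt`.
import gradio
import pydantic
import torch
import transformers

for mod in (gradio, pydantic, torch, transformers):
    print(f"{mod.__name__}=={mod.__version__}")
```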