shubham24 committed on
Commit
12aa081
·
1 Parent(s): 47c53f6

Initial commit

Browse files
Files changed (1) hide show
  1. app.py +15 -25
app.py CHANGED
@@ -8,46 +8,36 @@ import numpy as np
8
  @spaces.GPU(duration=60)
9
  def transcribe_and_respond(audio_file):
10
  try:
11
- pipe = transformers.pipeline(
12
- model='sarvamai/shuka_v1',
13
- trust_remote_code=True,
14
- device=0,
15
- torch_dtype=torch.bfloat16
16
- )
17
-
18
- # Load the audio file
19
  audio, sr = librosa.load(audio_file, sr=16000)
20
-
21
- # Print audio properties for debugging
22
  print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")
23
 
24
- turns = [
25
- {'role': 'system', 'content': 'Respond naturally and informatively.'},
26
- {'role': 'user', 'content': '<|audio|>'}
27
- ]
28
-
29
- # Debug: Print the initial turns
30
- print(f"Initial turns: {turns}")
31
 
32
- # Call the model with the audio and prompt
33
- output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=512)
34
 
35
- # Debug: Print the final output from the model
36
  print(f"Model output: {output}")
37
-
38
- return output
39
 
40
  except Exception as e:
41
  return f"Error: {str(e)}"
42
 
 
43
  iface = gr.Interface(
44
  fn=transcribe_and_respond,
45
  inputs=gr.Audio(sources="microphone", type="filepath"),
46
  outputs="text",
47
- title="Live Transcription and Response",
48
- description="Speak into your microphone, and the model will respond naturally and informatively.",
49
  live=True
50
  )
51
 
52
  if __name__ == "__main__":
53
- iface.launch()
 
8
@spaces.GPU(duration=60)
def transcribe_and_respond(audio_file):
    """Transcribe a recorded audio clip with SarvamAI's shuka_v1 model.

    Parameters
    ----------
    audio_file : str
        Filesystem path to the recording (Gradio ``type="filepath"``).

    Returns
    -------
    str
        The transcription text when the pipeline returns a dict with a
        ``"text"`` key, otherwise ``str(output)``; on any failure an
        ``"Error: ..."`` message so the UI shows the problem instead of
        crashing.
    """
    try:
        # Load and resample to 16 kHz mono — the rate the model expects.
        audio, sr = librosa.load(audio_file, sr=16000)
        print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")

        # NOTE(review): the pipeline is rebuilt on every call. Caching it
        # across calls is not done here because @spaces.GPU allocates the GPU
        # per invocation — confirm before hoisting this to module level.
        # NOTE(review): shuka_v1 is a multimodal model; the previous revision
        # drove it with chat-style `turns`. Confirm that the plain
        # "automatic-speech-recognition" task is the intended usage.
        pipe = transformers.pipeline(
            task="automatic-speech-recognition",  # Change to "text2text-generation" if needed
            model="sarvamai/shuka_v1",
            trust_remote_code=True,
            device=0,
        )

        # Feed the waveform we already loaded (and resampled) instead of the
        # file path, so the pipeline does not re-read and re-decode the file.
        output = pipe({"raw": audio, "sampling_rate": sr})

        print(f"Model output: {output}")
        return output["text"] if isinstance(output, dict) and "text" in output else str(output)

    except Exception as e:
        # Broad catch is deliberate: surface the failure text in the UI.
        return f"Error: {str(e)}"
31
 
32
# Gradio front-end: microphone audio in, transcription text out.
# live=True re-runs the handler as new audio arrives.
iface = gr.Interface(
    fn=transcribe_and_respond,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs="text",
    title="Live Transcription with Shuka v1",
    description=(
        "Speak into your microphone, and the model will transcribe or "
        "respond using SarvamAI's Shuka v1."
    ),
    live=True,
)

if __name__ == "__main__":
    # share=True additionally exposes a public tunnel URL.
    iface.launch(share=True)