helvekami committed on
Commit
a9b9492
·
verified ·
1 Parent(s): 02b1ff9

Updated app.py with email (placeholder) and transcript download functionality

Browse files
Files changed (1) hide show
  1. app.py +32 -19
app.py CHANGED
@@ -4,9 +4,10 @@ import librosa
4
  import torch
5
  import spaces
6
  import numpy as np
 
7
 
8
  @spaces.GPU(duration=60)
9
- def transcribe_and_respond(audio_file):
10
  try:
11
  pipe = transformers.pipeline(
12
  model='sarvamai/shuka_v1',
@@ -17,43 +18,55 @@ def transcribe_and_respond(audio_file):
17
 
18
  # Load the audio file at 16kHz
19
  audio, sr = librosa.load(audio_file, sr=16000)
20
-
21
- # Print audio properties for debugging
 
 
 
 
 
22
  print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")
23
-
 
24
  turns = [
25
- {'role': 'system', 'content': 'Compile the information'},
26
  {'role': 'user', 'content': '<|audio|>'}
27
  ]
28
-
29
- # Debug: Print the initial turns
30
  print(f"Initial turns: {turns}")
31
-
32
- # Call the model with the audio and prompt
33
- output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=1000)
34
-
35
- # Debug: Print the final output from the model
36
  print(f"Model output: {output}")
37
-
38
- return output
 
 
 
 
 
 
 
 
 
 
 
39
 
40
  except Exception as e:
41
- return f"Error: {str(e)}"
42
 
43
  iface = gr.Interface(
44
  fn=transcribe_and_respond,
45
  inputs=[
46
  gr.Audio(sources=["upload", "microphone"], type="filepath"),
47
-
48
  ],
49
  outputs=[
50
  gr.Textbox(label="Transcript"),
51
  gr.File(label="Download Transcript")
52
  ],
53
  title="ShukaNotesApp",
54
- description="Note Maker for Indian Offices and Their Many Languages.",
55
- live=True
56
  )
57
 
58
  if __name__ == "__main__":
59
- iface.launch()
 
4
  import torch
5
  import spaces
6
  import numpy as np
7
+ import tempfile
8
 
9
@spaces.GPU(duration=60)
def transcribe_and_respond(audio_file, email):
    """Run the shuka_v1 audio model on an uploaded file and return its notes.

    Parameters
    ----------
    audio_file : str
        Filesystem path to the uploaded/recorded audio (Gradio ``type="filepath"``).
    email : str
        Optional email address; when non-blank it is prepended to the transcript.
        (Placeholder — no mail is actually sent.)

    Returns
    -------
    tuple
        ``(transcript_text, transcript_file_path)``. On failure the first
        element is an ``"Error: ..."`` message and the second is ``None``.
    """
    try:
        # NOTE(review): the pipeline kwargs after `model=` are hidden inside
        # this diff hunk. `trust_remote_code=True` is required to load
        # shuka_v1's custom model code — confirm the remaining kwargs
        # (device, dtype, ...) against the original file.
        pipe = transformers.pipeline(
            model='sarvamai/shuka_v1',
            trust_remote_code=True,
        )

        # Load the audio resampled to the 16 kHz rate the model expects.
        audio, sr = librosa.load(audio_file, sr=16000)
        # The model code expects a contiguous float32 buffer.
        audio = np.ascontiguousarray(audio, dtype=np.float32)
        # Down-mix multi-channel audio to mono by averaging channels.
        if audio.ndim > 1:
            audio = np.mean(audio, axis=-1)

        # Debug: audio properties.
        print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")

        # Prompt the model for key takeaways rather than a verbatim transcript.
        turns = [
            {'role': 'system', 'content': 'Share the Key Take Aways and Action Steps'},
            {'role': 'user', 'content': '<|audio|>'}
        ]
        print(f"Initial turns: {turns}")

        # Run the model inference (this call is synchronous).
        output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=10000)
        print(f"Model output: {output}")

        # Extract transcript text from the output.
        transcript = str(output)
        if email and email.strip():
            transcript = f"Email provided: {email}\n\n{transcript}"

        # Persist the transcript so Gradio can offer it as a download.
        # encoding='utf-8' is forced: transcripts may contain non-ASCII
        # (Indic-script) text and the platform default encoding is not
        # guaranteed to handle it.
        with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt',
                                         encoding='utf-8') as tmp:
            tmp.write(transcript)
            transcript_file = tmp.name

        # Return transcript text and the file path for the download widget.
        return transcript, transcript_file

    except Exception as e:
        # Return None (not "") for the file slot: an empty-string path makes
        # gr.File try to serve a nonexistent file instead of showing "no file".
        return f"Error: {str(e)}", None
56
 
57
# UI wiring for the Shuka notes app: one audio input plus an optional email
# box, producing the transcript text and a downloadable transcript file.
_inputs = [
    gr.Audio(sources=["upload", "microphone"], type="filepath"),
    gr.Textbox(label="Email", placeholder="Enter your email address (optional)"),
]
_outputs = [
    gr.Textbox(label="Transcript"),
    gr.File(label="Download Transcript"),
]
iface = gr.Interface(
    fn=transcribe_and_respond,
    inputs=_inputs,
    outputs=_outputs,
    title="ShukaNotesApp",
    description="Upload or record your meeting audio, optionally provide your email, and download the transcript.",
)
70
 
71
# Start the Gradio server only when executed directly (not when imported).
if __name__ == "__main__":
    iface.launch()