Spaces:

helvekami
/

ShukaNote

Running

App Files Files Community

helvekami committed on Mar 7

Commit

a9b9492

verified ·

1 Parent(s): 02b1ff9

Updated app.py with email (placeholder) and transcript download functionality

Browse files

Files changed (1) hide show

app.py +32 -19

app.py CHANGED Viewed

@@ -4,9 +4,10 @@ import librosa
 import torch
 import spaces
 import numpy as np
 @spaces.GPU(duration=60)
-def transcribe_and_respond(audio_file):
     try:
         pipe = transformers.pipeline(
             model='sarvamai/shuka_v1',
@@ -17,43 +18,55 @@ def transcribe_and_respond(audio_file):
         # Load the audio file at 16kHz
         audio, sr = librosa.load(audio_file, sr=16000)
-        # Print audio properties for debugging
         print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")
         turns = [
-            {'role': 'system', 'content': 'Compile the information'},
             {'role': 'user', 'content': '<|audio|>'}
         ]
-        # Debug: Print the initial turns
         print(f"Initial turns: {turns}")
-        # Call the model with the audio and prompt
-        output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=1000)
-        # Debug: Print the final output from the model
         print(f"Model output: {output}")
-        return output
     except Exception as e:
-        return f"Error: {str(e)}"
 iface = gr.Interface(
     fn=transcribe_and_respond,
     inputs=[
         gr.Audio(sources=["upload", "microphone"], type="filepath"),
     ],
     outputs=[
         gr.Textbox(label="Transcript"),
         gr.File(label="Download Transcript")
     ],
     title="ShukaNotesApp",
-    description="Note Maker for Indian Offices and Their Many Languages.",
-    live=True
 )
 if __name__ == "__main__":
-    iface.launch()

 import torch
 import spaces
 import numpy as np
+import tempfile
 @spaces.GPU(duration=60)
+def transcribe_and_respond(audio_file, email):
     try:
         pipe = transformers.pipeline(
             model='sarvamai/shuka_v1',
         # Load the audio file at 16kHz
         audio, sr = librosa.load(audio_file, sr=16000)
+        # Convert the audio to a contiguous float32 array
+        audio = np.ascontiguousarray(audio, dtype=np.float32)
+        # If audio is multi-channel, convert to mono by averaging channels
+        if audio.ndim > 1:
+            audio = np.mean(audio, axis=-1)
+        # Debug: Print audio properties
         print(f"Audio dtype: {audio.dtype}, Audio shape: {audio.shape}, Sample rate: {sr}")
+        # Set up the prompt to get key takeaways
         turns = [
+            {'role': 'system', 'content': 'Share the Key Take Aways and Action Steps'},
             {'role': 'user', 'content': '<|audio|>'}
         ]
         print(f"Initial turns: {turns}")
+        # Run the model inference (this call is synchronous)
+        output = pipe({'audio': audio, 'turns': turns, 'sampling_rate': sr}, max_new_tokens=10000)
         print(f"Model output: {output}")
+        # Extract transcript text from the output
+        transcript = str(output)
+        if email and email.strip():
+            transcript = f"Email provided: {email}\n\n{transcript}"
+        # Write the transcript to a temporary file for download
+        with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.txt') as tmp:
+            tmp.write(transcript)
+            transcript_file = tmp.name
+        # Return transcript text and file download path
+        return transcript, transcript_file
     except Exception as e:
+        return f"Error: {str(e)}", ""
 iface = gr.Interface(
     fn=transcribe_and_respond,
     inputs=[
         gr.Audio(sources=["upload", "microphone"], type="filepath"),
+        gr.Textbox(label="Email", placeholder="Enter your email address (optional)")
     ],
     outputs=[
         gr.Textbox(label="Transcript"),
         gr.File(label="Download Transcript")
     ],
     title="ShukaNotesApp",
+    description="Upload or record your meeting audio, optionally provide your email, and download the transcript."
 )
 if __name__ == "__main__":
+    iface.launch()