Spaces:

MicroHealth
/

AV-to-transcripts

Paused

App Files Files Community

bluenevus committed on Apr 26

Commit

b3174ad

verified ·

1 Parent(s): dce154d

Update app.py

Browse files

Files changed (1) hide show

app.py +16 -8

app.py CHANGED Viewed

@@ -72,7 +72,7 @@ app.layout = dbc.Container([
 def transcribe_and_diarize_audio(contents, filename):
     global generated_file, transcription_text
     temp_audio_file = None
-    wav_file = None
     try:
         content_type, content_string = contents.split(',')
         decoded = base64.b64decode(content_string)
@@ -100,13 +100,21 @@ def transcribe_and_diarize_audio(contents, filename):
                 # Rewind the file for diarization
                 audio_file.seek(0)
-                # Perform diarization (speaker separation)
-                diarized_transcript = openai.Audio.transcribe("whisper-1", audio_file, speaker_detection=2)
             # Format the diarized transcript
             formatted_transcript = ""
-            for segment in diarized_transcript["segments"]:
-                formatted_transcript += f"Speaker {segment['speaker']}: {segment['text']}\n\n"
             transcription_text = formatted_transcript
             logger.info("Transcription and diarization completed successfully")
@@ -121,10 +129,10 @@ def transcribe_and_diarize_audio(contents, filename):
         return f"An error occurred during transcription and diarization: {str(e)}", False
     finally:
         # Clean up temporary files
-        if temp_audio_file:
             os.unlink(temp_audio_file.name)
-        if wav_file:
-            os.unlink(wav_file)
 @app.callback(
     [Output('output-audio-upload', 'children'),

 def transcribe_and_diarize_audio(contents, filename):
     global generated_file, transcription_text
     temp_audio_file = None
+    wav_path = None
     try:
         content_type, content_string = contents.split(',')
         decoded = base64.b64decode(content_string)
                 # Rewind the file for diarization
                 audio_file.seek(0)
+                # Perform diarization (speaker detection)
+                diarized_transcript = openai.Audio.transcribe("whisper-1", audio_file, response_format="verbose_json")
+            logger.info(f"OpenAI API Response: {diarized_transcript}")
             # Format the diarized transcript
             formatted_transcript = ""
+            if 'segments' in diarized_transcript:
+                for segment in diarized_transcript["segments"]:
+                    speaker = segment.get('speaker', 'Unknown')
+                    text = segment.get('text', '')
+                    formatted_transcript += f"Speaker {speaker}: {text}\n\n"
+            else:
+                # If no segments, use the full transcript
+                formatted_transcript = transcript.get('text', 'No transcription available.')
             transcription_text = formatted_transcript
             logger.info("Transcription and diarization completed successfully")
         return f"An error occurred during transcription and diarization: {str(e)}", False
     finally:
         # Clean up temporary files
+        if temp_audio_file and os.path.exists(temp_audio_file.name):
             os.unlink(temp_audio_file.name)
+        if wav_path and os.path.exists(wav_path):
+            os.unlink(wav_path)
 @app.callback(
     [Output('output-audio-upload', 'children'),