Update app.py
Browse files
app.py
CHANGED
@@ -72,7 +72,7 @@ app.layout = dbc.Container([
|
|
72 |
def transcribe_and_diarize_audio(contents, filename):
|
73 |
global generated_file, transcription_text
|
74 |
temp_audio_file = None
|
75 |
-
|
76 |
try:
|
77 |
content_type, content_string = contents.split(',')
|
78 |
decoded = base64.b64decode(content_string)
|
@@ -100,13 +100,21 @@ def transcribe_and_diarize_audio(contents, filename):
|
|
100 |
# Rewind the file for diarization
|
101 |
audio_file.seek(0)
|
102 |
|
103 |
-
# Perform diarization (speaker
|
104 |
-
diarized_transcript = openai.Audio.transcribe("whisper-1", audio_file,
|
|
|
|
|
105 |
|
106 |
# Format the diarized transcript
|
107 |
formatted_transcript = ""
|
108 |
-
|
109 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
110 |
|
111 |
transcription_text = formatted_transcript
|
112 |
logger.info("Transcription and diarization completed successfully")
|
@@ -121,10 +129,10 @@ def transcribe_and_diarize_audio(contents, filename):
|
|
121 |
return f"An error occurred during transcription and diarization: {str(e)}", False
|
122 |
finally:
|
123 |
# Clean up temporary files
|
124 |
-
if temp_audio_file:
|
125 |
os.unlink(temp_audio_file.name)
|
126 |
-
if
|
127 |
-
os.unlink(
|
128 |
|
129 |
@app.callback(
|
130 |
[Output('output-audio-upload', 'children'),
|
|
|
72 |
def transcribe_and_diarize_audio(contents, filename):
|
73 |
global generated_file, transcription_text
|
74 |
temp_audio_file = None
|
75 |
+
wav_path = None
|
76 |
try:
|
77 |
content_type, content_string = contents.split(',')
|
78 |
decoded = base64.b64decode(content_string)
|
|
|
100 |
# Rewind the file for diarization
|
101 |
audio_file.seek(0)
|
102 |
|
103 |
+
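# Request a segment-level (verbose_json) transcript to use for diarization (speaker detection); NOTE(review): confirm the returned segments actually include a 'speaker' field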
|
104 |
+
diarized_transcript = openai.Audio.transcribe("whisper-1", audio_file, response_format="verbose_json")
|
105 |
+
|
106 |
+
logger.info(f"OpenAI API Response: {diarized_transcript}")
|
107 |
|
108 |
# Format the diarized transcript
|
109 |
formatted_transcript = ""
|
110 |
+
if 'segments' in diarized_transcript:
|
111 |
+
for segment in diarized_transcript["segments"]:
|
112 |
+
speaker = segment.get('speaker', 'Unknown')
|
113 |
+
text = segment.get('text', '')
|
114 |
+
formatted_transcript += f"Speaker {speaker}: {text}\n\n"
|
115 |
+
else:
|
116 |
+
# If no segments, use the full transcript
|
117 |
+
formatted_transcript = transcript.get('text', 'No transcription available.')
|
118 |
|
119 |
transcription_text = formatted_transcript
|
120 |
logger.info("Transcription and diarization completed successfully")
|
|
|
129 |
return f"An error occurred during transcription and diarization: {str(e)}", False
|
130 |
finally:
|
131 |
# Clean up temporary files
|
132 |
+
if temp_audio_file and os.path.exists(temp_audio_file.name):
|
133 |
os.unlink(temp_audio_file.name)
|
134 |
+
if wav_path and os.path.exists(wav_path):
|
135 |
+
os.unlink(wav_path)
|
136 |
|
137 |
@app.callback(
|
138 |
[Output('output-audio-upload', 'children'),
|