Update app.py
Browse files
app.py
CHANGED
|
@@ -87,7 +87,8 @@ def chunk_audio(audio_segment, chunk_size_ms=60000):
|
|
| 87 |
|
| 88 |
def transcribe_audio_chunks(chunks):
|
| 89 |
transcriptions = []
|
| 90 |
- for chunk in chunks:
|
|
|
|
| 91 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_audio_file:
|
| 92 |
chunk.export(temp_audio_file.name, format="wav")
|
| 93 |
with open(temp_audio_file.name, 'rb') as audio_file:
|
|
@@ -204,9 +205,13 @@ def process_media(file_path, is_url=False):
|
|
| 204 |
audio = AudioSegment.from_wav(wav_path)
|
| 205 |
chunks = chunk_audio(audio)
|
| 206 |
|
|
|
|
|
|
|
| 207 |
# Transcribe chunks
|
| 208 |
transcription = transcribe_audio_chunks(chunks)
|
| 209 |
|
|
|
|
|
|
|
| 210 |
# Diarization using OpenAI
|
| 211 |
diarization_prompt = f"""
|
| 212 |
The following is a transcription of a conversation. Please identify different speakers and label them as Speaker 1, Speaker 2, etc. Format the output as a series of speaker labels followed by their dialogue. Here's the transcription:
|
|
|
|
| 87 |
|
| 88 |
def transcribe_audio_chunks(chunks):
|
| 89 |
transcriptions = []
|
| 90 |
+ for i, chunk in enumerate(chunks):
|
| 91 |
+ logger.info(f"Transcribing chunk {i+1}/{len(chunks)}")
|
| 92 |
with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_audio_file:
|
| 93 |
chunk.export(temp_audio_file.name, format="wav")
|
| 94 |
with open(temp_audio_file.name, 'rb') as audio_file:
|
|
|
|
| 205 |
audio = AudioSegment.from_wav(wav_path)
|
| 206 |
chunks = chunk_audio(audio)
|
| 207 |
|
| 208 |
+ logger.info(f"Audio chunked into {len(chunks)} segments")
|
| 209 |
+
|
| 210 |
# Transcribe chunks
|
| 211 |
transcription = transcribe_audio_chunks(chunks)
|
| 212 |
|
| 213 |
+ logger.info(f"Transcription completed. Total length: {len(transcription)} characters")
|
| 214 |
+
|
| 215 |
# Diarization using OpenAI
|
| 216 |
diarization_prompt = f"""
|
| 217 |
The following is a transcription of a conversation. Please identify different speakers and label them as Speaker 1, Speaker 2, etc. Format the output as a series of speaker labels followed by their dialogue. Here's the transcription:
|