File size: 6,292 Bytes
4e7ec06 1100e65 b09f327 639051f 4e7ec06 53bdf99 4e7ec06 7a3a01f 4e7ec06 953582f 4e7ec06 ce4312e 639051f 4e7ec06 82b85b5 4e7ec06 82b85b5 4e7ec06 4ed1e63 4e7ec06 53bdf99 4e7ec06 a123d64 4e7ec06 a123d64 4e7ec06 a123d64 4e7ec06 a123d64 4e7ec06 a123d64 4e7ec06 53bdf99 a123d64 53bdf99 4e7ec06 dce154d 4e7ec06 dce154d 4e7ec06 dce154d 4e7ec06 dce154d 4e7ec06 dce154d 04933a2 53bdf99 4e7ec06 53bdf99 4e7ec06 53bdf99 26cf8bb 4e7ec06 26cf8bb 4e7ec06 81f702f 6575bf4 4e7ec06 31b9df5 4e7ec06 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 |
import base64
import io
import os
import threading
from dash import Dash, dcc, html, Input, Output, State, callback
import dash_bootstrap_components as dbc
import tempfile
import logging
import openai
from pydub import AudioSegment
# Configure module-wide logging: timestamped, INFO level and above.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)
# Initialize the Dash app with Bootstrap styling.
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
# Module-level state shared between the upload and download callbacks.
generated_file = None  # io.BytesIO holding the last transcript, ready for download
transcription_text = ""  # full diarized transcript text of the last upload
# Set up OpenAI API key from the environment (None if the variable is unset).
openai.api_key = os.getenv("OPENAI_API_KEY")
# Layout: two side-by-side Bootstrap cards — audio upload on the left,
# diarized-transcription preview and download on the right.
app.layout = dbc.Container([
    html.H1("Audio Transcription and Diarization App", className="text-center my-4"),
    dbc.Row([
        # Left card for input
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    # Drag-and-drop / file-picker target for a single audio file.
                    dcc.Upload(
                        id='upload-audio',
                        children=html.Div([
                            'Drag and Drop or ',
                            html.A('Select Audio File')
                        ]),
                        style={
                            'width': '100%',
                            'height': '60px',
                            'lineHeight': '60px',
                            'borderWidth': '1px',
                            'borderStyle': 'dashed',
                            'borderRadius': '5px',
                            'textAlign': 'center',
                            'margin': '10px'
                        },
                        multiple=False  # exactly one file per upload
                    ),
                    html.Div(id='output-audio-upload'),
                    # Spinner is shown while the transcription callback is running.
                    dbc.Spinner(html.Div(id='transcription-status'), color="primary", type="grow"),
                ])
            ], className="mb-4")
        ], md=6),
        # Right card for output
        dbc.Col([
            dbc.Card([
                dbc.CardBody([
                    html.H4("Diarized Transcription Preview", className="card-title"),
                    # pre-wrap preserves the "Speaker N: ..." paragraph breaks.
                    html.Div(id='transcription-preview', style={'whiteSpace': 'pre-wrap'}),
                    html.Br(),
                    # Disabled until a transcription succeeds (toggled by update_output).
                    dbc.Button("Download Transcription", id="btn-download", color="primary", className="mt-3", disabled=True),
                    dcc.Download(id="download-transcription")
                ])
            ])
        ], md=6)
    ])
], fluid=True)
def transcribe_and_diarize_audio(contents, filename):
    """Decode an uploaded audio file and transcribe/diarize it with OpenAI.

    Args:
        contents: Dash upload payload of the form "data:<mime>;base64,<data>".
        filename: Original file name; its extension is used for format
            detection and for the temp-file suffix.

    Returns:
        tuple[str, bool]: (status message, success flag). On success the
        module globals ``transcription_text`` and ``generated_file`` are
        populated for the preview and download callbacks.
    """
    global generated_file, transcription_text
    temp_audio_file_path = None
    wav_path = None  # converted .wav copy; must be cleaned up in finally
    try:
        content_type, content_string = contents.split(',', 1)
        decoded = base64.b64decode(content_string)
        # Persist the upload to disk so pydub can read it by path.
        with tempfile.NamedTemporaryFile(delete=False,
                                         suffix=os.path.splitext(filename)[1]) as tmp:
            tmp.write(decoded)
            temp_audio_file_path = tmp.name
        logger.info(f"File uploaded: {temp_audio_file_path}")
        # Guard clause: reject anything that isn't a supported audio format.
        if not filename.lower().endswith(('.wav', '.mp3', '.ogg', '.flac')):
            return "Unsupported file format. Please upload an audio file.", False
        logger.info("Audio file detected, transcribing with OpenAI")
        # Normalize to wav before sending to the API.
        audio = AudioSegment.from_file(temp_audio_file_path)
        wav_path = temp_audio_file_path + ".wav"
        audio.export(wav_path, format="wav")
        with open(wav_path, "rb") as audio_file:
            # Single transcription request with speaker separation. (The
            # original code made an identical plain transcription call first
            # and discarded its result — a wasted paid API call.)
            # NOTE(review): `speaker_detection` is not a documented Whisper
            # API parameter — confirm against the OpenAI API reference.
            diarized_transcript = openai.Audio.transcribe(
                "whisper-1", audio_file, speaker_detection=2)
        # Format the diarized transcript as "Speaker N: text" paragraphs.
        transcription_text = "".join(
            f"Speaker {segment['speaker']}: {segment['text']}\n\n"
            for segment in diarized_transcript["segments"])
        logger.info("Transcription and diarization completed successfully")
        # Prepare the transcription for download.
        generated_file = io.BytesIO(transcription_text.encode())
        return "Transcription and diarization completed successfully!", True
    except Exception as e:
        logger.error(f"Error during transcription and diarization: {str(e)}")
        return f"An error occurred during transcription and diarization: {str(e)}", False
    finally:
        # Clean up both temporary files. The original leaked the .wav copy:
        # it tested an always-None `wav_file` variable instead of the real
        # `wav_path`, and unconditionally unlinked a file that might not exist.
        for path in (temp_audio_file_path, wav_path):
            if path and os.path.exists(path):
                os.unlink(path)
@app.callback(
    [Output('output-audio-upload', 'children'),
     Output('transcription-status', 'children'),
     Output('transcription-preview', 'children'),
     Output('btn-download', 'disabled')],
    [Input('upload-audio', 'contents')],
    [State('upload-audio', 'filename')]
)
def update_output(contents, filename):
    """Handle an upload: run transcription and refresh status/preview/button.

    Returns a 4-tuple matching the Outputs above: (upload message,
    status message, preview text, download-button-disabled flag).
    """
    if contents is None:
        return "No file uploaded.", "", "", True
    status_message, success = transcribe_and_diarize_audio(contents, filename)
    if success:
        # Cap the inline preview at 1000 characters; the download has the full text.
        preview = transcription_text[:1000] + "..." if len(transcription_text) > 1000 else transcription_text
        # Bug fix: the originals were f-strings with no placeholder, so the
        # UI showed the literal "(unknown)" instead of the uploaded filename.
        return f"File {filename} processed successfully.", status_message, preview, False
    return f"File {filename} could not be processed.", status_message, "", True
@app.callback(
    Output("download-transcription", "data"),
    Input("btn-download", "n_clicks"),
    prevent_initial_call=True,
)
def download_transcription(n_clicks):
    """Serve the current diarized transcript as a downloadable text file."""
    # Guard both "never clicked" and "no transcription generated yet": the
    # button starts disabled, but without the None check a stale or forced
    # click would crash on `None.getvalue()`.
    if n_clicks is None or generated_file is None:
        return None
    return dcc.send_bytes(generated_file.getvalue(), "diarized_transcription.txt")
if __name__ == '__main__':
    print("Starting the Dash application...")
    # 0.0.0.0 listens on all interfaces; port 7860 is presumably chosen for a
    # container/hosted deployment (Hugging Face Spaces convention) — confirm.
    # debug=True enables the reloader/devtools; not for production use.
    app.run(debug=True, host='0.0.0.0', port=7860)
    # Only reached after the server shuts down.
    print("Dash application has finished running.")