bluenevus committed on
Commit
110c781
·
verified ·
1 Parent(s): b3174ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +106 -105
app.py CHANGED
@@ -2,12 +2,15 @@ import base64
2
  import io
3
  import os
4
  import threading
5
- from dash import Dash, dcc, html, Input, Output, State, callback
6
- import dash_bootstrap_components as dbc
7
  import tempfile
8
  import logging
9
  import openai
 
 
10
  from pydub import AudioSegment
 
 
 
11
 
12
  # Configure logging
13
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -25,134 +28,132 @@ openai.api_key = os.getenv("OPENAI_API_KEY")
25
 
26
  # Layout
27
  app.layout = dbc.Container([
28
- html.H1("Audio Transcription and Diarization App", className="text-center my-4"),
29
- dbc.Row([
30
- # Left card for input
31
- dbc.Col([
32
- dbc.Card([
33
- dbc.CardBody([
34
- dcc.Upload(
35
- id='upload-audio',
36
- children=html.Div([
37
- 'Drag and Drop or ',
38
- html.A('Select Audio File')
39
- ]),
40
- style={
41
- 'width': '100%',
42
- 'height': '60px',
43
- 'lineHeight': '60px',
44
- 'borderWidth': '1px',
45
- 'borderStyle': 'dashed',
46
- 'borderRadius': '5px',
47
- 'textAlign': 'center',
48
- 'margin': '10px'
49
- },
50
- multiple=False
51
- ),
52
- html.Div(id='output-audio-upload'),
53
- dbc.Spinner(html.Div(id='transcription-status'), color="primary", type="grow"),
54
- ])
55
- ], className="mb-4")
56
- ], md=6),
57
- # Right card for output
58
- dbc.Col([
59
- dbc.Card([
60
- dbc.CardBody([
61
- html.H4("Diarized Transcription Preview", className="card-title"),
62
- html.Div(id='transcription-preview', style={'whiteSpace': 'pre-wrap'}),
63
- html.Br(),
64
- dbc.Button("Download Transcription", id="btn-download", color="primary", className="mt-3", disabled=True),
65
- dcc.Download(id="download-transcription")
66
- ])
67
- ])
68
- ], md=6)
69
  ])
70
  ], fluid=True)
71
 
72
def transcribe_and_diarize_audio(contents, filename):
    """Transcribe an uploaded audio file with Whisper and cache the result.

    Args:
        contents: Upload payload as a string of the form
            ``"<header>,<base64 data>"``.
        filename: Original name of the uploaded file. Its extension decides
            whether the format is supported and is reused as the temp-file
            suffix.

    Returns:
        A ``(status_message, success)`` tuple. On success the module-level
        ``transcription_text`` holds the formatted transcript and
        ``generated_file`` holds the same text as an in-memory bytes buffer.
    """
    global generated_file, transcription_text

    # Reject unsupported uploads before decoding or writing anything to disk.
    filename = filename or ""
    if not filename.lower().endswith(('.wav', '.mp3', '.ogg', '.flac')):
        return "Unsupported file format. Please upload an audio file.", False

    temp_audio_file_path = None
    wav_path = None
    try:
        # Split on the first comma only, so a comma in the header cannot
        # break the unpacking.
        _, _, content_string = contents.partition(',')
        decoded = base64.b64decode(content_string)

        # delete=False: the file must outlive this handle so pydub can read it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=os.path.splitext(filename)[1]) as temp_audio_file:
            # Record the path first so cleanup still happens if the write fails.
            temp_audio_file_path = temp_audio_file.name
            temp_audio_file.write(decoded)

        logger.info("File uploaded: %s", temp_audio_file_path)
        logger.info("Audio file detected, transcribing with OpenAI")

        # Normalise every input to WAV before sending it to the API.
        wav_path = temp_audio_file_path + ".wav"
        AudioSegment.from_file(temp_audio_file_path).export(wav_path, format="wav")

        # One verbose_json request returns both the full text and the
        # segments, so a second plain-text request is unnecessary.
        with open(wav_path, "rb") as audio_file:
            response = openai.Audio.transcribe("whisper-1", audio_file, response_format="verbose_json")

        segments = response.get("segments")
        # Log only a summary; the full response contains the user's transcript.
        logger.info("OpenAI API response received with %d segment(s)", len(segments or []))

        if segments:
            # NOTE(review): segments are labelled "Unknown" unless the API
            # returns a 'speaker' key -- confirm whether whisper-1 ever does.
            formatted_transcript = "".join(
                f"Speaker {segment.get('speaker', 'Unknown')}: {segment.get('text', '')}\n\n"
                for segment in segments
            )
        else:
            # No segments: fall back to the full transcript text.
            formatted_transcript = response.get('text', 'No transcription available.')

        transcription_text = formatted_transcript
        logger.info("Transcription and diarization completed successfully")

        # Prepare the transcription for download.
        generated_file = io.BytesIO(transcription_text.encode())
        return "Transcription and diarization completed successfully!", True
    except Exception as e:
        logger.exception("Error during transcription and diarization")
        return f"An error occurred during transcription and diarization: {str(e)}", False
    finally:
        # Clean up temporary files on every exit path.
        for path in (temp_audio_file_path, wav_path):
            if path and os.path.exists(path):
                os.unlink(path)
136
 
137
  @app.callback(
138
- [Output('output-audio-upload', 'children'),
139
  Output('transcription-status', 'children'),
140
  Output('transcription-preview', 'children'),
141
  Output('btn-download', 'disabled')],
142
- [Input('upload-audio', 'contents')],
143
- [State('upload-audio', 'filename')]
 
 
144
  )
145
- def update_output(contents, filename):
146
- if contents is None:
147
- return "No file uploaded.", "", "", True
 
 
 
 
 
 
 
 
 
 
 
 
148
 
149
- status_message, success = transcribe_and_diarize_audio(contents, filename)
150
-
151
  if success:
152
  preview = transcription_text[:1000] + "..." if len(transcription_text) > 1000 else transcription_text
153
- return f"File {filename} processed successfully.", status_message, preview, False
154
  else:
155
- return f"File {filename} could not be processed.", status_message, "", True
156
 
157
  @app.callback(
158
  Output("download-transcription", "data"),
 
2
  import io
3
  import os
4
  import threading
 
 
5
  import tempfile
6
  import logging
7
  import openai
8
+ from dash import Dash, dcc, html, Input, Output, State, callback
9
+ import dash_bootstrap_components as dbc
10
  from pydub import AudioSegment
11
+ import requests
12
+ from pytube import YouTube
13
+ import moviepy.editor as mp
14
 
15
  # Configure logging
16
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
28
 
29
  # Layout
30
  app.layout = dbc.Container([
31
+ html.H1("Audio/Video Transcription and Diarization App", className="text-center my-4"),
32
+ dbc.Card([
33
+ dbc.CardBody([
34
+ dcc.Upload(
35
+ id='upload-media',
36
+ children=html.Div([
37
+ 'Drag and Drop or ',
38
+ html.A('Select Audio/Video File')
39
+ ]),
40
+ style={
41
+ 'width': '100%',
42
+ 'height': '60px',
43
+ 'lineHeight': '60px',
44
+ 'borderWidth': '1px',
45
+ 'borderStyle': 'dashed',
46
+ 'borderRadius': '5px',
47
+ 'textAlign': 'center',
48
+ 'margin': '10px'
49
+ },
50
+ multiple=False
51
+ ),
52
+ html.Div(id='output-media-upload'),
53
+ dbc.Input(id="url-input", type="text", placeholder="Enter audio/video URL (including YouTube)", className="mb-3"),
54
+ dbc.Button("Process URL", id="process-url-button", color="primary", className="mb-3"),
55
+ dbc.Spinner(html.Div(id='transcription-status'), color="primary", type="grow"),
56
+ html.H4("Diarized Transcription Preview", className="mt-4"),
57
+ html.Div(id='transcription-preview', style={'whiteSpace': 'pre-wrap'}),
58
+ html.Br(),
59
+ dbc.Button("Download Transcription", id="btn-download", color="primary", className="mt-3", disabled=True),
60
+ dcc.Download(id="download-transcription")
61
+ ])
 
 
 
 
 
 
 
 
 
 
62
  ])
63
  ], fluid=True)
64
 
65
_VIDEO_EXTENSIONS = ('.mp4', '.avi', '.mov', '.flv', '.wmv')
_AUDIO_EXTENSIONS = ('.wav', '.mp3', '.ogg', '.flac')
_DOWNLOAD_TIMEOUT_SECONDS = 60


def _write_temp_file(data, suffix):
    """Write *data* (bytes) to a new, closed temporary file and return its path."""
    # delete=False: the file must outlive this handle so decoders can open it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as handle:
        handle.write(data)
        return handle.name


def _download_to_temp_file(url):
    """Fetch *url* into a temporary file and return ``(path, audio_only)``."""
    # SECURITY: the URL comes straight from the user and is fetched
    # server-side; restrict allowed hosts/schemes if this app is exposed.
    if 'youtube.com' in url or 'youtu.be' in url:
        stream = YouTube(url).streams.filter(only_audio=True).first()
        if stream is None:
            raise ValueError("No audio stream is available for this YouTube URL.")
        # Reserve a unique name and close the handle before pytube writes
        # to that path.
        path = _write_temp_file(b"", '.mp4')
        try:
            stream.download(output_path=os.path.dirname(path), filename=os.path.basename(path))
        except Exception:
            # The caller never sees the path, so remove the file here.
            if os.path.exists(path):
                os.unlink(path)
            raise
        return path, True

    from urllib.parse import urlparse

    response = requests.get(url, timeout=_DOWNLOAD_TIMEOUT_SECONDS)
    response.raise_for_status()
    # Keep the extension from the URL path so the format check can recognise it.
    suffix = os.path.splitext(urlparse(url).path)[1]
    return _write_temp_file(response.content, suffix), False


def _convert_to_wav(source_path, wav_path, audio_only=False):
    """Decode *source_path* into *wav_path*; return False if the format is unsupported."""
    extension = os.path.splitext(source_path)[1].lower()
    if audio_only or extension in _AUDIO_EXTENSIONS:
        # Audio-only input (including the YouTube audio stream, which is
        # saved as .mp4) is decoded with pydub rather than moviepy.
        AudioSegment.from_file(source_path).export(wav_path, format="wav")
        return True
    if extension in _VIDEO_EXTENSIONS:
        video = mp.VideoFileClip(source_path)
        try:
            if video.audio is None:
                raise ValueError("The video file does not contain an audio track.")
            video.audio.write_audiofile(wav_path)
        finally:
            # Close the clip on the error path as well.
            video.close()
        return True
    return False


def _format_transcript(response):
    """Render a verbose_json transcription response as the preview/download text."""
    segments = response.get("segments")
    if segments:
        # NOTE(review): segments are labelled "Unknown" unless the API returns
        # a 'speaker' key -- confirm whether whisper-1 ever does.
        return "".join(
            f"Speaker {segment.get('speaker', 'Unknown')}: {segment.get('text', '')}\n\n"
            for segment in segments
        )
    return response.get('text', 'No transcription available.')


def process_media(file_path, is_url=False, filename=None):
    """Transcribe an uploaded file or a remote URL and cache the transcript.

    Args:
        file_path: The URL to download when ``is_url`` is true; otherwise the
            raw bytes of the uploaded file.
        is_url: Whether ``file_path`` is a URL (YouTube or a direct link).
        filename: Original name of an uploaded file. Its extension selects the
            decoder; without it an upload cannot be recognised as supported.

    Returns:
        A ``(status_message, success)`` tuple. On success the module-level
        ``transcription_text`` holds the formatted transcript and
        ``generated_file`` holds the same text as an in-memory bytes buffer.
    """
    global generated_file, transcription_text
    source_path = None
    wav_path = None
    try:
        if is_url:
            source_path, audio_only = _download_to_temp_file(file_path)
        else:
            suffix = os.path.splitext(filename or "")[1]
            source_path, audio_only = _write_temp_file(file_path, suffix), False

        # Set before converting so a partially written WAV is still cleaned up.
        wav_path = source_path + ".wav"
        if not _convert_to_wav(source_path, wav_path, audio_only):
            return "Unsupported file format. Please upload an audio or video file.", False

        # One verbose_json request returns both the full text and the
        # segments, so a second plain-text request is unnecessary.
        with open(wav_path, "rb") as audio_file:
            response = openai.Audio.transcribe("whisper-1", audio_file, response_format="verbose_json")

        transcription_text = _format_transcript(response)
        generated_file = io.BytesIO(transcription_text.encode())
        return "Transcription and diarization completed successfully!", True
    except Exception as e:
        logger.exception("Error during processing")
        return f"An error occurred: {str(e)}", False
    finally:
        # Remove temporary files on every exit path.
        for path in (source_path, wav_path):
            if path and os.path.exists(path):
                os.unlink(path)


@app.callback(
    [Output('output-media-upload', 'children'),
     Output('transcription-status', 'children'),
     Output('transcription-preview', 'children'),
     Output('btn-download', 'disabled')],
    [Input('upload-media', 'contents'),
     Input('process-url-button', 'n_clicks')],
    [State('upload-media', 'filename'),
     State('url-input', 'value')]
)
def update_output(contents, n_clicks, filename, url):
    """Process whichever input fired (upload or URL button) and refresh the page.

    Returns, in order: the upload message, the status message, the transcript
    preview, and whether the download button is disabled.
    """
    # Imported here because the file's top-level dash import does not include it.
    from dash import callback_context

    ctx = callback_context
    if not ctx.triggered:
        return "No file uploaded or URL processed.", "", "", True

    # prop_id looks like "<component id>.<property>"; keep the component id.
    trigger_id = ctx.triggered[0]['prop_id'].split('.')[0]

    if trigger_id == 'upload-media' and contents is not None:
        # Payload is "<header>,<base64 data>"; split on the first comma only.
        _, _, content_string = contents.partition(',')
        decoded = base64.b64decode(content_string)
        # Pass the original name so the file extension is preserved.
        status_message, success = process_media(decoded, filename=filename)
    elif trigger_id == 'process-url-button' and url and url.strip():
        status_message, success = process_media(url.strip(), is_url=True)
    else:
        return "No file uploaded or URL processed.", "", "", True

    if not success:
        return "Processing failed.", status_message, "", True

    # Show at most the first 1000 characters, marking truncation with "...".
    preview = transcription_text[:1000] + "..." if len(transcription_text) > 1000 else transcription_text
    return "File processed successfully.", status_message, preview, False
157
 
158
  @app.callback(
159
  Output("download-transcription", "data"),