bluenevus committed on
Commit
e99776b
·
verified ·
1 Parent(s): 2e288e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -55
app.py CHANGED
@@ -9,7 +9,6 @@ from dash import Dash, dcc, html, Input, Output, State, callback, callback_conte
9
  import dash_bootstrap_components as dbc
10
  from pydub import AudioSegment
11
  import requests
12
- import yt_dlp
13
  import mimetypes
14
  import urllib.parse
15
 
@@ -65,8 +64,8 @@ app.layout = dbc.Container([
65
  multiple=False
66
  ),
67
  html.Div(id='output-media-upload'),
68
- dbc.Input(id="url-input", type="text", placeholder="Enter audio/video URL (including YouTube)", className="mb-3"),
69
- dbc.Button("Process URL", id="process-url-button", color="primary", className="mb-3"),
70
  dbc.Spinner(html.Div(id='transcription-status'), color="primary", type="grow"),
71
  html.H4("Diarized Transcription Preview", className="mt-4"),
72
  html.Div(id='transcription-preview', style={'whiteSpace': 'pre-wrap'}),
@@ -77,6 +76,12 @@ app.layout = dbc.Container([
77
  ])
78
  ], fluid=True)
79
 
 
 
 
 
 
 
80
  def process_media(file_path, is_url=False):
81
  global generated_file, transcription_text
82
  temp_file = None
@@ -84,68 +89,64 @@ def process_media(file_path, is_url=False):
84
  try:
85
  if is_url:
86
  logger.info(f"Processing URL: {file_path}")
87
- try:
88
- ydl_opts = {
89
- 'format': 'bestaudio/best',
90
- 'postprocessors': [{
91
- 'key': 'FFmpegExtractAudio',
92
- 'preferredcodec': 'wav',
93
- }],
94
- 'outtmpl': '%(id)s.%(ext)s',
95
- }
96
- with yt_dlp.YoutubeDL(ydl_opts) as ydl:
97
- info = ydl.extract_info(file_path, download=True)
98
- wav_path = f"{info['id']}.wav"
99
- logger.info(f"Audio downloaded: {wav_path}")
100
- except Exception as e:
101
- logger.error(f"Error downloading audio from URL: {str(e)}")
102
- return f"Error downloading audio from URL: {str(e)}", False
103
  else:
104
  logger.info("Processing uploaded file")
105
  temp_file = tempfile.NamedTemporaryFile(delete=False)
106
  temp_file.write(file_path)
107
  temp_file.close()
108
- logger.info(f"Uploaded file saved: {temp_file.name}")
109
 
110
- file_extension = os.path.splitext(temp_file.name)[1].lower()
111
- logger.info(f"Detected file extension: {file_extension}")
112
-
113
- if file_extension in VIDEO_FORMATS:
114
- logger.info("Processing video file")
115
- video = VideoFileClip(temp_file.name)
116
- audio = video.audio
117
- wav_path = temp_file.name + ".wav"
118
- audio.write_audiofile(wav_path)
119
- video.close()
120
- elif file_extension in AUDIO_FORMATS:
121
- logger.info("Processing audio file")
122
- audio = AudioSegment.from_file(temp_file.name, format=file_extension[1:])
123
- wav_path = temp_file.name + ".wav"
124
- audio.export(wav_path, format="wav")
125
- else:
126
- logger.error(f"Unsupported file format: {file_extension}")
127
- return f"Unsupported file format: {file_extension}. Please upload a supported audio or video file.", False
128
 
129
  logger.info(f"Audio extracted to WAV: {wav_path}")
130
 
131
- with open(wav_path, "rb") as audio_file:
132
- transcript = openai.Audio.transcribe("whisper-1", audio_file)
133
- audio_file.seek(0)
134
- diarized_transcript = openai.Audio.transcribe("whisper-1", audio_file, response_format="verbose_json")
135
-
136
- formatted_transcript = ""
137
- if 'segments' in diarized_transcript:
138
- for segment in diarized_transcript["segments"]:
139
- speaker = segment.get('speaker', 'Unknown')
140
- text = segment.get('text', '')
141
- formatted_transcript += f"Speaker {speaker}: {text}\n\n"
142
- else:
143
- formatted_transcript = transcript.get('text', 'No transcription available.')
144
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
  transcription_text = formatted_transcript
146
  generated_file = io.BytesIO(transcription_text.encode())
147
- logger.info("Transcription and diarization completed successfully")
148
- return "Transcription and diarization completed successfully!", True
149
  except Exception as e:
150
  logger.error(f"Error during processing: {str(e)}")
151
  return f"An error occurred: {str(e)}", False
@@ -154,7 +155,7 @@ def process_media(file_path, is_url=False):
154
  os.unlink(temp_file.name)
155
  if wav_path and os.path.exists(wav_path):
156
  os.unlink(wav_path)
157
-
158
  @app.callback(
159
  [Output('output-media-upload', 'children'),
160
  Output('transcription-status', 'children'),
@@ -196,7 +197,7 @@ def update_output(contents, n_clicks, filename, url):
196
  def download_transcription(n_clicks):
197
  if n_clicks is None:
198
  return None
199
- return dcc.send_bytes(generated_file.getvalue(), "diarized_transcription.txt")
200
 
201
  if __name__ == '__main__':
202
  print("Starting the Dash application...")
 
9
  import dash_bootstrap_components as dbc
10
  from pydub import AudioSegment
11
  import requests
 
12
  import mimetypes
13
  import urllib.parse
14
 
 
64
  multiple=False
65
  ),
66
  html.Div(id='output-media-upload'),
67
+ dbc.Input(id="url-input", type="text", placeholder="Enter audio/video URL", className="mb-3"),
68
+ dbc.Button("Process Media", id="process-url-button", color="primary", className="mb-3"),
69
  dbc.Spinner(html.Div(id='transcription-status'), color="primary", type="grow"),
70
  html.H4("Diarized Transcription Preview", className="mt-4"),
71
  html.Div(id='transcription-preview', style={'whiteSpace': 'pre-wrap'}),
 
76
  ])
77
  ], fluid=True)
78
 
79
def chunk_audio(audio_segment, chunk_length_ms=60000):
    """Split an audio segment into consecutive fixed-length chunks.

    Args:
        audio_segment: Any object supporting ``len()`` and slicing. The
            caller in this file passes a pydub ``AudioSegment``
            (presumably indexed in milliseconds -- confirm against pydub).
        chunk_length_ms: Size of each chunk, in the same units as
            ``len(audio_segment)``. Must be a positive number.

    Returns:
        A list of slices covering the whole input in order. The final
        chunk may be shorter than ``chunk_length_ms``; an empty input
        yields an empty list.

    Raises:
        ValueError: If ``chunk_length_ms`` is not positive.
    """
    # A zero step makes range() fail with an unrelated-looking message, and a
    # negative step silently yields no chunks (i.e. an empty transcript), so
    # reject both up front with an explicit error.
    if chunk_length_ms <= 0:
        raise ValueError(
            f"chunk_length_ms must be positive, got {chunk_length_ms!r}"
        )

    return [
        audio_segment[start:start + chunk_length_ms]
        for start in range(0, len(audio_segment), chunk_length_ms)
    ]
84
+
85
  def process_media(file_path, is_url=False):
86
  global generated_file, transcription_text
87
  temp_file = None
 
89
  try:
90
  if is_url:
91
  logger.info(f"Processing URL: {file_path}")
92
+ response = requests.get(file_path)
93
+ content_type = response.headers.get('content-type', '')
94
+ if 'audio' in content_type:
95
+ suffix = '.mp3'
96
+ elif 'video' in content_type:
97
+ suffix = '.mp4'
98
+ else:
99
+ suffix = ''
100
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
101
+ temp_file.write(response.content)
102
+ temp_file.close()
 
 
 
 
 
103
  else:
104
  logger.info("Processing uploaded file")
105
  temp_file = tempfile.NamedTemporaryFile(delete=False)
106
  temp_file.write(file_path)
107
  temp_file.close()
 
108
 
109
+ file_extension = os.path.splitext(temp_file.name)[1].lower()
110
+ logger.info(f"Detected file extension: {file_extension}")
111
+
112
+ if file_extension in VIDEO_FORMATS:
113
+ logger.info("Processing video file")
114
+ video = VideoFileClip(temp_file.name)
115
+ audio = video.audio
116
+ wav_path = temp_file.name + ".wav"
117
+ audio.write_audiofile(wav_path)
118
+ video.close()
119
+ elif file_extension in AUDIO_FORMATS:
120
+ logger.info("Processing audio file")
121
+ audio = AudioSegment.from_file(temp_file.name, format=file_extension[1:])
122
+ wav_path = temp_file.name + ".wav"
123
+ audio.export(wav_path, format="wav")
124
+ else:
125
+ logger.error(f"Unsupported file format: {file_extension}")
126
+ return f"Unsupported file format: {file_extension}. Please upload a supported audio or video file.", False
127
 
128
  logger.info(f"Audio extracted to WAV: {wav_path}")
129
 
130
+ audio = AudioSegment.from_wav(wav_path)
131
+ chunks = chunk_audio(audio)
 
 
 
 
 
 
 
 
 
 
 
132
 
133
+ full_transcript = ""
134
+ for i, chunk in enumerate(chunks):
135
+ logger.info(f"Processing chunk {i+1}/{len(chunks)}")
136
+ chunk_file = tempfile.NamedTemporaryFile(delete=False, suffix='.wav')
137
+ chunk.export(chunk_file.name, format="wav")
138
+
139
+ with open(chunk_file.name, "rb") as audio_file:
140
+ transcript = openai.Audio.transcribe("whisper-1", audio_file)
141
+ full_transcript += transcript.get('text', '') + " "
142
+
143
+ os.unlink(chunk_file.name)
144
+
145
+ formatted_transcript = full_transcript.strip()
146
  transcription_text = formatted_transcript
147
  generated_file = io.BytesIO(transcription_text.encode())
148
+ logger.info("Transcription completed successfully")
149
+ return "Transcription completed successfully!", True
150
  except Exception as e:
151
  logger.error(f"Error during processing: {str(e)}")
152
  return f"An error occurred: {str(e)}", False
 
155
  os.unlink(temp_file.name)
156
  if wav_path and os.path.exists(wav_path):
157
  os.unlink(wav_path)
158
+
159
  @app.callback(
160
  [Output('output-media-upload', 'children'),
161
  Output('transcription-status', 'children'),
 
197
  def download_transcription(n_clicks):
198
  if n_clicks is None:
199
  return None
200
+ return dcc.send_bytes(generated_file.getvalue(), "transcription.txt")
201
 
202
  if __name__ == '__main__':
203
  print("Starting the Dash application...")