bluenevus committed on
Commit
a123d64
·
verified ·
1 Parent(s): 369e1f7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +61 -140
app.py CHANGED
@@ -46,134 +46,50 @@ if not OPENAI_API_KEY:
46
 
47
  openai.api_key = OPENAI_API_KEY
48
 
49
- def is_valid_url(url):
50
- try:
51
- result = urlparse(url)
52
- return all([result.scheme, result.netloc])
53
- except ValueError:
54
- logger.error(f"Invalid URL: {url}")
55
- return False
56
-
57
- def download_media(url):
58
- logger.info(f"Attempting to download media from URL: {url}")
59
- try:
60
- if "youtube.com" in url or "youtu.be" in url:
61
- yt = YouTube(url)
62
- stream = yt.streams.filter(progressive=True, file_extension='mp4').first()
63
- with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as temp_file:
64
- stream.download(output_path=os.path.dirname(temp_file.name), filename=temp_file.name)
65
- logger.info(f"YouTube video downloaded: {temp_file.name}")
66
- return temp_file.name
67
- else:
68
- response = requests.get(url)
69
- content_type = response.headers.get('content-type', '')
70
- if 'video' in content_type:
71
- suffix = '.mp4'
72
- elif 'audio' in content_type:
73
- suffix = '.mp3'
74
- else:
75
- suffix = ''
76
- with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
77
- temp_file.write(response.content)
78
- logger.info(f"Media downloaded: {temp_file.name}")
79
- return temp_file.name
80
- except Exception as e:
81
- logger.error(f"Error downloading media: {str(e)}")
82
- raise
83
-
84
- def extract_audio(file_path):
85
- logger.info(f"Extracting audio from video: {file_path}")
86
- try:
87
- if VideoFileClip is None:
88
- raise ImportError("VideoFileClip is not available. Cannot extract audio.")
89
- video = VideoFileClip(file_path)
90
- audio = video.audio
91
- audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
92
- audio.write_audiofile(audio_file.name)
93
- video.close()
94
- audio.close()
95
- logger.info(f"Audio extracted: {audio_file.name}")
96
- return audio_file.name
97
- except Exception as e:
98
- logger.error(f"Error extracting audio: {str(e)}")
99
- raise
100
-
101
- def transcribe_audio(file_path):
102
- logger.info(f"Transcribing audio: {file_path}")
103
- try:
104
- with open(file_path, "rb") as audio_file:
105
- transcript = openai.Audio.transcribe("whisper-1", audio_file)
106
- logger.info("Transcription completed successfully")
107
- return transcript["text"]
108
- except Exception as e:
109
- logger.error(f"Error during transcription: {str(e)}")
110
- raise
111
-
112
- def process_media(contents, filename, url):
113
- logger.info("Starting media processing")
114
- try:
115
- if contents:
116
- content_type, content_string = contents.split(',')
117
- decoded = base64.b64decode(content_string)
118
- suffix = os.path.splitext(filename)[1]
119
- with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as temp_file:
120
- temp_file.write(decoded)
121
- temp_file_path = temp_file.name
122
- logger.info(f"File uploaded: {temp_file_path}")
123
- elif url:
124
- temp_file_path = download_media(url)
125
- else:
126
- logger.error("No input provided")
127
- raise ValueError("No input provided")
128
-
129
- if temp_file_path.lower().endswith(('.mp4', '.avi', '.mov', '.flv', '.wmv')):
130
- logger.info("Video file detected, extracting audio")
131
- audio_file_path = extract_audio(temp_file_path)
132
- transcript = transcribe_audio(audio_file_path)
133
- os.unlink(audio_file_path)
134
- else:
135
- logger.info("Audio file detected, transcribing directly")
136
- transcript = transcribe_audio(temp_file_path)
137
-
138
- os.unlink(temp_file_path)
139
- return transcript
140
- except Exception as e:
141
- logger.error(f"Error in process_media: {str(e)}")
142
- raise
143
 
144
  app.layout = dbc.Container([
145
- html.H1("Audio/Video Transcription App", className="text-center my-4"),
146
- dbc.Card([
147
- dbc.CardBody([
148
- dcc.Upload(
149
- id='upload-media',
150
- children=html.Div([
151
- 'Drag and Drop or ',
152
- html.A('Select Audio/Video File')
153
- ]),
154
- style={
155
- 'width': '100%',
156
- 'height': '60px',
157
- 'lineHeight': '60px',
158
- 'borderWidth': '1px',
159
- 'borderStyle': 'dashed',
160
- 'borderRadius': '5px',
161
- 'textAlign': 'center',
162
- 'margin': '10px'
163
- },
164
- multiple=False
165
- ),
166
- html.Div(id='file-info', className="mt-2"),
167
- dbc.Input(id="media-url", type="text", placeholder="Enter audio/video URL or YouTube link", className="my-3"),
168
- dbc.Button("Transcribe", id="transcribe-button", color="primary", className="w-100 mb-3"),
169
- dbc.Spinner(html.Div(id="transcription-output", className="mt-3")),
170
- html.Div(id="progress-indicator", className="text-center mt-3"),
171
- dbc.Button("Download Transcript", id="download-button", color="secondary", className="w-100 mt-3", style={'display': 'none'}),
172
- dcc.Download(id="download-transcript"),
173
- dcc.Interval(id='progress-interval', interval=500, n_intervals=0, disabled=True)
174
  ])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
175
  ])
176
- ])
177
 
178
  @app.callback(
179
  Output("file-info", "children"),
@@ -190,14 +106,16 @@ def update_file_info(filename, last_modified):
190
  Output("download-button", "style"),
191
  Output("progress-indicator", "children"),
192
  Output("progress-interval", "disabled"),
 
193
  Input("transcribe-button", "n_clicks"),
194
  Input("progress-interval", "n_intervals"),
195
  State("upload-media", "contents"),
196
  State("upload-media", "filename"),
197
  State("media-url", "value"),
 
198
  prevent_initial_call=True
199
  )
200
- def update_transcription(n_clicks, n_intervals, contents, filename, url):
201
  ctx = callback_context
202
  if ctx.triggered_id == "transcribe-button":
203
  if not contents and not url:
@@ -212,40 +130,43 @@ def update_transcription(n_clicks, n_intervals, contents, filename, url):
212
 
213
  thread = threading.Thread(target=transcribe)
214
  thread.start()
215
- return html.Div("Processing..."), {'display': 'none'}, "", False
216
 
217
  elif ctx.triggered_id == "progress-interval":
 
 
218
  dots = "." * (n_intervals % 4)
219
- return html.Div("Processing" + dots), {'display': 'none'}, "", False
220
 
221
  thread = threading.current_thread()
222
  if hasattr(thread, 'result'):
223
  transcript = thread.result
224
  if transcript and not transcript.startswith("An error occurred"):
225
  logger.info("Transcription successful")
226
- return dbc.Card([
227
- dbc.CardBody([
228
- html.H5("Transcription Result"),
229
- html.Pre(transcript, style={"white-space": "pre-wrap", "word-wrap": "break-word"})
230
- ])
231
- ]), {'display': 'block'}, "", True
232
  else:
233
  logger.error(f"Transcription failed: {transcript}")
234
- return transcript, {'display': 'none'}, "", True
235
 
236
- return dash.no_update, dash.no_update, dash.no_update, dash.no_update
 
 
 
 
 
 
 
 
237
 
238
  @app.callback(
239
  Output("download-transcript", "data"),
240
  Input("download-button", "n_clicks"),
241
- State("transcription-output", "children"),
242
  prevent_initial_call=True
243
  )
244
- def download_transcript(n_clicks, transcription_output):
245
- if not transcription_output:
246
  raise PreventUpdate
247
-
248
- transcript = transcription_output['props']['children'][0]['props']['children'][1]['props']['children']
249
  return dict(content=transcript, filename="transcript.txt")
250
 
251
  if __name__ == '__main__':
 
46
 
47
  openai.api_key = OPENAI_API_KEY
48
 
49
+ # ... (keep all the helper functions as they are) ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  app.layout = dbc.Container([
52
+ dbc.Row([
53
+ dbc.Col([
54
+ html.H1("Audio/Video Transcription App", className="text-center my-4"),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  ])
56
+ ]),
57
+ dbc.Row([
58
+ dbc.Col([
59
+ dbc.Card([
60
+ dbc.CardBody([
61
+ dcc.Upload(
62
+ id='upload-media',
63
+ children=html.Div([
64
+ 'Drag and Drop or ',
65
+ html.A('Select Audio/Video File')
66
+ ]),
67
+ style={
68
+ 'width': '100%',
69
+ 'height': '60px',
70
+ 'lineHeight': '60px',
71
+ 'borderWidth': '1px',
72
+ 'borderStyle': 'dashed',
73
+ 'borderRadius': '5px',
74
+ 'textAlign': 'center',
75
+ 'margin': '10px'
76
+ },
77
+ multiple=False
78
+ ),
79
+ html.Div(id='file-info', className="mt-2"),
80
+ dbc.Input(id="media-url", type="text", placeholder="Enter audio/video URL or YouTube link", className="my-3"),
81
+ dbc.Button("Transcribe", id="transcribe-button", color="primary", className="w-100 mb-3"),
82
+ dbc.Spinner(html.Div(id="transcription-output", className="mt-3")),
83
+ html.Div(id="progress-indicator", className="text-center mt-3"),
84
+ dbc.Button("Download Transcript", id="download-button", color="secondary", className="w-100 mt-3", style={'display': 'none'}),
85
+ dcc.Download(id="download-transcript"),
86
+ dcc.Store(id="transcription-store"),
87
+ dcc.Interval(id='progress-interval', interval=500, n_intervals=0, disabled=True)
88
+ ])
89
+ ])
90
+ ], width=12)
91
  ])
92
+ ], fluid=True)
93
 
94
  @app.callback(
95
  Output("file-info", "children"),
 
106
  Output("download-button", "style"),
107
  Output("progress-indicator", "children"),
108
  Output("progress-interval", "disabled"),
109
+ Output("transcription-store", "data"),
110
  Input("transcribe-button", "n_clicks"),
111
  Input("progress-interval", "n_intervals"),
112
  State("upload-media", "contents"),
113
  State("upload-media", "filename"),
114
  State("media-url", "value"),
115
+ State("transcription-store", "data"),
116
  prevent_initial_call=True
117
  )
118
+ def update_transcription(n_clicks, n_intervals, contents, filename, url, stored_transcript):
119
  ctx = callback_context
120
  if ctx.triggered_id == "transcribe-button":
121
  if not contents and not url:
 
130
 
131
  thread = threading.Thread(target=transcribe)
132
  thread.start()
133
+ return html.Div("Processing..."), {'display': 'none'}, "", False, None
134
 
135
  elif ctx.triggered_id == "progress-interval":
136
+ if stored_transcript:
137
+ return display_transcript(stored_transcript), {'display': 'block'}, "", True, stored_transcript
138
  dots = "." * (n_intervals % 4)
139
+ return html.Div("Processing" + dots), {'display': 'none'}, "", False, None
140
 
141
  thread = threading.current_thread()
142
  if hasattr(thread, 'result'):
143
  transcript = thread.result
144
  if transcript and not transcript.startswith("An error occurred"):
145
  logger.info("Transcription successful")
146
+ return display_transcript(transcript), {'display': 'block'}, "", True, transcript
 
 
 
 
 
147
  else:
148
  logger.error(f"Transcription failed: {transcript}")
149
+ return html.Div(transcript), {'display': 'none'}, "", True, None
150
 
151
+ return dash.no_update, dash.no_update, dash.no_update, dash.no_update, dash.no_update
152
+
153
+ def display_transcript(transcript):
154
+ return dbc.Card([
155
+ dbc.CardBody([
156
+ html.H5("Transcription Result"),
157
+ html.Pre(transcript, style={"white-space": "pre-wrap", "word-wrap": "break-word"})
158
+ ])
159
+ ])
160
 
161
  @app.callback(
162
  Output("download-transcript", "data"),
163
  Input("download-button", "n_clicks"),
164
+ State("transcription-store", "data"),
165
  prevent_initial_call=True
166
  )
167
+ def download_transcript(n_clicks, transcript):
168
+ if not transcript:
169
  raise PreventUpdate
 
 
170
  return dict(content=transcript, filename="transcript.txt")
171
 
172
  if __name__ == '__main__':