bluenevus committed on
Commit
6a11fc5
·
verified ·
1 Parent(s): 57c2b38

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +42 -17
app.py CHANGED
@@ -10,6 +10,8 @@ import dash_bootstrap_components as dbc
10
  from pydub import AudioSegment
11
  import requests
12
  from pytube import YouTube
 
 
13
 
14
  # Configure logging
15
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
@@ -81,43 +83,65 @@ def process_media(file_path, is_url=False):
81
  wav_path = None
82
  try:
83
  if is_url:
 
84
  if 'youtube.com' in file_path or 'youtu.be' in file_path:
85
- yt = YouTube(file_path)
86
- stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
87
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
88
- stream.download(output_path=os.path.dirname(temp_file.name), filename=os.path.basename(temp_file.name))
 
 
 
 
 
89
  else:
90
- response = requests.get(file_path)
91
- content_type = response.headers.get('content-type', '')
92
- if 'audio' in content_type:
93
- suffix = '.mp3'
94
- elif 'video' in content_type:
95
- suffix = '.mp4'
96
- else:
97
- suffix = ''
98
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
99
- temp_file.write(response.content)
100
- temp_file.close()
 
 
 
 
 
 
 
 
101
  else:
 
102
  temp_file = tempfile.NamedTemporaryFile(delete=False)
103
  temp_file.write(file_path)
104
  temp_file.close()
 
105
 
106
  file_extension = os.path.splitext(temp_file.name)[1].lower()
 
107
 
108
  if file_extension in VIDEO_FORMATS:
 
109
  video = VideoFileClip(temp_file.name)
110
  audio = video.audio
111
  wav_path = temp_file.name + ".wav"
112
  audio.write_audiofile(wav_path)
113
  video.close()
114
- elif file_extension in AUDIO_FORMATS:
115
- audio = AudioSegment.from_file(temp_file.name)
 
116
  wav_path = temp_file.name + ".wav"
117
  audio.export(wav_path, format="wav")
118
  else:
 
119
  return f"Unsupported file format: {file_extension}. Please upload a supported audio or video file.", False
120
 
 
 
121
  with open(wav_path, "rb") as audio_file:
122
  transcript = openai.Audio.transcribe("whisper-1", audio_file)
123
  audio_file.seek(0)
@@ -134,6 +158,7 @@ def process_media(file_path, is_url=False):
134
 
135
  transcription_text = formatted_transcript
136
  generated_file = io.BytesIO(transcription_text.encode())
 
137
  return "Transcription and diarization completed successfully!", True
138
  except Exception as e:
139
  logger.error(f"Error during processing: {str(e)}")
 
10
  from pydub import AudioSegment
11
  import requests
12
  from pytube import YouTube
13
+ import mimetypes
14
+ import urllib.parse
15
 
16
  # Configure logging
17
  logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
83
  wav_path = None
84
  try:
85
  if is_url:
86
+ logger.info(f"Processing URL: {file_path}")
87
  if 'youtube.com' in file_path or 'youtu.be' in file_path:
88
+ try:
89
+ yt = YouTube(file_path)
90
+ stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
91
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
92
+ stream.download(output_path=os.path.dirname(temp_file.name), filename=os.path.basename(temp_file.name))
93
+ logger.info(f"YouTube video downloaded: {temp_file.name}")
94
+ except Exception as e:
95
+ logger.error(f"Error downloading YouTube video: {str(e)}")
96
+ return f"Error downloading YouTube video: {str(e)}", False
97
  else:
98
+ try:
99
+ response = requests.get(file_path)
100
+ response.raise_for_status()
101
+ content_type = response.headers.get('content-type', '')
102
+ logger.info(f"URL content type: {content_type}")
103
+
104
+ # Determine file extension from URL or content type
105
+ url_path = urllib.parse.urlparse(file_path).path
106
+ ext = os.path.splitext(url_path)[1]
107
+ if not ext:
108
+ ext = mimetypes.guess_extension(content_type) or ''
109
+
110
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
111
+ temp_file.write(response.content)
112
+ temp_file.close()
113
+ logger.info(f"URL content downloaded: {temp_file.name}")
114
+ except Exception as e:
115
+ logger.error(f"Error downloading URL content: {str(e)}")
116
+ return f"Error downloading URL content: {str(e)}", False
117
  else:
118
+ logger.info("Processing uploaded file")
119
  temp_file = tempfile.NamedTemporaryFile(delete=False)
120
  temp_file.write(file_path)
121
  temp_file.close()
122
+ logger.info(f"Uploaded file saved: {temp_file.name}")
123
 
124
  file_extension = os.path.splitext(temp_file.name)[1].lower()
125
+ logger.info(f"Detected file extension: {file_extension}")
126
 
127
  if file_extension in VIDEO_FORMATS:
128
+ logger.info("Processing video file")
129
  video = VideoFileClip(temp_file.name)
130
  audio = video.audio
131
  wav_path = temp_file.name + ".wav"
132
  audio.write_audiofile(wav_path)
133
  video.close()
134
+ elif file_extension in AUDIO_FORMATS or not file_extension:
135
+ logger.info("Processing audio file")
136
+ audio = AudioSegment.from_file(temp_file.name, format=file_extension[1:] if file_extension else None)
137
  wav_path = temp_file.name + ".wav"
138
  audio.export(wav_path, format="wav")
139
  else:
140
+ logger.error(f"Unsupported file format: {file_extension}")
141
  return f"Unsupported file format: {file_extension}. Please upload a supported audio or video file.", False
142
 
143
+ logger.info(f"Audio extracted to WAV: {wav_path}")
144
+
145
  with open(wav_path, "rb") as audio_file:
146
  transcript = openai.Audio.transcribe("whisper-1", audio_file)
147
  audio_file.seek(0)
 
158
 
159
  transcription_text = formatted_transcript
160
  generated_file = io.BytesIO(transcription_text.encode())
161
+ logger.info("Transcription and diarization completed successfully")
162
  return "Transcription and diarization completed successfully!", True
163
  except Exception as e:
164
  logger.error(f"Error during processing: {str(e)}")