Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,8 @@ import dash_bootstrap_components as dbc
|
|
10 |
from pydub import AudioSegment
|
11 |
import requests
|
12 |
from pytube import YouTube
|
|
|
|
|
13 |
|
14 |
# Configure logging
|
15 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
@@ -81,43 +83,65 @@ def process_media(file_path, is_url=False):
|
|
81 |
wav_path = None
|
82 |
try:
|
83 |
if is_url:
|
|
|
84 |
if 'youtube.com' in file_path or 'youtu.be' in file_path:
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
|
|
|
|
|
|
|
|
|
|
89 |
else:
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
|
100 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
101 |
else:
|
|
|
102 |
temp_file = tempfile.NamedTemporaryFile(delete=False)
|
103 |
temp_file.write(file_path)
|
104 |
temp_file.close()
|
|
|
105 |
|
106 |
file_extension = os.path.splitext(temp_file.name)[1].lower()
|
|
|
107 |
|
108 |
if file_extension in VIDEO_FORMATS:
|
|
|
109 |
video = VideoFileClip(temp_file.name)
|
110 |
audio = video.audio
|
111 |
wav_path = temp_file.name + ".wav"
|
112 |
audio.write_audiofile(wav_path)
|
113 |
video.close()
|
114 |
-
elif file_extension in AUDIO_FORMATS:
|
115 |
-
audio
|
|
|
116 |
wav_path = temp_file.name + ".wav"
|
117 |
audio.export(wav_path, format="wav")
|
118 |
else:
|
|
|
119 |
return f"Unsupported file format: {file_extension}. Please upload a supported audio or video file.", False
|
120 |
|
|
|
|
|
121 |
with open(wav_path, "rb") as audio_file:
|
122 |
transcript = openai.Audio.transcribe("whisper-1", audio_file)
|
123 |
audio_file.seek(0)
|
@@ -134,6 +158,7 @@ def process_media(file_path, is_url=False):
|
|
134 |
|
135 |
transcription_text = formatted_transcript
|
136 |
generated_file = io.BytesIO(transcription_text.encode())
|
|
|
137 |
return "Transcription and diarization completed successfully!", True
|
138 |
except Exception as e:
|
139 |
logger.error(f"Error during processing: {str(e)}")
|
|
|
10 |
from pydub import AudioSegment
|
11 |
import requests
|
12 |
from pytube import YouTube
|
13 |
+
import mimetypes
|
14 |
+
import urllib.parse
|
15 |
|
16 |
# Configure logging
|
17 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
|
|
83 |
wav_path = None
|
84 |
try:
|
85 |
if is_url:
|
86 |
+
logger.info(f"Processing URL: {file_path}")
|
87 |
if 'youtube.com' in file_path or 'youtu.be' in file_path:
|
88 |
+
try:
|
89 |
+
yt = YouTube(file_path)
|
90 |
+
stream = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first()
|
91 |
+
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
|
92 |
+
stream.download(output_path=os.path.dirname(temp_file.name), filename=os.path.basename(temp_file.name))
|
93 |
+
logger.info(f"YouTube video downloaded: {temp_file.name}")
|
94 |
+
except Exception as e:
|
95 |
+
logger.error(f"Error downloading YouTube video: {str(e)}")
|
96 |
+
return f"Error downloading YouTube video: {str(e)}", False
|
97 |
else:
|
98 |
+
try:
|
99 |
+
response = requests.get(file_path)
|
100 |
+
response.raise_for_status()
|
101 |
+
content_type = response.headers.get('content-type', '')
|
102 |
+
logger.info(f"URL content type: {content_type}")
|
103 |
+
|
104 |
+
# Determine file extension from URL or content type
|
105 |
+
url_path = urllib.parse.urlparse(file_path).path
|
106 |
+
ext = os.path.splitext(url_path)[1]
|
107 |
+
if not ext:
|
108 |
+
ext = mimetypes.guess_extension(content_type) or ''
|
109 |
+
|
110 |
+
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=ext)
|
111 |
+
temp_file.write(response.content)
|
112 |
+
temp_file.close()
|
113 |
+
logger.info(f"URL content downloaded: {temp_file.name}")
|
114 |
+
except Exception as e:
|
115 |
+
logger.error(f"Error downloading URL content: {str(e)}")
|
116 |
+
return f"Error downloading URL content: {str(e)}", False
|
117 |
else:
|
118 |
+
logger.info("Processing uploaded file")
|
119 |
temp_file = tempfile.NamedTemporaryFile(delete=False)
|
120 |
temp_file.write(file_path)
|
121 |
temp_file.close()
|
122 |
+
logger.info(f"Uploaded file saved: {temp_file.name}")
|
123 |
|
124 |
file_extension = os.path.splitext(temp_file.name)[1].lower()
|
125 |
+
logger.info(f"Detected file extension: {file_extension}")
|
126 |
|
127 |
if file_extension in VIDEO_FORMATS:
|
128 |
+
logger.info("Processing video file")
|
129 |
video = VideoFileClip(temp_file.name)
|
130 |
audio = video.audio
|
131 |
wav_path = temp_file.name + ".wav"
|
132 |
audio.write_audiofile(wav_path)
|
133 |
video.close()
|
134 |
+
elif file_extension in AUDIO_FORMATS or not file_extension:
|
135 |
+
logger.info("Processing audio file")
|
136 |
+
audio = AudioSegment.from_file(temp_file.name, format=file_extension[1:] if file_extension else None)
|
137 |
wav_path = temp_file.name + ".wav"
|
138 |
audio.export(wav_path, format="wav")
|
139 |
else:
|
140 |
+
logger.error(f"Unsupported file format: {file_extension}")
|
141 |
return f"Unsupported file format: {file_extension}. Please upload a supported audio or video file.", False
|
142 |
|
143 |
+
logger.info(f"Audio extracted to WAV: {wav_path}")
|
144 |
+
|
145 |
with open(wav_path, "rb") as audio_file:
|
146 |
transcript = openai.Audio.transcribe("whisper-1", audio_file)
|
147 |
audio_file.seek(0)
|
|
|
158 |
|
159 |
transcription_text = formatted_transcript
|
160 |
generated_file = io.BytesIO(transcription_text.encode())
|
161 |
+
logger.info("Transcription and diarization completed successfully")
|
162 |
return "Transcription and diarization completed successfully!", True
|
163 |
except Exception as e:
|
164 |
logger.error(f"Error during processing: {str(e)}")
|