Spaces:
Paused
Paused
WIP
Browse files
app.py
CHANGED
@@ -127,70 +127,153 @@ def download_youtube_audio(url):
|
|
127 |
Raises:
|
128 |
gr.Error: If download fails or API key is not set
|
129 |
"""
|
|
|
|
|
130 |
if not SIEVE_API_KEY:
|
|
|
131 |
raise gr.Error("SIEVE_API_KEY environment variable is not set")
|
132 |
-
|
133 |
try:
|
134 |
# Create a temporary file for the audio
|
135 |
temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
|
136 |
temp_file.close()
|
137 |
output_path = temp_file.name
|
|
|
138 |
|
139 |
-
# Prepare the request to Sieve API
|
140 |
payload = {
|
141 |
"function": "sieve/youtube-downloader",
|
142 |
"inputs": {
|
143 |
"url": url,
|
144 |
-
"download_type": "audio",
|
145 |
-
"
|
|
|
|
|
|
|
146 |
"include_metadata": False,
|
147 |
-
"
|
|
|
|
|
|
|
|
|
148 |
}
|
149 |
}
|
|
|
150 |
|
151 |
-
# Send request to Sieve API
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
|
160 |
-
if not job_id:
|
161 |
-
raise gr.Error("Failed to get job ID from Sieve API")
|
162 |
-
|
163 |
-
# Poll for job completion
|
164 |
while True:
|
165 |
-
|
166 |
-
|
167 |
-
headers={"X-API-Key": SIEVE_API_KEY}
|
168 |
-
)
|
169 |
-
job_response.raise_for_status()
|
170 |
-
job_data = job_response.json()
|
171 |
|
172 |
-
|
173 |
-
|
174 |
-
|
175 |
-
|
176 |
-
|
177 |
-
|
178 |
-
|
179 |
-
|
|
|
|
|
|
|
|
|
180 |
|
181 |
-
|
182 |
-
|
|
|
|
|
|
|
|
|
|
|
183 |
|
184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
185 |
|
186 |
-
|
187 |
-
|
188 |
|
189 |
-
|
190 |
-
|
|
|
|
|
|
|
191 |
|
|
|
|
|
|
|
192 |
except Exception as e:
|
193 |
-
logger.exception(f"
|
194 |
raise gr.Error(f"Failed to download YouTube audio: {str(e)}")
|
195 |
|
196 |
def transcribe_youtube(url, return_timestamps, generate_subs):
|
@@ -204,16 +287,25 @@ def transcribe_youtube(url, return_timestamps, generate_subs):
|
|
204 |
Returns:
|
205 |
tuple: (formatted_result, srt_file, correction_text)
|
206 |
"""
|
|
|
|
|
|
|
207 |
try:
|
208 |
# Download audio from YouTube
|
|
|
209 |
audio_path = download_youtube_audio(url)
|
|
|
210 |
|
211 |
# Transcribe the downloaded audio
|
|
|
212 |
result = transcribe(audio_path, return_timestamps, generate_subs)
|
|
|
213 |
|
214 |
# Clean up the temporary file
|
|
|
215 |
try:
|
216 |
os.unlink(audio_path)
|
|
|
217 |
except Exception as e:
|
218 |
logger.warning(f"Failed to delete temporary file: {str(e)}")
|
219 |
|
@@ -240,32 +332,39 @@ def transcribe(inputs, return_timestamps, generate_subs):
|
|
240 |
Raises:
|
241 |
gr.Error: If no audio file is provided or transcription fails.
|
242 |
"""
|
|
|
|
|
|
|
243 |
if inputs is None:
|
244 |
logger.warning("No audio file submitted")
|
245 |
raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
|
246 |
|
247 |
try:
|
248 |
-
logger.info(f"Processing audio file: {inputs}")
|
249 |
-
|
250 |
# Read the audio file
|
|
|
251 |
with open(inputs, "rb") as f:
|
252 |
data = f.read()
|
|
|
|
|
253 |
|
254 |
# Send request to API
|
|
|
255 |
response = requests.post(API_URL, headers=headers, data=data)
|
256 |
-
response.raise_for_status()
|
257 |
result = response.json()
|
258 |
-
|
259 |
logger.debug(f"API response: {result}")
|
|
|
260 |
|
261 |
# Format response as JSON
|
|
|
262 |
formatted_result = {
|
263 |
"text": result.get("text", "")
|
264 |
}
|
|
|
265 |
|
266 |
chunks = []
|
267 |
if return_timestamps and "chunks" in result:
|
268 |
-
logger.info(f"Processing {len(result['chunks'])} chunks")
|
269 |
for i, chunk in enumerate(result["chunks"]):
|
270 |
logger.debug(f"Processing chunk {i}: {chunk}")
|
271 |
try:
|
@@ -286,17 +385,19 @@ def transcribe(inputs, return_timestamps, generate_subs):
|
|
286 |
continue
|
287 |
|
288 |
formatted_result["chunks"] = chunks
|
289 |
-
logger.info(f"Successfully processed
|
290 |
|
291 |
# Generate subtitles if requested
|
292 |
srt_file = None
|
293 |
if generate_subs and chunks:
|
294 |
-
logger.info("Generating SRT subtitles")
|
295 |
srt_content = generate_srt(chunks)
|
296 |
srt_file = save_srt_to_file(srt_content)
|
297 |
-
logger.info("SRT
|
298 |
|
|
|
299 |
return formatted_result, srt_file, "" # Return empty string for correction textbox
|
|
|
300 |
except requests.exceptions.RequestException as e:
|
301 |
logger.exception(f"API request failed: {str(e)}")
|
302 |
raise gr.Error(f"Failed to transcribe audio: API request failed - {str(e)}")
|
@@ -323,7 +424,10 @@ youtube_transcribe = gr.Interface(
|
|
323 |
description=(
|
324 |
"Transcribe Tajik language audio from YouTube videos. "
|
325 |
"Paste a YouTube URL and get accurate transcription with optional timestamps "
|
326 |
-
"and subtitles
|
|
|
|
|
|
|
327 |
)
|
328 |
)
|
329 |
|
@@ -365,11 +469,10 @@ file_transcribe = gr.Interface(
|
|
365 |
)
|
366 |
)
|
367 |
|
368 |
-
# Then set up the demo with the interfaces
|
369 |
with demo:
|
370 |
gr.TabbedInterface(
|
371 |
-
[
|
372 |
-
["
|
373 |
)
|
374 |
|
375 |
logger.info("Starting Gradio interface")
|
|
|
127 |
Raises:
|
128 |
gr.Error: If download fails or API key is not set
|
129 |
"""
|
130 |
+
logger.info(f"Starting YouTube audio download process for URL: {url}")
|
131 |
+
|
132 |
if not SIEVE_API_KEY:
|
133 |
+
logger.error("SIEVE_API_KEY environment variable is not set")
|
134 |
raise gr.Error("SIEVE_API_KEY environment variable is not set")
|
135 |
+
|
136 |
try:
|
137 |
# Create a temporary file for the audio
|
138 |
temp_file = tempfile.NamedTemporaryFile(suffix='.mp3', delete=False)
|
139 |
temp_file.close()
|
140 |
output_path = temp_file.name
|
141 |
+
logger.info(f"Created temporary file at: {output_path}")
|
142 |
|
143 |
+
# Prepare the request to Sieve API with exact parameters
|
144 |
payload = {
|
145 |
"function": "sieve/youtube-downloader",
|
146 |
"inputs": {
|
147 |
"url": url,
|
148 |
+
"download_type": "audio", # Ensure we're only downloading audio
|
149 |
+
"resolution": "highest-available",
|
150 |
+
"include_audio": True,
|
151 |
+
"start_time": 0,
|
152 |
+
"end_time": -1,
|
153 |
"include_metadata": False,
|
154 |
+
"metadata_fields": ["title", "thumbnail", "description", "tags", "duration"],
|
155 |
+
"include_subtitles": False,
|
156 |
+
"subtitle_languages": ["en"],
|
157 |
+
"video_format": "mp4",
|
158 |
+
"audio_format": "mp3"
|
159 |
}
|
160 |
}
|
161 |
+
logger.debug(f"Prepared Sieve API payload: {payload}")
|
162 |
|
163 |
+
# Send request to Sieve API with retries
|
164 |
+
max_retries = 3
|
165 |
+
retry_delay = 5 # seconds
|
166 |
+
|
167 |
+
for attempt in range(max_retries):
|
168 |
+
try:
|
169 |
+
logger.info(f"Sending request to Sieve API (attempt {attempt + 1}/{max_retries})...")
|
170 |
+
response = requests.post(
|
171 |
+
f"{SIEVE_API_URL}/push",
|
172 |
+
headers={"X-API-Key": SIEVE_API_KEY, "Content-Type": "application/json"},
|
173 |
+
json=payload,
|
174 |
+
timeout=30 # Add timeout
|
175 |
+
)
|
176 |
+
response.raise_for_status()
|
177 |
+
response_data = response.json()
|
178 |
+
logger.debug(f"Sieve API response: {response_data}")
|
179 |
+
|
180 |
+
job_id = response_data.get("id")
|
181 |
+
if not job_id:
|
182 |
+
logger.error("No job ID received from Sieve API")
|
183 |
+
if attempt < max_retries - 1:
|
184 |
+
logger.warning(f"Retrying in {retry_delay} seconds...")
|
185 |
+
time.sleep(retry_delay)
|
186 |
+
continue
|
187 |
+
raise gr.Error("Failed to get job ID from Sieve API")
|
188 |
+
break
|
189 |
+
|
190 |
+
except requests.exceptions.RequestException as e:
|
191 |
+
logger.warning(f"Request failed (attempt {attempt + 1}/{max_retries}): {str(e)}")
|
192 |
+
if attempt < max_retries - 1:
|
193 |
+
logger.info(f"Retrying in {retry_delay} seconds...")
|
194 |
+
time.sleep(retry_delay)
|
195 |
+
continue
|
196 |
+
raise
|
197 |
+
|
198 |
+
logger.info(f"Received job ID: {job_id}")
|
199 |
+
|
200 |
+
# Poll for job completion with timeout
|
201 |
+
poll_count = 0
|
202 |
+
max_polls = 60 # Maximum number of polls (2 minutes with 2-second delay)
|
203 |
+
start_time = time.time()
|
204 |
|
|
|
|
|
|
|
|
|
205 |
while True:
|
206 |
+
poll_count += 1
|
207 |
+
logger.info(f"Polling job status (attempt {poll_count}/{max_polls})...")
|
|
|
|
|
|
|
|
|
208 |
|
209 |
+
try:
|
210 |
+
job_response = requests.get(
|
211 |
+
f"{SIEVE_API_URL}/jobs/{job_id}",
|
212 |
+
headers={"X-API-Key": SIEVE_API_KEY},
|
213 |
+
timeout=10
|
214 |
+
)
|
215 |
+
job_response.raise_for_status()
|
216 |
+
job_data = job_response.json()
|
217 |
+
logger.debug(f"Job status response: {job_data}")
|
218 |
+
|
219 |
+
status = job_data.get("status")
|
220 |
+
logger.info(f"Current job status: {status}")
|
221 |
|
222 |
+
if status == "completed":
|
223 |
+
logger.info("Job completed successfully")
|
224 |
+
# Get the output data
|
225 |
+
output_data = job_data.get("output_0", {})
|
226 |
+
if not output_data:
|
227 |
+
logger.error("No output data found in completed job response")
|
228 |
+
raise gr.Error("No output data in job response")
|
229 |
|
230 |
+
# Get the audio URL from the output
|
231 |
+
audio_url = output_data.get("url")
|
232 |
+
if not audio_url:
|
233 |
+
logger.error("No audio URL found in output data")
|
234 |
+
raise gr.Error("No audio URL in output data")
|
235 |
+
|
236 |
+
logger.info(f"Received audio URL from Sieve: {audio_url}")
|
237 |
+
|
238 |
+
# Download the audio file
|
239 |
+
logger.info("Downloading audio file from Sieve storage...")
|
240 |
+
audio_response = requests.get(audio_url, timeout=30)
|
241 |
+
audio_response.raise_for_status()
|
242 |
+
|
243 |
+
file_size = len(audio_response.content)
|
244 |
+
logger.info(f"Downloaded audio file size: {file_size/1024/1024:.2f} MB")
|
245 |
+
|
246 |
+
# Save the file
|
247 |
+
with open(output_path, "wb") as f:
|
248 |
+
f.write(audio_response.content)
|
249 |
+
logger.info(f"Successfully saved audio to: {output_path}")
|
250 |
+
|
251 |
+
return output_path
|
252 |
+
|
253 |
+
elif status == "failed":
|
254 |
+
error_msg = job_data.get("error", "Unknown error")
|
255 |
+
logger.error(f"Job failed with error: {error_msg}")
|
256 |
+
raise gr.Error(f"Job failed: {error_msg}")
|
257 |
+
|
258 |
+
# Check for timeout
|
259 |
+
if time.time() - start_time > 120: # 2 minutes timeout
|
260 |
+
logger.error("Job polling timed out after 2 minutes")
|
261 |
+
raise gr.Error("Download timed out. Please try again.")
|
262 |
|
263 |
+
logger.info("Job still processing, waiting 2 seconds before next poll...")
|
264 |
+
time.sleep(2)
|
265 |
|
266 |
+
except requests.exceptions.RequestException as e:
|
267 |
+
logger.warning(f"Poll request failed: {str(e)}")
|
268 |
+
if poll_count >= max_polls:
|
269 |
+
raise gr.Error("Failed to check job status. Please try again.")
|
270 |
+
time.sleep(2)
|
271 |
|
272 |
+
except requests.exceptions.RequestException as e:
|
273 |
+
logger.exception(f"Network error during YouTube download: {str(e)}")
|
274 |
+
raise gr.Error(f"Failed to download YouTube audio: Network error - {str(e)}")
|
275 |
except Exception as e:
|
276 |
+
logger.exception(f"Unexpected error during YouTube download: {str(e)}")
|
277 |
raise gr.Error(f"Failed to download YouTube audio: {str(e)}")
|
278 |
|
279 |
def transcribe_youtube(url, return_timestamps, generate_subs):
|
|
|
287 |
Returns:
|
288 |
tuple: (formatted_result, srt_file, correction_text)
|
289 |
"""
|
290 |
+
logger.info(f"Starting YouTube transcription process for URL: {url}")
|
291 |
+
logger.info(f"Options - Timestamps: {return_timestamps}, Generate subtitles: {generate_subs}")
|
292 |
+
|
293 |
try:
|
294 |
# Download audio from YouTube
|
295 |
+
logger.info("Step 1: Downloading audio from YouTube...")
|
296 |
audio_path = download_youtube_audio(url)
|
297 |
+
logger.info(f"Successfully downloaded audio to: {audio_path}")
|
298 |
|
299 |
# Transcribe the downloaded audio
|
300 |
+
logger.info("Step 2: Transcribing downloaded audio...")
|
301 |
result = transcribe(audio_path, return_timestamps, generate_subs)
|
302 |
+
logger.info("Successfully completed transcription")
|
303 |
|
304 |
# Clean up the temporary file
|
305 |
+
logger.info("Step 3: Cleaning up temporary files...")
|
306 |
try:
|
307 |
os.unlink(audio_path)
|
308 |
+
logger.info(f"Successfully deleted temporary file: {audio_path}")
|
309 |
except Exception as e:
|
310 |
logger.warning(f"Failed to delete temporary file: {str(e)}")
|
311 |
|
|
|
332 |
Raises:
|
333 |
gr.Error: If no audio file is provided or transcription fails.
|
334 |
"""
|
335 |
+
logger.info(f"Starting transcription process for file: {inputs}")
|
336 |
+
logger.info(f"Options - Timestamps: {return_timestamps}, Generate subtitles: {generate_subs}")
|
337 |
+
|
338 |
if inputs is None:
|
339 |
logger.warning("No audio file submitted")
|
340 |
raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
|
341 |
|
342 |
try:
|
|
|
|
|
343 |
# Read the audio file
|
344 |
+
logger.info("Step 1: Reading audio file...")
|
345 |
with open(inputs, "rb") as f:
|
346 |
data = f.read()
|
347 |
+
file_size = len(data)
|
348 |
+
logger.info(f"Successfully read audio file, size: {file_size/1024/1024:.2f} MB")
|
349 |
|
350 |
# Send request to API
|
351 |
+
logger.info("Step 2: Sending request to Whisper API...")
|
352 |
response = requests.post(API_URL, headers=headers, data=data)
|
353 |
+
response.raise_for_status()
|
354 |
result = response.json()
|
|
|
355 |
logger.debug(f"API response: {result}")
|
356 |
+
logger.info("Successfully received response from API")
|
357 |
|
358 |
# Format response as JSON
|
359 |
+
logger.info("Step 3: Processing API response...")
|
360 |
formatted_result = {
|
361 |
"text": result.get("text", "")
|
362 |
}
|
363 |
+
logger.info(f"Transcribed text length: {len(formatted_result['text'])} characters")
|
364 |
|
365 |
chunks = []
|
366 |
if return_timestamps and "chunks" in result:
|
367 |
+
logger.info(f"Processing {len(result['chunks'])} chunks for timestamps")
|
368 |
for i, chunk in enumerate(result["chunks"]):
|
369 |
logger.debug(f"Processing chunk {i}: {chunk}")
|
370 |
try:
|
|
|
385 |
continue
|
386 |
|
387 |
formatted_result["chunks"] = chunks
|
388 |
+
logger.info(f"Successfully processed {len(chunks)} chunks with timestamps")
|
389 |
|
390 |
# Generate subtitles if requested
|
391 |
srt_file = None
|
392 |
if generate_subs and chunks:
|
393 |
+
logger.info("Step 4: Generating SRT subtitles...")
|
394 |
srt_content = generate_srt(chunks)
|
395 |
srt_file = save_srt_to_file(srt_content)
|
396 |
+
logger.info(f"Successfully generated SRT file: {srt_file}")
|
397 |
|
398 |
+
logger.info("Transcription process completed successfully")
|
399 |
return formatted_result, srt_file, "" # Return empty string for correction textbox
|
400 |
+
|
401 |
except requests.exceptions.RequestException as e:
|
402 |
logger.exception(f"API request failed: {str(e)}")
|
403 |
raise gr.Error(f"Failed to transcribe audio: API request failed - {str(e)}")
|
|
|
424 |
description=(
|
425 |
"Transcribe Tajik language audio from YouTube videos. "
|
426 |
"Paste a YouTube URL and get accurate transcription with optional timestamps "
|
427 |
+
"and subtitles.\n\n"
|
428 |
+
"⚠️ Note: YouTube downloads may occasionally fail due to YouTube's restrictions "
|
429 |
+
"or temporary service issues. If this happens, please try again in a few minutes "
|
430 |
+
"or use the audio file upload option instead."
|
431 |
)
|
432 |
)
|
433 |
|
|
|
469 |
)
|
470 |
)
|
471 |
|
|
|
472 |
with demo:
|
473 |
gr.TabbedInterface(
|
474 |
+
[file_transcribe, mf_transcribe, youtube_transcribe],
|
475 |
+
["Audio file", "Microphone", "YouTube"]
|
476 |
)
|
477 |
|
478 |
logger.info("Starting Gradio interface")
|