Spaces:
Paused
Paused
WIP
Browse files
app.py
CHANGED
@@ -226,20 +226,26 @@ def check_api_health():
|
|
226 |
def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30, batch_size=128):
|
227 |
"""Transcribe audio from YouTube video using URL endpoint."""
|
228 |
logger.info(f"Starting YouTube transcription process for URL: {url}")
|
|
|
229 |
|
230 |
try:
|
231 |
# Check API health first
|
|
|
232 |
check_api_health()
|
233 |
|
234 |
# Validate URL scheme
|
235 |
if not url.startswith(('http://', 'https://')):
|
|
|
236 |
raise gr.Error("URL must start with http:// or https://")
|
237 |
|
238 |
# Get audio URL from Sieve
|
|
|
239 |
audio_url = download_youtube_audio(url)
|
|
|
240 |
|
241 |
# Validate audio URL scheme
|
242 |
if not audio_url.startswith(('http://', 'https://')):
|
|
|
243 |
raise gr.Error("Invalid audio URL scheme received from Sieve")
|
244 |
|
245 |
# Prepare request parameters
|
@@ -250,8 +256,10 @@ def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30,
|
|
250 |
"chunk_length_s": chunk_length_s,
|
251 |
"batch_size": batch_size
|
252 |
}
|
|
|
253 |
|
254 |
# Send request to API
|
|
|
255 |
response = requests.post(
|
256 |
f"{API_URL}/transcribe/url",
|
257 |
json={"url": audio_url},
|
@@ -259,12 +267,15 @@ def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30,
|
|
259 |
)
|
260 |
response.raise_for_status()
|
261 |
result = response.json()
|
|
|
262 |
|
263 |
# Log metadata
|
264 |
metadata = result.get("metadata", {})
|
265 |
logger.info(f"Transcription metadata: {metadata}")
|
|
|
266 |
|
267 |
# Format response with segments (without id)
|
|
|
268 |
formatted_result = {
|
269 |
"text": result["transcription"]["text"],
|
270 |
"segments": [
|
@@ -276,13 +287,17 @@ def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30,
|
|
276 |
for segment in result["transcription"]["segments"]
|
277 |
] if return_timestamps else None
|
278 |
}
|
|
|
279 |
|
280 |
# Generate subtitles if requested
|
281 |
srt_file = None
|
282 |
if generate_subs and return_timestamps and "segments" in result["transcription"]:
|
|
|
283 |
srt_content = generate_srt(result["transcription"]["segments"])
|
284 |
srt_file = save_srt_to_file(srt_content)
|
|
|
285 |
|
|
|
286 |
return formatted_result, srt_file, ""
|
287 |
|
288 |
except Exception as e:
|
@@ -292,15 +307,19 @@ def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30,
|
|
292 |
def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batch_size=128):
|
293 |
"""Transcribe audio input using Whisper API."""
|
294 |
logger.info(f"Starting transcription process for file: {inputs}")
|
|
|
295 |
|
296 |
if inputs is None:
|
|
|
297 |
raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
|
298 |
|
299 |
try:
|
300 |
# Check API health first
|
|
|
301 |
check_api_health()
|
302 |
|
303 |
# Read the audio file
|
|
|
304 |
with open(inputs, "rb") as f:
|
305 |
files = {"file": f}
|
306 |
|
@@ -312,10 +331,10 @@ def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batc
|
|
312 |
"chunk_length_s": chunk_length_s,
|
313 |
"batch_size": batch_size
|
314 |
}
|
315 |
-
|
316 |
-
logger.info(f"Sending request to API with parameters: {params}")
|
317 |
|
318 |
# Send request to API
|
|
|
319 |
response = requests.post(
|
320 |
f"{API_URL}/transcribe",
|
321 |
files=files,
|
@@ -323,12 +342,15 @@ def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batc
|
|
323 |
)
|
324 |
response.raise_for_status()
|
325 |
result = response.json()
|
|
|
326 |
|
327 |
# Log metadata
|
328 |
metadata = result.get("metadata", {})
|
329 |
logger.info(f"Transcription metadata: {metadata}")
|
|
|
330 |
|
331 |
# Format response with segments (without id)
|
|
|
332 |
formatted_result = {
|
333 |
"text": result["transcription"]["text"],
|
334 |
"segments": [
|
@@ -340,13 +362,17 @@ def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batc
|
|
340 |
for segment in result["transcription"]["segments"]
|
341 |
] if return_timestamps else None
|
342 |
}
|
|
|
343 |
|
344 |
# Generate subtitles if requested
|
345 |
srt_file = None
|
346 |
if generate_subs and return_timestamps and "segments" in result["transcription"]:
|
|
|
347 |
srt_content = generate_srt(result["transcription"]["segments"])
|
348 |
srt_file = save_srt_to_file(srt_content)
|
|
|
349 |
|
|
|
350 |
return formatted_result, srt_file, ""
|
351 |
|
352 |
except requests.exceptions.RequestException as e:
|
|
|
226 |
def transcribe_youtube(url, return_timestamps, generate_subs, chunk_length_s=30, batch_size=128):
|
227 |
"""Transcribe audio from YouTube video using URL endpoint."""
|
228 |
logger.info(f"Starting YouTube transcription process for URL: {url}")
|
229 |
+
logger.info(f"Parameters - return_timestamps: {return_timestamps}, generate_subs: {generate_subs}, chunk_length_s: {chunk_length_s}, batch_size: {batch_size}")
|
230 |
|
231 |
try:
|
232 |
# Check API health first
|
233 |
+
logger.info("Performing API health check...")
|
234 |
check_api_health()
|
235 |
|
236 |
# Validate URL scheme
|
237 |
if not url.startswith(('http://', 'https://')):
|
238 |
+
logger.error(f"Invalid URL scheme: {url}")
|
239 |
raise gr.Error("URL must start with http:// or https://")
|
240 |
|
241 |
# Get audio URL from Sieve
|
242 |
+
logger.info("Starting YouTube audio download via Sieve API...")
|
243 |
audio_url = download_youtube_audio(url)
|
244 |
+
logger.info(f"Successfully obtained audio URL from Sieve: {audio_url}")
|
245 |
|
246 |
# Validate audio URL scheme
|
247 |
if not audio_url.startswith(('http://', 'https://')):
|
248 |
+
logger.error(f"Invalid audio URL scheme from Sieve: {audio_url}")
|
249 |
raise gr.Error("Invalid audio URL scheme received from Sieve")
|
250 |
|
251 |
# Prepare request parameters
|
|
|
256 |
"chunk_length_s": chunk_length_s,
|
257 |
"batch_size": batch_size
|
258 |
}
|
259 |
+
logger.info(f"Prepared API request parameters: {params}")
|
260 |
|
261 |
# Send request to API
|
262 |
+
logger.info("Sending transcription request to API...")
|
263 |
response = requests.post(
|
264 |
f"{API_URL}/transcribe/url",
|
265 |
json={"url": audio_url},
|
|
|
267 |
)
|
268 |
response.raise_for_status()
|
269 |
result = response.json()
|
270 |
+
logger.info("Successfully received response from API")
|
271 |
|
272 |
# Log metadata
|
273 |
metadata = result.get("metadata", {})
|
274 |
logger.info(f"Transcription metadata: {metadata}")
|
275 |
+
logger.info(f"Transcription completed in {metadata.get('timing', {}).get('total_time', 0):.2f} seconds")
|
276 |
|
277 |
# Format response with segments (without id)
|
278 |
+
logger.info("Formatting response...")
|
279 |
formatted_result = {
|
280 |
"text": result["transcription"]["text"],
|
281 |
"segments": [
|
|
|
287 |
for segment in result["transcription"]["segments"]
|
288 |
] if return_timestamps else None
|
289 |
}
|
290 |
+
logger.info(f"Formatted result contains {len(formatted_result['segments'] or [])} segments")
|
291 |
|
292 |
# Generate subtitles if requested
|
293 |
srt_file = None
|
294 |
if generate_subs and return_timestamps and "segments" in result["transcription"]:
|
295 |
+
logger.info("Generating SRT subtitles...")
|
296 |
srt_content = generate_srt(result["transcription"]["segments"])
|
297 |
srt_file = save_srt_to_file(srt_content)
|
298 |
+
logger.info(f"Generated SRT file: {srt_file}")
|
299 |
|
300 |
+
logger.info("YouTube transcription process completed successfully")
|
301 |
return formatted_result, srt_file, ""
|
302 |
|
303 |
except Exception as e:
|
|
|
307 |
def transcribe(inputs, return_timestamps, generate_subs, chunk_length_s=30, batch_size=128):
|
308 |
"""Transcribe audio input using Whisper API."""
|
309 |
logger.info(f"Starting transcription process for file: {inputs}")
|
310 |
+
logger.info(f"Parameters - return_timestamps: {return_timestamps}, generate_subs: {generate_subs}, chunk_length_s: {chunk_length_s}, batch_size: {batch_size}")
|
311 |
|
312 |
if inputs is None:
|
313 |
+
logger.error("No audio file submitted")
|
314 |
raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
|
315 |
|
316 |
try:
|
317 |
# Check API health first
|
318 |
+
logger.info("Performing API health check...")
|
319 |
check_api_health()
|
320 |
|
321 |
# Read the audio file
|
322 |
+
logger.info(f"Reading audio file: {inputs}")
|
323 |
with open(inputs, "rb") as f:
|
324 |
files = {"file": f}
|
325 |
|
|
|
331 |
"chunk_length_s": chunk_length_s,
|
332 |
"batch_size": batch_size
|
333 |
}
|
334 |
+
logger.info(f"Prepared API request parameters: {params}")
|
|
|
335 |
|
336 |
# Send request to API
|
337 |
+
logger.info("Sending transcription request to API...")
|
338 |
response = requests.post(
|
339 |
f"{API_URL}/transcribe",
|
340 |
files=files,
|
|
|
342 |
)
|
343 |
response.raise_for_status()
|
344 |
result = response.json()
|
345 |
+
logger.info("Successfully received response from API")
|
346 |
|
347 |
# Log metadata
|
348 |
metadata = result.get("metadata", {})
|
349 |
logger.info(f"Transcription metadata: {metadata}")
|
350 |
+
logger.info(f"Transcription completed in {metadata.get('timing', {}).get('total_time', 0):.2f} seconds")
|
351 |
|
352 |
# Format response with segments (without id)
|
353 |
+
logger.info("Formatting response...")
|
354 |
formatted_result = {
|
355 |
"text": result["transcription"]["text"],
|
356 |
"segments": [
|
|
|
362 |
for segment in result["transcription"]["segments"]
|
363 |
] if return_timestamps else None
|
364 |
}
|
365 |
+
logger.info(f"Formatted result contains {len(formatted_result['segments'] or [])} segments")
|
366 |
|
367 |
# Generate subtitles if requested
|
368 |
srt_file = None
|
369 |
if generate_subs and return_timestamps and "segments" in result["transcription"]:
|
370 |
+
logger.info("Generating SRT subtitles...")
|
371 |
srt_content = generate_srt(result["transcription"]["segments"])
|
372 |
srt_file = save_srt_to_file(srt_content)
|
373 |
+
logger.info(f"Generated SRT file: {srt_file}")
|
374 |
|
375 |
+
logger.info("Transcription process completed successfully")
|
376 |
return formatted_result, srt_file, ""
|
377 |
|
378 |
except requests.exceptions.RequestException as e:
|