Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -421,58 +421,151 @@ def validate_youtube_url(url):
|
|
421 |
return False, "Invalid YouTube URL format"
|
422 |
|
423 |
def process_video(url, cookies_file, progress=gr.Progress()):
|
424 |
-
"""Main function to process YouTube video"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
425 |
|
426 |
# Check if required packages are available
|
427 |
if not YT_DLP_AVAILABLE:
|
428 |
-
|
|
|
|
|
|
|
|
|
429 |
|
430 |
if not WHISPER_AVAILABLE:
|
431 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
432 |
|
433 |
# Validate URL
|
434 |
is_valid, validation_msg = validate_youtube_url(url)
|
435 |
if not is_valid:
|
436 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
437 |
|
438 |
audio_path = None
|
439 |
cookies_temp_path = None
|
440 |
|
441 |
try:
|
442 |
progress(0.05, desc="π Validating URL...")
|
|
|
443 |
|
444 |
# Process cookies file if provided
|
445 |
progress(0.1, desc="πͺ Processing cookies...")
|
446 |
cookies_temp_path = process_cookies_file(cookies_file)
|
447 |
|
|
|
|
|
|
|
|
|
|
|
448 |
status_msg = "β
Cookies loaded" if cookies_temp_path else "β οΈ No cookies (may encounter restrictions)"
|
449 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
450 |
# Download audio
|
451 |
progress(0.2, desc="π₯ Downloading audio...")
|
|
|
452 |
audio_path = download_audio(url, cookies_temp_path)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
453 |
|
454 |
# Transcribe audio
|
455 |
progress(0.6, desc="ποΈ Transcribing audio...")
|
|
|
456 |
transcript = transcribe_audio(audio_path)
|
|
|
457 |
|
458 |
if not transcript.strip():
|
459 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
460 |
|
461 |
# Extract stock information
|
462 |
progress(0.9, desc="π Analyzing content...")
|
|
|
463 |
stock_details = extract_stock_info_enhanced(transcript)
|
|
|
464 |
|
465 |
progress(1.0, desc="β
Complete!")
|
466 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
467 |
|
468 |
except Exception as e:
|
469 |
-
error_msg =
|
470 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
471 |
|
472 |
finally:
|
473 |
# Clean up temporary files
|
474 |
-
|
475 |
-
|
|
|
|
|
|
|
|
|
476 |
|
477 |
# Create Gradio interface optimized for Gradio Cloud
|
478 |
with gr.Blocks(
|
@@ -545,9 +638,23 @@ with gr.Blocks(
|
|
545 |
with gr.Group():
|
546 |
gr.Markdown("### π₯ Input")
|
547 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
548 |
# Cookies upload with better instructions
|
549 |
cookies_input = gr.File(
|
550 |
-
label="πͺ Upload Cookies File (cookies.txt) -
|
551 |
file_types=[".txt"],
|
552 |
file_count="single"
|
553 |
)
|
@@ -588,7 +695,7 @@ with gr.Blocks(
|
|
588 |
# Status display
|
589 |
status_output = gr.Textbox(
|
590 |
label="π Status",
|
591 |
-
lines=
|
592 |
interactive=False,
|
593 |
info="Current processing status"
|
594 |
)
|
@@ -673,11 +780,62 @@ with gr.Blocks(
|
|
673 |
status = check_requirements()
|
674 |
return gr.update(value=status, visible=True)
|
675 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
676 |
check_req_btn.click(
|
677 |
fn=show_requirements,
|
678 |
outputs=[requirements_output]
|
679 |
)
|
680 |
|
|
|
|
|
|
|
|
|
|
|
681 |
process_btn.click(
|
682 |
fn=process_video,
|
683 |
inputs=[url_input, cookies_input],
|
|
|
421 |
return False, "Invalid YouTube URL format"
|
422 |
|
423 |
# NOTE(review): a bare "π" at the start of a message below is what survives of
# an emoji whose remaining bytes were lost before this text reached review.
# It is reproduced as found; restore the intended glyph from the canonical
# app.py rather than guessing.

_DOWNLOAD_HINTS = (
    "🔧 DOWNLOAD TROUBLESHOOTING:\n"
    "• Check if video URL is accessible in browser\n"
    "• Upload fresh cookies.txt file\n"
    "• Try a different video\n"
    "• Wait 10-15 minutes if rate limited\n\n"
)

_TRANSCRIPTION_HINTS = (
    "🔧 TRANSCRIPTION TROUBLESHOOTING:\n"
    "• Check if audio file was downloaded properly\n"
    "• Ensure video has clear audio\n"
    "• Try a shorter video\n\n"
)

# Fallback hints keyed by the pipeline stage that was running when it failed.
_STAGE_HINTS = {
    "download": _DOWNLOAD_HINTS,
    "transcribe": _TRANSCRIPTION_HINTS,
}


def _debug_report(message, debug_info, heading="DEBUG INFO:"):
    """Return *message* followed by *heading* and the newline-joined debug log."""
    return f"{message}{heading}\n" + "\n".join(debug_info)


def process_video(url, cookies_file, progress=gr.Progress()):
    """Download, transcribe and analyse a YouTube video, logging every step.

    Args:
        url: Video URL; checked with ``validate_youtube_url`` before any work.
        cookies_file: Uploaded cookies file (or a falsy value); handed to
            ``process_cookies_file``.
        progress: Gradio progress callback, invoked as
            ``progress(fraction, desc=...)``.

    Returns:
        A 3-tuple ``(text, stock_details, status)``. On success ``text`` is the
        transcript followed by a debug section and ``stock_details`` is the
        result of ``extract_stock_info_enhanced``. On any failure ``text`` is
        an error report, ``stock_details`` is ``""`` and ``status`` is a short
        error label. Exceptions raised inside the pipeline are reported this
        way instead of propagating.
    """
    debug_info = [
        f"π Starting process at {time.strftime('%H:%M:%S')}",
        f"💡 Python version: {sys.version.split()[0]}",
        f"📦 yt-dlp available: {YT_DLP_AVAILABLE}",
        f"🎙️ Whisper available: {WHISPER_AVAILABLE} (type: {WHISPER_TYPE})",
    ]

    # Check if required packages are available
    if not YT_DLP_AVAILABLE:
        error_msg = _debug_report(
            "❌ ERROR: yt-dlp is not installed properly.\n\n"
            "SOLUTION: Install yt-dlp using:\n"
            "pip install yt-dlp\n\n",
            debug_info,
        )
        return error_msg, "", "❌ Missing yt-dlp"

    if not WHISPER_AVAILABLE:
        error_msg = _debug_report(
            "❌ ERROR: OpenAI Whisper is not installed properly.\n\n"
            "SOLUTION: Install Whisper using:\n"
            "pip install openai-whisper\n"
            "OR\n"
            "pip install transformers torch torchaudio\n\n",
            debug_info,
        )
        return error_msg, "", "❌ Missing Whisper"

    # Validate URL
    is_valid, validation_msg = validate_youtube_url(url)
    if not is_valid:
        error_msg = _debug_report(
            f"❌ ERROR: {validation_msg}\n\n"
            f"PROVIDED URL: {url}\n\n"
            "VALID URL FORMATS:\n"
            "• https://www.youtube.com/watch?v=VIDEO_ID\n"
            "• https://youtu.be/VIDEO_ID\n"
            "• https://www.youtube.com/embed/VIDEO_ID\n\n",
            debug_info,
        )
        return error_msg, "", "❌ Invalid URL"

    audio_path = None
    cookies_temp_path = None
    # Tracks the step in flight so the error report can pick matching hints
    # even when the exception text itself names no step.
    stage = "setup"

    try:
        progress(0.05, desc="π Validating URL...")
        debug_info.append(f"✅ URL validation passed: {url}")

        # Process cookies file if provided
        progress(0.1, desc="🍪 Processing cookies...")
        cookies_temp_path = process_cookies_file(cookies_file)
        if cookies_temp_path:
            debug_info.append(f"✅ Cookies processed: {cookies_temp_path}")
        else:
            debug_info.append("⚠️ No cookies provided - this may cause access errors")

        # Best-effort accessibility probe: whatever goes wrong here is logged
        # once and the pipeline carries on to the real download.
        progress(0.15, desc="π Checking video accessibility...")
        try:
            video_info = get_video_info(url, cookies_temp_path)
            if 'error' in video_info:
                debug_info.append(f"❌ Video info error: {video_info['error']}")
            else:
                debug_info.append(f"✅ Video info: {video_info}")
        except Exception as exc:
            debug_info.append(f"❌ Video info check failed: {exc}")

        # Download audio
        stage = "download"
        progress(0.2, desc="📥 Downloading audio...")
        debug_info.append("π Starting audio download...")
        audio_path = download_audio(url, cookies_temp_path)
        debug_info.append(f"✅ Audio downloaded: {audio_path}")

        if not (audio_path and os.path.exists(audio_path)):
            raise Exception("Audio file not found after download")
        file_size = os.path.getsize(audio_path)
        debug_info.append(f"π Audio file size: {file_size/1024/1024:.2f} MB")

        # Transcribe audio
        stage = "transcribe"
        progress(0.6, desc="🎙️ Transcribing audio...")
        debug_info.append("π Starting transcription...")
        # A None result is treated as an empty transcript so it is reported as
        # "no speech" instead of failing inside len().
        transcript = transcribe_audio(audio_path) or ""
        debug_info.append(f"✅ Transcription completed: {len(transcript)} characters")

        if not transcript.strip():
            error_msg = _debug_report(
                "❌ ERROR: No speech detected in the video\n\n"
                "POSSIBLE CAUSES:\n"
                "• Video has no audio track\n"
                "• Audio is too quiet or unclear\n"
                "• Video is not in English\n"
                "• Audio file is corrupted\n\n",
                debug_info,
            )
            return error_msg, "", "❌ No speech detected"

        # Extract stock information
        stage = "analysis"
        progress(0.9, desc="π Analyzing content...")
        debug_info.append("π Starting stock analysis...")
        stock_details = extract_stock_info_enhanced(transcript)
        debug_info.append("✅ Stock analysis completed")

        progress(1.0, desc="✅ Complete!")

        # Add debug info to transcript
        rule = "=" * 50
        debug_section = _debug_report(
            f"\n\n{rule}\n",
            debug_info,
            heading=f"π DEBUG INFORMATION\n{rule}",
        )
        return transcript + debug_section, stock_details, "✅ Processing completed successfully"

    except Exception as exc:
        error_name = type(exc).__name__
        lowered = str(exc).lower()

        # The message text wins when it names a step; otherwise fall back to
        # the stage that was running.
        if "download" in lowered:
            hints = _DOWNLOAD_HINTS
        elif "transcribe" in lowered:
            hints = _TRANSCRIPTION_HINTS
        else:
            hints = _STAGE_HINTS.get(stage, "")

        error_msg = _debug_report(
            "❌ DETAILED ERROR INFORMATION:\n\n"
            f"ERROR MESSAGE: {exc}\n\n"
            f"ERROR TYPE: {error_name}\n\n"
            f"{hints}",
            debug_info,
            heading="π PROCESSING STEPS COMPLETED:",
        )
        return error_msg, "", f"❌ Error: {error_name}"

    finally:
        # Clean up temporary files. Nothing is appended to debug_info here:
        # the return value is already built by the time this runs.
        if audio_path:
            cleanup_file(audio_path)
        if cookies_temp_path:
            cleanup_file(cookies_temp_path)
|
569 |
|
570 |
# Create Gradio interface optimized for Gradio Cloud
|
571 |
with gr.Blocks(
|
|
|
638 |
with gr.Group():
|
639 |
gr.Markdown("### π₯ Input")
|
640 |
|
641 |
+
# Add a test button first
|
642 |
+
test_btn = gr.Button(
|
643 |
+
"π§ͺ Test System (Click First!)",
|
644 |
+
variant="secondary",
|
645 |
+
size="sm"
|
646 |
+
)
|
647 |
+
|
648 |
+
test_output = gr.Textbox(
|
649 |
+
label="π§ͺ System Test Results",
|
650 |
+
lines=5,
|
651 |
+
visible=False,
|
652 |
+
interactive=False
|
653 |
+
)
|
654 |
+
|
655 |
# Cookies upload with better instructions
|
656 |
cookies_input = gr.File(
|
657 |
+
label="πͺ Upload Cookies File (cookies.txt) - HIGHLY RECOMMENDED",
|
658 |
file_types=[".txt"],
|
659 |
file_count="single"
|
660 |
)
|
|
|
695 |
# Status display
|
696 |
status_output = gr.Textbox(
|
697 |
label="π Status",
|
698 |
+
lines=3,
|
699 |
interactive=False,
|
700 |
info="Current processing status"
|
701 |
)
|
|
|
780 |
status = check_requirements()
|
781 |
return gr.update(value=status, visible=True)
|
782 |
|
783 |
+
def test_system():
    """Probe the runtime dependencies and show the outcome in the UI.

    Reports whether yt-dlp and Whisper were detected (from the module-level
    ``YT_DLP_AVAILABLE`` / ``WHISPER_AVAILABLE`` / ``WHISPER_TYPE`` values),
    tries to construct a ``YoutubeDL`` instance, tries to import the selected
    Whisper backend, and checks that a temporary file can be written.

    Returns:
        The value of ``gr.update(...)`` carrying the newline-joined report,
        with ``visible=True``.
    """
    def mark(ok):
        # Tie the icon to the outcome so an unavailable package is not
        # reported with a success mark.
        return "✅" if ok else "❌"

    test_results = ["🧪 SYSTEM TEST RESULTS", "=" * 30]

    # Test imports
    test_results.append(
        f"{mark(YT_DLP_AVAILABLE)} yt-dlp: "
        f"{'Available' if YT_DLP_AVAILABLE else 'NOT AVAILABLE'}"
    )
    test_results.append(
        f"{mark(WHISPER_AVAILABLE)} Whisper: "
        f"{'Available' if WHISPER_AVAILABLE else 'NOT AVAILABLE'} (Type: {WHISPER_TYPE})"
    )

    # Test yt-dlp functionality
    if YT_DLP_AVAILABLE:
        try:
            from yt_dlp import YoutubeDL
            YoutubeDL({'quiet': True})  # only construction is being probed
            test_results.append("✅ yt-dlp: Can create YoutubeDL instance")
        except Exception as exc:
            test_results.append(f"❌ yt-dlp: Error creating instance - {exc}")

    # Test Whisper functionality
    if WHISPER_AVAILABLE:
        try:
            if WHISPER_TYPE == "openai-whisper":
                import whisper  # noqa: F401 - importability is the test
                test_results.append("✅ Whisper: OpenAI Whisper can be imported")
            elif WHISPER_TYPE == "transformers":
                from transformers import pipeline  # noqa: F401 - importability is the test
                test_results.append("✅ Whisper: Transformers Whisper can be imported")
        except Exception as exc:
            test_results.append(f"❌ Whisper: Error testing - {exc}")

    # Test file operations. NamedTemporaryFile creates the file atomically and
    # removes it on exit, even if the write fails; mktemp() only returns a
    # name and is deprecated because another process can claim it first.
    try:
        with tempfile.NamedTemporaryFile("w") as probe:
            probe.write("test")
        test_results.append("✅ File operations: Working")
    except Exception as exc:
        test_results.append(f"❌ File operations: Error - {exc}")

    test_results.append("\n💡 If you see any ❌ errors above, install missing packages:")
    test_results.append("pip install yt-dlp openai-whisper torch torchaudio")

    return gr.update(value="\n".join(test_results), visible=True)
|
828 |
+
|
829 |
check_req_btn.click(
|
830 |
fn=show_requirements,
|
831 |
outputs=[requirements_output]
|
832 |
)
|
833 |
|
834 |
+
test_btn.click(
|
835 |
+
fn=test_system,
|
836 |
+
outputs=[test_output]
|
837 |
+
)
|
838 |
+
|
839 |
process_btn.click(
|
840 |
fn=process_video,
|
841 |
inputs=[url_input, cookies_input],
|