HF_Final_Assignment_Template

Sleeping

App Files Files Community

Samuel Thomas committed on Jun 1

Commit

0f3a132

1 Parent(s): 0264a40

missing dependency

Browse files

Files changed (2) hide show

requirements.txt +2 -1
tools.py +165 -257

requirements.txt CHANGED Viewed

@@ -25,4 +25,5 @@ sentencepiece
 nltk
 SpeechRecognition
 pandas
-openai-whisper

 nltk
 SpeechRecognition
 pandas
+openai-whisper
+openpyxl

tools.py CHANGED Viewed

@@ -18,7 +18,7 @@ from langchain_huggingface import HuggingFacePipeline
 from typing import TypedDict, List, Optional, Dict, Any, Annotated, Literal, Union, Tuple, Set, Type
 import time
 from collections import Counter
-from pydantic import Field, BaseModel
 import hashlib
 import json
 import numpy as np
@@ -2151,43 +2151,45 @@ class YouTubeTranscriptExtractor(BaseTool):
         "Downloads a YouTube video and extracts the complete audio transcript using speech recognition. "
         "Use this tool for questions about what people say in YouTube videos. "
         "Input should be a dict with keys: 'youtube_url' and optional parameters. "
-        "Example: {'youtube_url': 'https://youtube.com/watch?v=xyz', 'language': 'en-US'}"
     )
-    # Define Pydantic fields for the attributes we need to set
     recognizer: Any = Field(default=None, exclude=True)
     class Config:
         arbitrary_types_allowed = True
-        extra = "allow"
-    def __init__(self, **kwargs):
         super().__init__(**kwargs)
-        # Initialize directories
         self.cache_dir = '/tmp/youtube_transcript_cache/'
         self.audio_dir = '/tmp/audio/'
         self.chunks_dir = '/tmp/audio_chunks/'
-        # Initialize speech recognizer
         self.recognizer = sr.Recognizer()
         self.recognizer.energy_threshold = 4000
         self.recognizer.pause_threshold = 0.8
-        # Create directories
         for dir_path in [self.cache_dir, self.audio_dir, self.chunks_dir]:
             os.makedirs(dir_path, exist_ok=True)
-    def _get_config(self, key: str, default_value=None, input_data: Dict[str, Any] = None):
-        """Get configuration value with fallback to defaults"""
         defaults = {
             'language': 'en-US',
-            'chunk_length_ms': 30000,  # 30 seconds
-            'silence_thresh': -40,     # dB
             'audio_quality': 'best',
             'cache_enabled': True,
-            'min_silence_len': 500,    # minimum silence length to split on
-            'overlap_ms': 1000,        # 1 second overlap between chunks
         }
         if input_data and key in input_data:
@@ -2195,15 +2197,12 @@ class YouTubeTranscriptExtractor(BaseTool):
         return defaults.get(key, default_value)
     def _get_video_hash(self, url: str) -> str:
-        """Generate hash for video URL for caching"""
         return hashlib.md5(url.encode()).hexdigest()
     def _get_cache_path(self, video_hash: str, cache_type: str) -> str:
-        """Get cache file path"""
         return os.path.join(self.cache_dir, f"{video_hash}_{cache_type}")
     def _load_from_cache(self, cache_path: str, cache_enabled: bool = True) -> Optional[Any]:
-        """Load data from cache"""
         if not cache_enabled or not os.path.exists(cache_path):
             return None
         try:
@@ -2214,7 +2213,6 @@ class YouTubeTranscriptExtractor(BaseTool):
             return None
     def _save_to_cache(self, cache_path: str, data: Any, cache_enabled: bool = True):
-        """Save data to cache"""
         if not cache_enabled:
             return
         try:
@@ -2224,7 +2222,6 @@ class YouTubeTranscriptExtractor(BaseTool):
             print(f"Error saving cache: {str(e)}")
     def _clean_directory(self, directory: str):
-        """Clean directory contents"""
         if os.path.exists(directory):
             for filename in os.listdir(directory):
                 file_path = os.path.join(directory, filename)
@@ -2236,29 +2233,28 @@ class YouTubeTranscriptExtractor(BaseTool):
                 except Exception as e:
                     print(f'Failed to delete {file_path}. Reason: {e}')
-    def download_youtube_audio(self, url: str, video_hash: str, input_data: Dict[str, Any] = None) -> Optional[str]:
-        """Download YouTube video as audio file"""
         audio_quality = self._get_config('audio_quality', 'best', input_data)
         output_filename = f'{video_hash}.wav'
         output_path = os.path.join(self.audio_dir, output_filename)
-        # Check cache
         cache_enabled = self._get_config('cache_enabled', True, input_data)
         if cache_enabled and os.path.exists(output_path):
             print(f"Using cached audio: {output_path}")
             return output_path
-        # Clean directory
         self._clean_directory(self.audio_dir)
         try:
-            # Updated yt-dlp configuration for better compatibility
-            ydl_opts = {
                 'format': 'bestaudio[ext=m4a]/bestaudio/best',
                 'outtmpl': os.path.join(self.audio_dir, f'{video_hash}.%(ext)s'),
-                'quiet': False,  # Set to False for debugging
                 'no_warnings': False,
-                'extract_flat': False,
                 'writethumbnail': False,
                 'writeinfojson': False,
                 'postprocessors': [{
@@ -2266,362 +2262,274 @@ class YouTubeTranscriptExtractor(BaseTool):
                     'preferredcodec': 'wav',
                     'preferredquality': '192' if audio_quality == 'best' else '128',
                 }],
-                # Add user agent and headers to avoid blocking
                 'http_headers': {
                     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                 },
-                # Add cookie handling
-                'cookiefile': None,
                 'nocheckcertificate': True,
             }
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                print(f"Downloading audio from: {url}")
                 ydl.download([url])
-            # Check if the output file exists
             if os.path.exists(output_path):
                 print(f"Audio downloaded successfully: {output_path}")
                 return output_path
             else:
-                # Look for any downloaded file with the video hash
                 possible_files = glob.glob(os.path.join(self.audio_dir, f'{video_hash}.*'))
                 if possible_files:
-                    # Convert to WAV if needed
                     source_file = possible_files[0]
                     if not source_file.endswith('.wav'):
                         try:
                             audio = AudioSegment.from_file(source_file)
                             audio.export(output_path, format="wav")
-                            os.remove(source_file)  # Clean up original
                             print(f"Audio converted to WAV: {output_path}")
                             return output_path
                         except Exception as e:
                             print(f"Error converting audio: {str(e)}")
                             return None
-                    else:
-                        return source_file
-                print("No audio file found after download")
                 return None
         except Exception as e:
-            print(f"Error downloading YouTube audio: {str(e)}")
-            # Try alternative format as fallback
-            try:
-                print("Trying alternative download method...")
-                fallback_opts = {
-                    'format': 'worst[ext=mp4]',
-                    'outtmpl': os.path.join(self.audio_dir, f'{video_hash}_fallback.%(ext)s'),
-                    'quiet': False,
-                }
-                with yt_dlp.YoutubeDL(fallback_opts) as ydl:
-                    ydl.download([url])
-                # Look for fallback file and convert
-                fallback_files = glob.glob(os.path.join(self.audio_dir, f'{video_hash}_fallback.*'))
-                if fallback_files:
-                    source_file = fallback_files[0]
-                    try:
-                        audio = AudioSegment.from_file(source_file)
-                        audio.export(output_path, format="wav")
-                        os.remove(source_file)
-                        print(f"Fallback audio converted: {output_path}")
-                        return output_path
-                    except Exception as conv_e:
-                        print(f"Error converting fallback audio: {str(conv_e)}")
-            except Exception as fallback_e:
-                print(f"Fallback download also failed: {str(fallback_e)}")
             return None
-    def _split_audio_intelligent(self, audio_path: str, input_data: Dict[str, Any] = None) -> List[Dict[str, Any]]:
-        """Split audio into chunks intelligently based on silence"""
         self._clean_directory(self.chunks_dir)
         try:
-            # Load audio
             audio = AudioSegment.from_wav(audio_path)
-            # Get configuration
             chunk_length_ms = self._get_config('chunk_length_ms', 30000, input_data)
             silence_thresh = self._get_config('silence_thresh', -40, input_data)
             min_silence_len = self._get_config('min_silence_len', 500, input_data)
-            overlap_ms = self._get_config('overlap_ms', 1000, input_data)
-            # First try to split on silence
             chunks = split_on_silence(
                 audio,
                 min_silence_len=min_silence_len,
                 silence_thresh=silence_thresh,
-                keep_silence=True
             )
-            # If no silence-based splits or chunks too large, split by time
-            if not chunks or any(len(chunk) > chunk_length_ms * 2 for chunk in chunks):
-                print("Using time-based splitting...")
-                chunks = []
-                for i in range(0, len(audio), chunk_length_ms - overlap_ms):
-                    chunk = audio[i:i + chunk_length_ms]
-                    if len(chunk) > 1000:  # Only add chunks longer than 1 second
-                        chunks.append(chunk)
-            # Save chunks and create metadata
-            chunk_data = []
-            current_time = 0
-            for i, chunk in enumerate(chunks):
-                if len(chunk) < 1000:  # Skip very short chunks
-                    continue
                 chunk_filename = os.path.join(self.chunks_dir, f"chunk_{i:04d}.wav")
-                chunk.export(chunk_filename, format="wav")
-                duration = len(chunk) / 1000.0  # in seconds
-                chunk_info = {
-                    'filename': chunk_filename,
-                    'index': i,
-                    'start_time': current_time,
-                    'duration': duration,
-                    'end_time': current_time + duration
-                }
-                chunk_data.append(chunk_info)
-                current_time += duration
             print(f"Split audio into {len(chunk_data)} chunks")
             return chunk_data
         except Exception as e:
             print(f"Error splitting audio: {str(e)}")
-            # Fallback: return original file as single chunk
-            try:
                 audio = AudioSegment.from_wav(audio_path)
                 duration = len(audio) / 1000.0
-                return [{
-                    'filename': audio_path,
-                    'index': 0,
-                    'start_time': 0,
-                    'duration': duration,
-                    'end_time': duration
-                }]
-            except:
-                return []
-    def _transcribe_audio_chunk(self, chunk_info: Dict[str, Any], input_data: Dict[str, Any] = None) -> Dict[str, Any]:
-        """Transcribe a single audio chunk"""
         chunk_path = chunk_info['filename']
         try:
             language = self._get_config('language', 'en-US', input_data)
             with sr.AudioFile(chunk_path) as source:
-                # Adjust for ambient noise
-                self.recognizer.adjust_for_ambient_noise(source, duration=0.5)
                 audio_data = self.recognizer.record(source)
-            # Try Google Speech Recognition
             try:
                 text = self.recognizer.recognize_google(audio_data, language=language)
-                return {
-                    'text': text,
-                    'confidence': 1.0,
-                    'start_time': chunk_info['start_time'],
-                    'end_time': chunk_info['end_time'],
-                    'duration': chunk_info['duration'],
-                    'index': chunk_info['index'],
-                    'success': True
-                }
             except sr.UnknownValueError:
-                # Try without language specification
-                try:
                     text = self.recognizer.recognize_google(audio_data)
-                    return {
-                        'text': text,
-                        'confidence': 0.8,
-                        'start_time': chunk_info['start_time'],
-                        'end_time': chunk_info['end_time'],
-                        'duration': chunk_info['duration'],
-                        'index': chunk_info['index'],
-                        'success': True
-                    }
                 except sr.UnknownValueError:
-                    return {
-                        'text': '[INAUDIBLE]',
-                        'confidence': 0.0,
-                        'start_time': chunk_info['start_time'],
-                        'end_time': chunk_info['end_time'],
-                        'duration': chunk_info['duration'],
-                        'index': chunk_info['index'],
-                        'success': False
-                    }
             except sr.RequestError as e:
-                return {
-                    'text': f'[RECOGNITION_ERROR: {str(e)}]',
-                    'confidence': 0.0,
-                    'start_time': chunk_info['start_time'],
-                    'end_time': chunk_info['end_time'],
-                    'duration': chunk_info['duration'],
-                    'index': chunk_info['index'],
-                    'success': False,
-                    'error': str(e)
-                }
         except Exception as e:
-            return {
-                'text': f'[ERROR: {str(e)}]',
-                'confidence': 0.0,
-                'start_time': chunk_info.get('start_time', 0),
-                'end_time': chunk_info.get('end_time', 0),
-                'duration': chunk_info.get('duration', 0),
-                'index': chunk_info.get('index', 0),
-                'success': False,
-                'error': str(e)
-            }
-    def _transcribe_chunks_parallel(self, chunk_data: List[Dict[str, Any]], input_data: Dict[str, Any] = None) -> List[Dict[str, Any]]:
-        """Transcribe audio chunks in parallel"""
         results = []
-        # Use fewer workers to avoid API rate limits
-        max_workers = min(3, len(chunk_data))
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
             future_to_chunk = {
                 executor.submit(self._transcribe_audio_chunk, chunk_info, input_data): chunk_info
                 for chunk_info in chunk_data
             }
             for future in as_completed(future_to_chunk):
                 chunk_info = future_to_chunk[future]
                 try:
                     result = future.result()
                     results.append(result)
-                    if result['success']:
-                        preview = result['text'][:50] + "..." if len(result['text']) > 50 else result['text']
-                        print(f"Transcribed chunk {result['index']}: {preview}")
-                    else:
-                        print(f"Failed to transcribe chunk {result['index']}: {result['text']}")
                 except Exception as e:
                     print(f"Error processing chunk {chunk_info.get('index', '?')}: {str(e)}")
                     results.append({
-                        'text': f'[PROCESSING_ERROR: {str(e)}]',
-                        'confidence': 0.0,
-                        'start_time': chunk_info.get('start_time', 0),
-                        'end_time': chunk_info.get('end_time', 0),
-                        'duration': chunk_info.get('duration', 0),
-                        'index': chunk_info.get('index', 0),
-                        'success': False,
-                        'error': str(e)
                     })
-        # Sort results by chunk index to maintain order
         results.sort(key=lambda x: x['index'])
         return results
-    def extract_transcript(self, audio_path: str, video_hash: str, input_data: Dict[str, Any] = None) -> Dict[str, Any]:
-        """Extract complete transcript from audio file"""
         cache_enabled = self._get_config('cache_enabled', True, input_data)
         cache_path = self._get_cache_path(video_hash, "transcript.json")
-        # Check cache
         cached_transcript = self._load_from_cache(cache_path, cache_enabled)
         if cached_transcript:
             print("Using cached transcript")
             return cached_transcript
         try:
-            # Step 1: Split audio into manageable chunks
             print("Splitting audio into chunks...")
             chunk_data = self._split_audio_intelligent(audio_path, input_data)
             if not chunk_data:
-                return {
-                    'error': 'Failed to split audio into chunks',
-                    'full_transcript': '',
-                    'success_rate': 0.0
-                }
-            # Step 2: Transcribe all chunks
             print(f"Transcribing {len(chunk_data)} audio chunks...")
             transcript_results = self._transcribe_chunks_parallel(chunk_data, input_data)
-            # Step 3: Combine results
-            successful_results = [r for r in transcript_results if r['success']]
-            full_text = ' '.join([r['text'] for r in successful_results])
-            # Calculate statistics
-            total_chunks = len(transcript_results)
-            successful_chunks = len(successful_results)
-            success_rate = successful_chunks / total_chunks if total_chunks > 0 else 0
-            word_count = len(full_text.split()) if full_text else 0
             final_result = {
-                'full_transcript': full_text,
-                'word_count': word_count,
-                'total_chunks': total_chunks,
-                'successful_chunks': successful_chunks,
-                'success_rate': success_rate,
-                'extraction_timestamp': time.time(),
-                'extraction_date': time.strftime('%Y-%m-%d %H:%M:%S'),
                 'detailed_results': transcript_results
             }
-            # Cache results
             self._save_to_cache(cache_path, final_result, cache_enabled)
             print(f"Transcript extraction completed. Success rate: {success_rate:.1%}")
             return final_result
         except Exception as e:
             print(f"Error during transcript extraction: {str(e)}")
-            return {
-                'error': str(e),
-                'full_transcript': '',
-                'success_rate': 0.0
-            }
-    def _run(self, youtube_url: str, **kwargs) -> str:
-        """Main execution method"""
-        input_data = {
-            'youtube_url': youtube_url,
-            **kwargs
-        }
-        if not youtube_url:
-            return "Error: youtube_url is required."
         try:
-            # Generate video hash for caching
             video_hash = self._get_video_hash(youtube_url)
-            # Step 1: Download audio
-            print(f"Downloading YouTube audio from {youtube_url}...")
             audio_path = self.download_youtube_audio(youtube_url, video_hash, input_data)
             if not audio_path or not os.path.exists(audio_path):
-                return "Error: Failed to download the YouTube audio. Please check the URL and try again."
-            # Step 2: Extract transcript
             print("Extracting audio transcript...")
             transcript_result = self.extract_transcript(audio_path, video_hash, input_data)
-            if transcript_result.get("error"):
-                return f"Error: {transcript_result['error']}"
-            main_transcript = transcript_result.get('full_transcript', '')
-            if not main_transcript:
-                return "Error: No transcript could be extracted from the audio."
-            print(f"Transcript extracted successfully. Word count: {transcript_result.get('word_count', 0)}")
-            print(f"Success rate: {transcript_result.get('success_rate', 0):.1%}")
             return "TRANSCRIPT: " + main_transcript
         except Exception as e:
             return f"Error during transcript extraction: {str(e)}"
 # Factory function to create the tool

 from typing import TypedDict, List, Optional, Dict, Any, Annotated, Literal, Union, Tuple, Set, Type
 import time
 from collections import Counter
+from pydantic import Field, BaseModel, Extra
 import hashlib
 import json
 import numpy as np
         "Downloads a YouTube video and extracts the complete audio transcript using speech recognition. "
         "Use this tool for questions about what people say in YouTube videos. "
         "Input should be a dict with keys: 'youtube_url' and optional parameters. "
+        "Optional parameters include 'language' (e.g., 'en-US'), "
+        "'cookies_file_path' (path to a cookies TXT file for authentication), "
+        "or 'cookies_from_browser' (string specifying browser for cookies, e.g., 'chrome', 'firefox:profileName', 'edge+keyringName:profileName::containerName'). "
+        "Example: {'youtube_url': 'https://youtube.com/watch?v=xyz', 'language': 'en-US'} or "
+        "{'youtube_url': '...', 'cookies_file_path': '/path/to/cookies.txt'} or "
+        "{'youtube_url': '...', 'cookies_from_browser': 'chrome'}"
     )
     recognizer: Any = Field(default=None, exclude=True)
     class Config:
         arbitrary_types_allowed = True
+        extra = Extra.allow # Adjusted if pydantic v1 style
+    def __init__(self, **kwargs: Any):
         super().__init__(**kwargs)
         self.cache_dir = '/tmp/youtube_transcript_cache/'
         self.audio_dir = '/tmp/audio/'
         self.chunks_dir = '/tmp/audio_chunks/'
         self.recognizer = sr.Recognizer()
         self.recognizer.energy_threshold = 4000
         self.recognizer.pause_threshold = 0.8
         for dir_path in [self.cache_dir, self.audio_dir, self.chunks_dir]:
             os.makedirs(dir_path, exist_ok=True)
+    def _get_config(self, key: str, default_value: Any = None, input_data: Optional[Dict[str, Any]] = None) -> Any:
         defaults = {
             'language': 'en-US',
+            'chunk_length_ms': 30000,
+            'silence_thresh': -40,
             'audio_quality': 'best',
             'cache_enabled': True,
+            'min_silence_len': 500,
+            'overlap_ms': 1000,
+            'cookies_file_path': None,  # New: Path to a cookies file
+            'cookies_from_browser': None # New: Browser string e.g., "chrome", "firefox:profile_name"
         }
         if input_data and key in input_data:
         return defaults.get(key, default_value)
     def _get_video_hash(self, url: str) -> str:
         return hashlib.md5(url.encode()).hexdigest()
     def _get_cache_path(self, video_hash: str, cache_type: str) -> str:
         return os.path.join(self.cache_dir, f"{video_hash}_{cache_type}")
     def _load_from_cache(self, cache_path: str, cache_enabled: bool = True) -> Optional[Any]:
         if not cache_enabled or not os.path.exists(cache_path):
             return None
         try:
             return None
     def _save_to_cache(self, cache_path: str, data: Any, cache_enabled: bool = True):
         if not cache_enabled:
             return
         try:
             print(f"Error saving cache: {str(e)}")
     def _clean_directory(self, directory: str):
         if os.path.exists(directory):
             for filename in os.listdir(directory):
                 file_path = os.path.join(directory, filename)
                 except Exception as e:
                     print(f'Failed to delete {file_path}. Reason: {e}')
+    def download_youtube_audio(self, url: str, video_hash: str, input_data: Optional[Dict[str, Any]] = None) -> Optional[str]:
         audio_quality = self._get_config('audio_quality', 'best', input_data)
         output_filename = f'{video_hash}.wav'
         output_path = os.path.join(self.audio_dir, output_filename)
         cache_enabled = self._get_config('cache_enabled', True, input_data)
         if cache_enabled and os.path.exists(output_path):
             print(f"Using cached audio: {output_path}")
             return output_path
         self._clean_directory(self.audio_dir)
+        cookies_file_path = self._get_config('cookies_file_path', None, input_data)
+        cookies_from_browser_str = self._get_config('cookies_from_browser', None, input_data)
         try:
+            ydl_opts: Dict[str, Any] = {
                 'format': 'bestaudio[ext=m4a]/bestaudio/best',
                 'outtmpl': os.path.join(self.audio_dir, f'{video_hash}.%(ext)s'),
+                'quiet': False,
                 'no_warnings': False,
+                'extract_flat': False, # Ensure this is false for actual downloads
                 'writethumbnail': False,
                 'writeinfojson': False,
                 'postprocessors': [{
                     'preferredcodec': 'wav',
                     'preferredquality': '192' if audio_quality == 'best' else '128',
                 }],
                 'http_headers': {
                     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
                 },
                 'nocheckcertificate': True,
             }
+            if cookies_file_path:
+                ydl_opts['cookiefile'] = cookies_file_path
+                print(f"Using cookies from file: {cookies_file_path}")
+            elif cookies_from_browser_str:
+                parsed_browser, parsed_profile, parsed_keyring, parsed_container = None, None, None, None
+                temp_str = cookies_from_browser_str
+                if '::' in temp_str:
+                    main_part_before_container, parsed_container_val = temp_str.split('::', 1)
+                    parsed_container = parsed_container_val if parsed_container_val else None
+                    temp_str = main_part_before_container
+                if ':' in temp_str:
+                    browser_keyring_part, parsed_profile_val = temp_str.split(':', 1)
+                    parsed_profile = parsed_profile_val if parsed_profile_val else None
+                    temp_str = browser_keyring_part
+                if '+' in temp_str:
+                    parsed_browser_val, parsed_keyring_val = temp_str.split('+', 1)
+                    parsed_browser = parsed_browser_val
+                    parsed_keyring = parsed_keyring_val if parsed_keyring_val else None
+                else:
+                    parsed_browser = temp_str
+                if parsed_browser:
+                    # yt-dlp expects cookiesfrombrowser as a tuple: (BROWSER, PROFILE, KEYRING, CONTAINER)
+                    final_tuple: Tuple[Optional[str], ...] = (
+                        parsed_browser,
+                        parsed_profile,
+                        parsed_keyring,
+                        parsed_container
+                    )
+                    ydl_opts['cookiesfrombrowser'] = final_tuple
+                    print(f"Attempting to use cookies from browser spec '{cookies_from_browser_str}', parsed as: {final_tuple}")
+                else:
+                    print(f"Invalid or empty browser name in cookies_from_browser string: '{cookies_from_browser_str}'")
             with yt_dlp.YoutubeDL(ydl_opts) as ydl:
+                print(f"Downloading audio from: {url} with options: {ydl_opts}")
                 ydl.download([url])
             if os.path.exists(output_path):
                 print(f"Audio downloaded successfully: {output_path}")
                 return output_path
             else:
                 possible_files = glob.glob(os.path.join(self.audio_dir, f'{video_hash}.*'))
                 if possible_files:
                     source_file = possible_files[0]
                     if not source_file.endswith('.wav'):
                         try:
                             audio = AudioSegment.from_file(source_file)
                             audio.export(output_path, format="wav")
+                            os.remove(source_file)
                             print(f"Audio converted to WAV: {output_path}")
                             return output_path
                         except Exception as e:
                             print(f"Error converting audio: {str(e)}")
                             return None
+                    else: # Already a .wav, possibly due to postprocessor already creating it with a different ext pattern
+                        if source_file != output_path: # if names differ due to original extension
+                           shutil.move(source_file, output_path)
+                        print(f"Audio file found: {output_path}")
+                        return output_path
+                print(f"No audio file found at expected path after download: {output_path}")
                 return None
+        except yt_dlp.utils.DownloadError as de:
+            print(f"yt-dlp DownloadError: {str(de)}")
+            if "Sign in to confirm you're not a bot" in str(de) and not (cookies_file_path or cookies_from_browser_str):
+                print("Authentication required. Consider using 'cookies_file_path' or 'cookies_from_browser' options.")
+            return None
         except Exception as e:
+            print(f"Error downloading YouTube audio: {type(e).__name__} - {str(e)}")
+            # Fallback attempt is removed as it's unlikely to succeed if the primary authenticated attempt fails due to bot detection
             return None
+    def _split_audio_intelligent(self, audio_path: str, input_data: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
         self._clean_directory(self.chunks_dir)
         try:
             audio = AudioSegment.from_wav(audio_path)
             chunk_length_ms = self._get_config('chunk_length_ms', 30000, input_data)
             silence_thresh = self._get_config('silence_thresh', -40, input_data)
             min_silence_len = self._get_config('min_silence_len', 500, input_data)
+            overlap_ms = self._get_config('overlap_ms', 1000, input_data) # Not used in current split_on_silence
             chunks = split_on_silence(
                 audio,
                 min_silence_len=min_silence_len,
                 silence_thresh=silence_thresh,
+                keep_silence=True
             )
+            processed_chunks: List[AudioSegment] = [] # type: ignore
+            # Combine small chunks or re-chunk if silence splitting is ineffective
+            temp_chunk: Optional[AudioSegment] = None # type: ignore
+            for chunk in chunks:
+                if temp_chunk is None:
+                    temp_chunk = chunk
+                else:
+                    temp_chunk += chunk
+                if len(temp_chunk) > chunk_length_ms / 2 or chunk == chunks[-1]: # Arbitrary threshold to combine small chunks
+                    processed_chunks.append(temp_chunk)
+                    temp_chunk = None
+            if not processed_chunks or any(len(p_chunk) > chunk_length_ms * 1.5 for p_chunk in processed_chunks): # If still problematic
+                print("Using time-based splitting due to ineffective silence splitting or overly large chunks...")
+                processed_chunks = []
+                for i in range(0, len(audio), chunk_length_ms - overlap_ms):
+                    chunk_segment = audio[i:i + chunk_length_ms]
+                    if len(chunk_segment) > 1000:
+                        processed_chunks.append(chunk_segment)
+            chunk_data = []
+            current_time_ms = 0
+            for i, chunk_segment in enumerate(processed_chunks):
+                if len(chunk_segment) < 1000: continue
                 chunk_filename = os.path.join(self.chunks_dir, f"chunk_{i:04d}.wav")
+                chunk_segment.export(chunk_filename, format="wav")
+                duration_s = len(chunk_segment) / 1000.0
+                start_time_s = current_time_ms / 1000.0
+                end_time_s = start_time_s + duration_s
+                chunk_data.append({
+                    'filename': chunk_filename, 'index': i,
+                    'start_time': start_time_s, 'duration': duration_s, 'end_time': end_time_s
+                })
+                current_time_ms += len(chunk_segment) # Approximation, true timestamping is harder
             print(f"Split audio into {len(chunk_data)} chunks")
             return chunk_data
         except Exception as e:
             print(f"Error splitting audio: {str(e)}")
+            try: # Fallback: single chunk
                 audio = AudioSegment.from_wav(audio_path)
                 duration = len(audio) / 1000.0
+                return [{'filename': audio_path, 'index': 0, 'start_time': 0, 'duration': duration, 'end_time': duration}]
+            except: return []
+    def _transcribe_audio_chunk(self, chunk_info: Dict[str, Any], input_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
         chunk_path = chunk_info['filename']
+        base_result = {
+            'start_time': chunk_info.get('start_time', 0), 'end_time': chunk_info.get('end_time', 0),
+            'duration': chunk_info.get('duration', 0), 'index': chunk_info.get('index', -1),
+            'success': False, 'confidence': 0.0
+        }
         try:
             language = self._get_config('language', 'en-US', input_data)
             with sr.AudioFile(chunk_path) as source:
+                self.recognizer.adjust_for_ambient_noise(source, duration=0.2) # Shorter adjustment
                 audio_data = self.recognizer.record(source)
             try:
                 text = self.recognizer.recognize_google(audio_data, language=language)
+                return {**base_result, 'text': text, 'confidence': 1.0, 'success': True}
             except sr.UnknownValueError:
+                try: # Try without specific language
                     text = self.recognizer.recognize_google(audio_data)
+                    return {**base_result, 'text': text, 'confidence': 0.8, 'success': True} # Lower confidence
                 except sr.UnknownValueError:
+                    return {**base_result, 'text': '[INAUDIBLE]'}
             except sr.RequestError as e:
+                return {**base_result, 'text': f'[RECOGNITION_ERROR: {str(e)}]', 'error': str(e)}
         except Exception as e:
+            return {**base_result, 'text': f'[ERROR: {str(e)}]', 'error': str(e)}
+    def _transcribe_chunks_parallel(self, chunk_data: List[Dict[str, Any]], input_data: Optional[Dict[str, Any]] = None) -> List[Dict[str, Any]]:
         results = []
+        max_workers = min(os.cpu_count() or 1, 4) # Limit workers
         with ThreadPoolExecutor(max_workers=max_workers) as executor:
             future_to_chunk = {
                 executor.submit(self._transcribe_audio_chunk, chunk_info, input_data): chunk_info
                 for chunk_info in chunk_data
             }
             for future in as_completed(future_to_chunk):
                 chunk_info = future_to_chunk[future]
                 try:
                     result = future.result()
                     results.append(result)
+                    status = "Transcribed" if result['success'] else "Failed"
+                    preview = result['text'][:50] + "..." if len(result['text']) > 50 else result['text']
+                    print(f"{status} chunk {result['index']}: {preview}")
                 except Exception as e:
                     print(f"Error processing chunk {chunk_info.get('index', '?')}: {str(e)}")
                     results.append({
+                        'text': f'[PROCESSING_ERROR: {str(e)}]', 'confidence': 0.0,
+                        'start_time': chunk_info.get('start_time', 0), 'end_time': chunk_info.get('end_time', 0),
+                        'duration': chunk_info.get('duration', 0), 'index': chunk_info.get('index', 0),
+                        'success': False, 'error': str(e)
                     })
         results.sort(key=lambda x: x['index'])
         return results
+    def extract_transcript(self, audio_path: str, video_hash: str, input_data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
         cache_enabled = self._get_config('cache_enabled', True, input_data)
         cache_path = self._get_cache_path(video_hash, "transcript.json")
         cached_transcript = self._load_from_cache(cache_path, cache_enabled)
         if cached_transcript:
             print("Using cached transcript")
             return cached_transcript
         try:
             print("Splitting audio into chunks...")
             chunk_data = self._split_audio_intelligent(audio_path, input_data)
             if not chunk_data:
+                return {'error': 'Failed to split audio', 'full_transcript': '', 'success_rate': 0.0}
             print(f"Transcribing {len(chunk_data)} audio chunks...")
             transcript_results = self._transcribe_chunks_parallel(chunk_data, input_data)
+            successful_chunks = [r for r in transcript_results if r['success']]
+            full_text = ' '.join([r['text'] for r in successful_chunks if r['text'] and '[INAUDIBLE]' not in r['text'] and 'ERROR' not in r['text']]).strip()
+            total_c = len(transcript_results)
+            successful_c = len(successful_chunks)
+            success_rate = successful_c / total_c if total_c > 0 else 0.0
             final_result = {
+                'full_transcript': full_text, 'word_count': len(full_text.split()),
+                'total_chunks': total_c, 'successful_chunks': successful_c, 'success_rate': success_rate,
+                'extraction_timestamp': time.time(), 'extraction_date': time.strftime('%Y-%m-%d %H:%M:%S'),
                 'detailed_results': transcript_results
             }
             self._save_to_cache(cache_path, final_result, cache_enabled)
             print(f"Transcript extraction completed. Success rate: {success_rate:.1%}")
             return final_result
         except Exception as e:
             print(f"Error during transcript extraction: {str(e)}")
+            return {'error': str(e), 'full_transcript': '', 'success_rate': 0.0}
+    def _run(self, youtube_url: str, **kwargs: Any) -> str:
+        input_data = {'youtube_url': youtube_url, **kwargs}
+        if not youtube_url: return "Error: youtube_url is required."
         try:
             video_hash = self._get_video_hash(youtube_url)
+            print(f"Processing YouTube URL: {youtube_url} (Hash: {video_hash})")
             audio_path = self.download_youtube_audio(youtube_url, video_hash, input_data)
             if not audio_path or not os.path.exists(audio_path):
+                return "Error: Failed to download YouTube audio. Check URL or authentication (cookies)."
             print("Extracting audio transcript...")
             transcript_result = self.extract_transcript(audio_path, video_hash, input_data)
+            if transcript_result.get("error"): return f"Error: {transcript_result['error']}"
+            main_transcript = transcript_result.get('full_transcript', '')
+            if not main_transcript: return "Error: No transcript could be extracted."
+            print(f"Transcript extracted. Word count: {transcript_result.get('word_count',0)}. Success: {transcript_result.get('success_rate',0):.1%}")
             return "TRANSCRIPT: " + main_transcript
         except Exception as e:
+            print(f"Unhandled error in _run: {str(e)}") # For debugging
             return f"Error during transcript extraction: {str(e)}"
 # Factory function to create the tool