Final_Assignment_Project

Sleeping

App Files Community

wt002 committed 27 days ago

Commit

07f3838

verified ·

1 Parent(s): dd180a2

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -73

app.py CHANGED Viewed

@@ -363,85 +363,42 @@ class ImageAnalysisTool:
         return self._run(image_url)
-import os
-import requests
-from transformers import pipeline
-import yt_dlp
-# Assuming 'tool' decorator and other smolagents components are imported
-from smolagents import tool, FinalAnswerTool, DuckDuckGoSearchTool, HfApiModel, CodeAgent # Add other necessary imports
-# --- Custom VideoTranscriptionTool Class ---
-class VideoTranscriptionTool:
-    """
-    A tool for transcribing audio from YouTube videos using Whisper.
-    """
-    name = "video_transcription"
-    description = (
-        "Transcribes the audio from a given YouTube video URL and returns the text content. "
-        "Useful for getting text from video lectures, interviews, etc."
-    )
     inputs = {
-        "video_url": {
-            "type": "string",
-            "description": "The URL of the YouTube video to transcribe (e.g., 'https://www.youtube.com/watch?v=dQw4w9WgXcQ').",
-        }
     }
-    def __init__(self):
-        # Initialize the Whisper ASR pipeline only once
-        self.transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
-    def _run(self, video_url: str) -> str:
-        """
-        Downloads the audio from the video and transcribes it.
-        """
-        temp_audio_file = "temp_audio.mp3"
         try:
-            # 1. Download audio from YouTube video
-            ydl_opts = {
-                'format': 'bestaudio/best',
-                'postprocessors': [{
-                    'key': 'FFmpegExtractAudio',
-                    'preferredcodec': 'mp3',
-                    'preferredquality': '192',
-                }],
-                'outtmpl': temp_audio_file, # Specify output filename
-            }
-            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-                info_dict = ydl.extract_info(video_url, download=True)
-                # yt_dlp sometimes adds an extension, ensure we get the right name
-                downloaded_file = ydl.prepare_filename(info_dict)
-                if not downloaded_file.endswith(".mp3"):
-                     # This might happen if the original format was already mp3 or similar
-                     downloaded_file = os.path.splitext(downloaded_file)[0] + ".mp3"
-            if not os.path.exists(downloaded_file):
-                return f"Error: Could not download audio from {video_url}"
-            # 2. Transcribe the audio
-            transcription_result = self.transcriber(downloaded_file)
-            transcribed_text = transcription_result['text']
-            return transcribed_text
-        except yt_dlp.DownloadError as e:
-            return f"Error downloading video: {e}"
         except Exception as e:
-            return f"An error occurred during transcription: {e}"
-        finally:
-            # Clean up the temporary audio file
-            if os.path.exists(temp_audio_file):
-                os.remove(temp_audio_file)
-            # Remove any other potential temporary files created by yt_dlp
-            # This is a bit tricky, yt_dlp can create .ytdl files or similar
-            for f in os.listdir('.'):
-                if f.startswith(os.path.splitext(os.path.basename(temp_audio_file))[0]) and f != temp_audio_file:
-                    os.remove(f)
-    def __call__(self, video_url: str) -> str:
-        return self._run(video_url)
 class BasicAgent:

         return self._run(image_url)
+class VideoTranscriptionTool(Tool):
+    """Fetch transcripts from YouTube videos"""
+    name = "transcript_video"
+    description = "Fetch text transcript from YouTube movies with optional timestamps"
     inputs = {
+        "url": {"type": "string", "description": "YouTube video URL or ID"},
+        "include_timestamps": {"type": "boolean", "description": "If timestamps should be included in output", "nullable": True}
     }
+    output_type = "string"
+    def forward(self, url: str, include_timestamps: bool = False) -> str:
+        if "youtube.com/watch" in url:
+            video_id = url.split("v=")[1].split("&")[0]
+        elif "youtu.be/" in url:
+            video_id = url.split("youtu.be/")[1].split("?")[0]
+        elif len(url.strip()) == 11:  # Direct ID
+            video_id = url.strip()
+        else:
+            return f"YouTube URL or ID: {url} is invalid!"
         try:
+            transcription = YouTubeTranscriptApi.get_transcript(video_id)
+            if include_timestamps:
+                formatted_transcription = []
+                for part in transcription:
+                    timestamp = f"{int(part['start']//60)}:{int(part['start']%60):02d}"
+                    formatted_transcription.append(f"[{timestamp}] {part['text']}")
+                return "\n".join(formatted_transcription)
+            else:
+                return " ".join([part['text'] for part in transcription])
         except Exception as e:
+            return f"Error in extracting YouTube transcript: {str(e)}"
 class BasicAgent: