Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -363,85 +363,42 @@ class ImageAnalysisTool:
|
|
363 |
return self._run(image_url)
|
364 |
|
365 |
|
366 |
-
|
367 |
-
|
368 |
-
|
369 |
-
|
370 |
-
|
371 |
-
# Assuming 'tool' decorator and other smolagents components are imported
|
372 |
-
from smolagents import tool, FinalAnswerTool, DuckDuckGoSearchTool, HfApiModel, CodeAgent # Add other necessary imports
|
373 |
-
|
374 |
-
# --- Custom VideoTranscriptionTool Class ---
|
375 |
-
|
376 |
-
class VideoTranscriptionTool:
|
377 |
-
"""
|
378 |
-
A tool for transcribing audio from YouTube videos using Whisper.
|
379 |
-
"""
|
380 |
-
name = "video_transcription"
|
381 |
-
description = (
|
382 |
-
"Transcribes the audio from a given YouTube video URL and returns the text content. "
|
383 |
-
"Useful for getting text from video lectures, interviews, etc."
|
384 |
-
)
|
385 |
inputs = {
|
386 |
-
"
|
387 |
-
|
388 |
-
"description": "The URL of the YouTube video to transcribe (e.g., 'https://www.youtube.com/watch?v=dQw4w9WgXcQ').",
|
389 |
-
}
|
390 |
}
|
|
|
391 |
|
392 |
-
def
|
393 |
-
|
394 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
395 |
|
396 |
-
def _run(self, video_url: str) -> str:
|
397 |
-
"""
|
398 |
-
Downloads the audio from the video and transcribes it.
|
399 |
-
"""
|
400 |
-
temp_audio_file = "temp_audio.mp3"
|
401 |
try:
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
|
406 |
-
|
407 |
-
'
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
|
413 |
-
info_dict = ydl.extract_info(video_url, download=True)
|
414 |
-
# yt_dlp sometimes adds an extension, ensure we get the right name
|
415 |
-
downloaded_file = ydl.prepare_filename(info_dict)
|
416 |
-
if not downloaded_file.endswith(".mp3"):
|
417 |
-
# This might happen if the original format was already mp3 or similar
|
418 |
-
downloaded_file = os.path.splitext(downloaded_file)[0] + ".mp3"
|
419 |
-
|
420 |
-
if not os.path.exists(downloaded_file):
|
421 |
-
return f"Error: Could not download audio from {video_url}"
|
422 |
-
|
423 |
-
# 2. Transcribe the audio
|
424 |
-
transcription_result = self.transcriber(downloaded_file)
|
425 |
-
transcribed_text = transcription_result['text']
|
426 |
-
|
427 |
-
return transcribed_text
|
428 |
-
|
429 |
-
except yt_dlp.DownloadError as e:
|
430 |
-
return f"Error downloading video: {e}"
|
431 |
except Exception as e:
|
432 |
-
return f"
|
433 |
-
|
434 |
-
# Clean up the temporary audio file
|
435 |
-
if os.path.exists(temp_audio_file):
|
436 |
-
os.remove(temp_audio_file)
|
437 |
-
# Remove any other potential temporary files created by yt_dlp
|
438 |
-
# This is a bit tricky, yt_dlp can create .ytdl files or similar
|
439 |
-
for f in os.listdir('.'):
|
440 |
-
if f.startswith(os.path.splitext(os.path.basename(temp_audio_file))[0]) and f != temp_audio_file:
|
441 |
-
os.remove(f)
|
442 |
-
|
443 |
-
def __call__(self, video_url: str) -> str:
|
444 |
-
return self._run(video_url)
|
445 |
|
446 |
|
447 |
class BasicAgent:
|
|
|
363 |
return self._run(image_url)
|
364 |
|
365 |
|
366 |
+
class VideoTranscriptionTool(Tool):
    """Fetch the text transcript of a YouTube video.

    The tool accepts a full ``youtube.com/watch`` URL, a short ``youtu.be``
    URL, or a bare 11-character video ID, and returns the transcript obtained
    through ``YouTubeTranscriptApi.get_transcript``. Failures are reported as
    a returned message string rather than a raised exception.
    """

    name = "transcript_video"
    description = "Fetch text transcript from YouTube movies with optional timestamps"
    inputs = {
        "url": {"type": "string", "description": "YouTube video URL or ID"},
        "include_timestamps": {
            "type": "boolean",
            "description": "If timestamps should be included in output",
            "nullable": True,
        },
    }
    output_type = "string"

    @staticmethod
    def _extract_video_id(url: str):
        """Return the video ID found in *url*, or ``None`` if there is none.

        Recognised forms are ``...youtube.com/watch?...v=<id>...``,
        ``...youtu.be/<id>...`` and a bare 11-character ID.
        """
        # Imported locally so the class does not depend on a file-level import.
        from urllib.parse import parse_qs

        candidate = url.strip()

        if "youtube.com/watch" in candidate:
            # Parse the query string properly instead of splitting on "v=":
            # that avoids an IndexError when the parameter is missing and
            # avoids matching parameters that merely end in "v" (e.g. "rev=").
            query = candidate.partition("?")[2].partition("#")[0]
            values = parse_qs(query).get("v")
            return values[0] if values else None

        if "youtu.be/" in candidate:
            video_id = candidate.split("youtu.be/", 1)[1]
            # Drop any query string, fragment or trailing path segment.
            for separator in ("?", "&", "#", "/"):
                video_id = video_id.split(separator, 1)[0]
            return video_id or None

        if len(candidate) == 11:  # Direct ID
            return candidate

        return None

    def forward(self, url: str, include_timestamps: bool = False) -> str:
        """Return the transcript for the video identified by *url*.

        Args:
            url: YouTube watch URL, ``youtu.be`` short URL, or a bare video ID.
            include_timestamps: When truthy, each transcript part is put on
                its own line and prefixed with ``[M:SS]``; otherwise all parts
                are joined with single spaces. ``None`` is treated as false.

        Returns:
            The transcript text, or a message describing why it could not be
            produced (invalid URL/ID, or an error raised while fetching).
        """
        video_id = self._extract_video_id(url)
        if video_id is None:
            return f"YouTube URL or ID: {url} is invalid!"

        try:
            # NOTE(review): newer youtube-transcript-api releases appear to
            # replace the static get_transcript() with an instance API --
            # confirm against the version pinned for this app.
            transcription = YouTubeTranscriptApi.get_transcript(video_id)

            if include_timestamps:
                formatted_transcription = []
                for part in transcription:
                    minutes, seconds = divmod(part['start'], 60)
                    timestamp = f"{int(minutes)}:{int(seconds):02d}"
                    formatted_transcription.append(f"[{timestamp}] {part['text']}")
                return "\n".join(formatted_transcription)

            return " ".join(part['text'] for part in transcription)

        except Exception as e:
            # Deliberately broad: the result is a message string instead of
            # an exception propagating to the caller.
            return f"Error in extracting YouTube transcript: {str(e)}"
|
401 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
402 |
|
403 |
|
404 |
class BasicAgent:
|