wt002 committed on
Commit
07f3838
·
verified ·
1 Parent(s): dd180a2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -73
app.py CHANGED
@@ -363,85 +363,42 @@ class ImageAnalysisTool:
363
  return self._run(image_url)
364
 
365
 
366
- import os
367
- import requests
368
- from transformers import pipeline
369
- import yt_dlp
370
-
371
- # Assuming 'tool' decorator and other smolagents components are imported
372
- from smolagents import tool, FinalAnswerTool, DuckDuckGoSearchTool, HfApiModel, CodeAgent # Add other necessary imports
373
-
374
- # --- Custom VideoTranscriptionTool Class ---
375
-
376
class VideoTranscriptionTool:
    """Transcribe the audio track of a YouTube video with a Whisper ASR pipeline.

    The instance is callable: ``tool(video_url)`` is the same as
    ``tool._run(video_url)``. Failures are reported as a returned message
    string instead of being raised.
    """

    name = "video_transcription"
    description = (
        "Transcribes the audio from a given YouTube video URL and returns the text content. "
        "Useful for getting text from video lectures, interviews, etc."
    )
    inputs = {
        "video_url": {
            "type": "string",
            "description": "The URL of the YouTube video to transcribe (e.g., 'https://www.youtube.com/watch?v=dQw4w9WgXcQ').",
        }
    }

    def __init__(self):
        # Build the Whisper ASR pipeline once per tool instance, not per call.
        self.transcriber = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")

    def _run(self, video_url: str) -> str:
        """Download the video's audio as MP3 and return its transcription.

        Args:
            video_url: URL of the YouTube video to transcribe.

        Returns:
            The transcribed text, or an error message string when the
            download or the transcription fails.
        """
        # Local import so this block does not depend on a file-level import.
        import tempfile

        try:
            # A private, per-call directory replaces the fixed "temp_audio.mp3"
            # in the working directory: concurrent calls no longer collide, and
            # cleanup can never touch unrelated files that merely share the
            # "temp_audio" prefix or raise from a ``finally`` clause.
            with tempfile.TemporaryDirectory(prefix="video_transcription_") as work_dir:
                ydl_opts = {
                    'format': 'bestaudio/best',
                    'postprocessors': [{
                        'key': 'FFmpegExtractAudio',
                        'preferredcodec': 'mp3',
                        'preferredquality': '192',
                    }],
                    # %(ext)s lets yt-dlp name the raw download by its real
                    # container; the post-processor then writes the .mp3.
                    'outtmpl': os.path.join(work_dir, 'audio.%(ext)s'),
                }
                with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                    info_dict = ydl.extract_info(video_url, download=True)
                    downloaded_file = ydl.prepare_filename(info_dict)

                # prepare_filename reports the pre-conversion name; the
                # extracted audio carries the .mp3 extension instead.
                audio_path = os.path.splitext(downloaded_file)[0] + ".mp3"
                if not os.path.exists(audio_path):
                    return f"Error: Could not download audio from {video_url}"

                # Transcribe while the temporary directory still exists.
                transcription_result = self.transcriber(audio_path)
                return transcription_result['text']

        except yt_dlp.DownloadError as e:
            return f"Error downloading video: {e}"
        except Exception as e:
            # Tool boundary: surface any other failure as text to the caller.
            return f"An error occurred during transcription: {e}"

    def __call__(self, video_url: str) -> str:
        """Allow the tool instance to be invoked like a function."""
        return self._run(video_url)
445
 
446
 
447
  class BasicAgent:
 
363
  return self._run(image_url)
364
 
365
 
366
class VideoTranscriptionTool(Tool):
    """Fetch the transcript of a YouTube video as plain text.

    Accepts a ``youtube.com/watch`` URL, a ``youtu.be`` short link, or a bare
    11-character video ID. Every failure (unrecognised input, transcript
    lookup error) is reported as a returned message string, never raised.
    """

    name = "transcript_video"
    description = "Fetch text transcript from YouTube movies with optional timestamps"
    inputs = {
        "url": {"type": "string", "description": "YouTube video URL or ID"},
        "include_timestamps": {"type": "boolean", "description": "If timestamps should be included in output", "nullable": True},
    }
    output_type = "string"

    @staticmethod
    def _extract_video_id(url):
        """Return the video ID found in *url*, or ``None`` if none can be found."""
        # Local imports keep the helper independent of file-level imports.
        import re
        from urllib.parse import parse_qs, urlparse

        if not isinstance(url, str):
            return None

        if "youtube.com/watch" in url:
            # Parse the query string properly: splitting on "v=" breaks when
            # the parameter is missing or another parameter name ends in "v".
            # urlparse also drops any "#fragment" that follows the ID.
            values = parse_qs(urlparse(url.strip()).query).get("v")
            return values[0] if values else None

        if "youtu.be/" in url:
            tail = url.strip().split("youtu.be/", 1)[1]
            # The ID ends at the first query, fragment or path separator.
            video_id = re.split(r"[?&#/]", tail, maxsplit=1)[0]
            return video_id or None

        if len(url.strip()) == 11:  # Direct ID
            return url.strip()

        return None

    def forward(self, url: str, include_timestamps: bool = False) -> str:
        """Return the transcript for the video identified by *url*.

        Args:
            url: YouTube watch URL, ``youtu.be`` link, or 11-character video ID.
            include_timestamps: When truthy, emit one ``[m:ss] text`` line per
                transcript part; otherwise join all parts with single spaces.

        Returns:
            The transcript text, or an error message string if the input is
            not recognised or the transcript cannot be fetched.
        """
        video_id = self._extract_video_id(url)
        if video_id is None:
            return f"YouTube URL or ID: {url} is invalid!"

        try:
            # NOTE(review): newer youtube-transcript-api releases appear to
            # replace this static call with an instance-level fetch() -
            # confirm against the pinned package version.
            transcription = YouTubeTranscriptApi.get_transcript(video_id)

            if include_timestamps:
                formatted_transcription = []
                for part in transcription:
                    # 'start' is an offset in seconds; render it as m:ss.
                    minutes, seconds = divmod(int(part['start']), 60)
                    formatted_transcription.append(f"[{minutes}:{seconds:02d}] {part['text']}")
                return "\n".join(formatted_transcription)

            return " ".join(part['text'] for part in transcription)

        except Exception as e:
            # Tool boundary: report the failure as text instead of raising.
            return f"Error in extracting YouTube transcript: {str(e)}"
401
+
 
 
 
 
 
 
 
 
 
 
 
402
 
403
 
404
  class BasicAgent: