Daniel Amendoeira committed on
Commit
083f54c
·
verified ·
1 Parent(s): 0c7927e

Update tools.py

Browse files
Files changed (1) hide show
  1. tools.py +15 -6
tools.py CHANGED
@@ -4,9 +4,10 @@ import requests
4
  import openai
5
  import os
6
  import tempfile
7
- from urllib.parse import urlparse
8
  from openai import OpenAI
9
  from youtube_transcript_api import YouTubeTranscriptApi
 
10
  from pytube import extract
11
  from openai import OpenAI
12
 
@@ -128,13 +129,21 @@ def transcribe_youtube(youtube_url: str) -> str:
128
  youtube_url (str): youtube video's url
129
  """
130
  try:
131
- video_id = extract.video_id(youtube_url)
132
- transcript = YouTubeTranscriptApi.get_transcript(video_id)
133
-
 
 
 
 
 
134
  # keep only text
135
- text = '\n'.join([s['text'] for s in transcript])
136
  return text
137
-
 
 
 
138
  except Exception as e:
139
  return f"transcribe_youtube failed: {e}"
140
 
 
4
  import openai
5
  import os
6
  import tempfile
7
+ from urllib.parse import urlparse, parse_qs
8
  from openai import OpenAI
9
  from youtube_transcript_api import YouTubeTranscriptApi
10
+ from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
11
  from pytube import extract
12
  from openai import OpenAI
13
 
 
129
  youtube_url (str): youtube video's url
130
  """
131
  try:
132
+ query = urlparse(youtube_url).query
133
+ video_id = parse_qs(query)['v'][0]
134
+ except Exception:
135
+ return "invalid YouTube URL"
136
+
137
+ try:
138
+ transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
139
+ transcript = transcript_list.find_transcript(['en']).fetch()
140
  # keep only text
141
+ text = '\n'.join([t['text'] for t in transcript])
142
  return text
143
+
144
+ except (TranscriptsDisabled, NoTranscriptFound, VideoUnavailable) as e:
145
+ return f"transcript unavailable: {str(e)}"
146
+
147
  except Exception as e:
148
  return f"transcribe_youtube failed: {e}"
149