Spaces:
Running
Running
Daniel Amendoeira
committed on
Update tools.py
Browse files
tools.py
CHANGED
@@ -4,9 +4,10 @@ import requests
|
|
4 |
import openai
|
5 |
import os
|
6 |
import tempfile
|
7 |
-
from urllib.parse import urlparse
|
8 |
from openai import OpenAI
|
9 |
from youtube_transcript_api import YouTubeTranscriptApi
|
|
|
10 |
from pytube import extract
|
11 |
from openai import OpenAI
|
12 |
|
@@ -128,13 +129,21 @@ def transcribe_youtube(youtube_url: str) -> str:
|
|
128 |
youtube_url (str): youtube video's url
|
129 |
"""
|
130 |
try:
|
131 |
-
|
132 |
-
|
133 |
-
|
|
|
|
|
|
|
|
|
|
|
134 |
# keep only text
|
135 |
-
text = '\n'.join([
|
136 |
return text
|
137 |
-
|
|
|
|
|
|
|
138 |
except Exception as e:
|
139 |
return f"transcribe_youtube failed: {e}"
|
140 |
|
|
|
4 |
import openai
|
5 |
import os
|
6 |
import tempfile
|
7 |
+
from urllib.parse import urlparse, parse_qs
|
8 |
from openai import OpenAI
|
9 |
from youtube_transcript_api import YouTubeTranscriptApi
|
10 |
+
from youtube_transcript_api._errors import TranscriptsDisabled, NoTranscriptFound, VideoUnavailable
|
11 |
from pytube import extract
|
12 |
from openai import OpenAI
|
13 |
|
|
|
129 |
youtube_url (str): youtube video's url
|
130 |
"""
|
131 |
try:
|
132 |
+
query = urlparse(youtube_url).query
|
133 |
+
video_id = parse_qs(query)['v'][0]
|
134 |
+
except Exception:
|
135 |
+
return "invalid YouTube URL"
|
136 |
+
|
137 |
+
try:
|
138 |
+
transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
|
139 |
+
transcript = transcript_list.find_transcript(['en']).fetch()
|
140 |
# keep only text
|
141 |
+
text = '\n'.join([t['text'] for t in transcript])
|
142 |
return text
|
143 |
+
|
144 |
+
except (TranscriptsDisabled, NoTranscriptFound, VideoUnavailable) as e:
|
145 |
+
return f"transcript unavailable: {str(e)}"
|
146 |
+
|
147 |
except Exception as e:
|
148 |
return f"transcribe_youtube failed: {e}"
|
149 |
|