Create youtube_scraper.py
youtube_scraper.py  ADDED  +28 -0
@@ -0,0 +1,28 @@
+import requests
+from bs4 import BeautifulSoup
+from urllib.parse import urlparse, parse_qs
+from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, VideoUnavailable
+
+def extract_video_id(url: str) -> str:
+    qs = parse_qs(urlparse(url).query)
+    return qs.get('v', [urlparse(url).path.split('/')[-1]])[0]
+
+def scrape_metadata(url: str):
+    r = requests.get(url); r.raise_for_status()
+    soup = BeautifulSoup(r.text, 'html.parser')
+    title = soup.title.string if soup.title and soup.title.string else ""
+    desc = soup.find('meta', {'name': 'description'})
+    return title, (desc['content'] if desc else "")
+
+def fetch_transcript(video_id: str):
+    try:
+        fetched = YouTubeTranscriptApi().fetch(video_id)
+        return fetched.to_raw_data()  # JSON-friendly list of dicts
+    except (TranscriptsDisabled, VideoUnavailable):
+        return []
+
+def get_youtube_info(url: str):
+    vid = extract_video_id(url)
+    title, desc = scrape_metadata(url)
+    captions = fetch_transcript(vid)
+    return {"videoId": vid, "title": title, "description": desc, "captions": captions}
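
A minimal usage sketch, not part of the commit: it assumes the module is importable as youtube_scraper, that requests, beautifulsoup4 and youtube-transcript-api 1.x (whose instance API provides fetch() and to_raw_data()) are installed, and the URLs below are placeholders only.

# Usage sketch (hypothetical URLs; assumes requests, beautifulsoup4 and
# youtube-transcript-api >= 1.0 are installed alongside youtube_scraper.py).
from youtube_scraper import get_youtube_info

info = get_youtube_info("https://www.youtube.com/watch?v=dQw4w9WgXcQ")
print(info["videoId"])        # video id parsed from the ?v= query parameter
print(info["title"])          # page <title> scraped from the static HTML
print(len(info["captions"]))  # 0 if transcripts are disabled or the video is unavailable
for segment in info["captions"][:3]:
    # each raw caption segment is a dict with 'text', 'start' and 'duration'
    print(segment["text"], segment["start"], segment["duration"])

# Short links work too: extract_video_id falls back to the last path segment.
info2 = get_youtube_info("https://youtu.be/dQw4w9WgXcQ")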