// Spaces:
// Running
// Running
const express = require("express");
const { chromium } = require("playwright");

// Express application that serves the scraper API.
const app = express();
// Listening port: taken from the environment when set, otherwise 7860.
const PORT = process.env.PORT || 7860;
// Shared Playwright browser handle; it stays undefined until the startup
// launch has resolved.
let browser;
// Launch the Playwright browser once at startup and keep it in `browser`
// so every request reuses the same Chromium process.
(async () => {
  try {
    browser = await chromium.launch({
      headless: true,
      args: [
        "--no-sandbox",
        "--disable-setuid-sandbox",
        "--disable-dev-shm-usage",
      ],
    });
  } catch (err) {
    // Without a browser no request can succeed, so fail loudly instead of
    // leaving a rejected promise floating.
    console.error("Failed to launch Playwright browser:", err);
    process.exit(1);
  }
})();
/**
 * Core scraping function: collect the video cards shown on a YouTube
 * channel's "videos" page.
 *
 * Opens a fresh context on the shared `browser`, navigates to
 * `https://www.youtube.com/@<channelName>/videos`, waits for the first
 * `a#video-title-link` to appear and maps each `#dismissible` element to a
 * plain record. The context is always closed before returning or throwing.
 *
 * @param {string} channelName - Channel handle, with or without a leading "@".
 * @param {number} [limit=31] - Maximum number of cards to return.
 * @returns {Promise<Array<{title: string, videoId: (string|null), url: string,
 *   thumbnail: (string|null), published: string}>>} One record per card;
 *   `videoId` and `thumbnail` are null when the link has no `v=` parameter.
 * @throws {Error} When the shared browser has not been launched yet, or when
 *   navigation or the selector wait fails (for example on timeout).
 */
async function scrapeChannelVideos(channelName, limit = 31) {
  if (!browser) {
    throw new Error("Browser is not ready yet; retry shortly");
  }

  // The handle comes from the request path: encode it so characters such as
  // "/", "?" or "#" cannot change which YouTube URL is visited.
  const handle = encodeURIComponent(String(channelName).replace(/^@/, ""));
  const url = `https://www.youtube.com/@${handle}/videos`;

  const context = await browser.newContext({
    userAgent:
      "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
      "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
  });

  try {
    const page = await context.newPage();
    await page.goto(url, { waitUntil: "networkidle", timeout: 20000 });
    await page.waitForSelector("a#video-title-link", { timeout: 10000 });

    // The callback runs inside the page, so it cannot close over variables
    // from this scope; `limit` is handed over as the extra argument.
    return await page.$$eval(
      "#dismissible",
      (cards, max) =>
        cards.slice(0, max).map((card) => {
          const anchor = card.querySelector("a#video-title-link");
          const meta = card.querySelector("#metadata-line span");
          const vidUrl = anchor?.href || "";
          const videoId = vidUrl.match(/v=([^&]+)/)?.[1] || null;
          return {
            title: anchor?.title || anchor?.textContent.trim() || "",
            videoId,
            url: vidUrl,
            thumbnail: videoId
              ? `https://img.youtube.com/vi/${videoId}/maxresdefault.jpg`
              : null,
            published: meta?.textContent.trim() || "",
          };
        }),
      limit
    );
  } finally {
    // Runs on success and on failure (including a failed newPage()).
    await context.close();
  }
}
// Home route: returns a JSON welcome message pointing at the docs route and
// an example scrape URL.
app.get("/", (req, res) => {
  res.json({
    message: "Welcome to the YouTube Video Scraper API",
    docs: "/api",
    example: "/api/video/MrBeast",
  });
});
// API landing/documentation route: lists the available endpoints as JSON.
app.get("/api", (req, res) => {
  res.json({
    endpoints: [
      {
        route: "/api/video/:channelName",
        method: "GET",
        // The scraper returns up to 31 cards, not 3.
        description:
          "Scrape the latest videos (up to 31) for a given YouTube channel",
      },
    ],
  });
});
// Dynamic video-scraping endpoint.
// Responds with { channel, videos, timestamp } on success, 400 when the
// channel name contains characters that would alter the scraped URL, and
// 500 with the error message when scraping fails.
app.get("/api/video/:channelName", async (req, res) => {
  const channel = req.params.channelName;

  // Route params arrive URL-decoded, so an encoded "/", "?" or "#" would
  // otherwise end up verbatim in the YouTube URL.
  if (/[\s/?#]/.test(channel)) {
    res.status(400).json({ error: "Invalid channel name" });
    return;
  }

  try {
    const videos = await scrapeChannelVideos(channel);
    res.json({ channel, videos, timestamp: new Date().toISOString() });
  } catch (error) {
    // Keep a server-side trace; the client only receives the message.
    console.error(`Failed to scrape channel "${channel}":`, error);
    res.status(500).json({ error: error.message });
  }
});
// Start the HTTP server and log the port once it is accepting connections.
app.listen(PORT, () => {
  console.log(`Server listening on port ${PORT}`);
});