// playwright2 / index.js
// deepak191z's picture
// Update index.js
// 57a02ae verified
const express = require("express");
const { chromium } = require("playwright");
// Express application instance; the routes defined in this file are registered on it.
const app = express();
// Port the HTTP server listens on: the PORT environment variable when it is set
// to a non-empty value, otherwise 7860.
const PORT = process.env.PORT || 7860;
// Shared Playwright browser handle. It stays undefined until the asynchronous
// chromium.launch() call below resolves, so code reading it earlier sees undefined.
let browser;
// Launch a single headless Chromium instance once at startup and store it in
// the module-level `browser` variable.
//
// The launch is asynchronous, so `browser` remains undefined until it resolves.
// The promise is explicitly terminated with .catch(): without a browser the
// process cannot do any useful work, so a failed launch is logged and the
// process exits with a non-zero code instead of leaving a floating rejection.
(async () => {
  browser = await chromium.launch({
    headless: true,
    args: [
      // Chromium sandbox is disabled; presumably because this runs as root
      // inside a container where the sandbox cannot start — confirm for your host.
      "--no-sandbox",
      "--disable-setuid-sandbox",
      // Avoid relying on /dev/shm for shared memory (often tiny in containers).
      "--disable-dev-shm-usage"
    ]
  });
})().catch((err) => {
  console.error("Failed to launch Chromium:", err);
  process.exit(1);
});
// Number of video cards returned when the caller does not pass a limit.
const DEFAULT_VIDEO_LIMIT = 31;

// Desktop Chrome user agent sent with every scraping context.
const SCRAPER_USER_AGENT =
  "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " +
  "(KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

/**
 * Build the URL of a channel's "videos" page from its handle.
 *
 * The handle is trimmed, one leading "@" is removed (so "@name" and "name"
 * are equivalent) and the result is percent-encoded so characters such as
 * "/", "?" or "#" cannot alter the path of the requested URL.
 *
 * @param {string} channelName - Channel handle, with or without a leading "@".
 * @returns {string} Absolute URL of the channel's videos page.
 * @throws {Error} If the handle is empty after normalisation.
 */
function buildChannelVideosUrl(channelName) {
  const handle = String(channelName ?? "").trim().replace(/^@/, "");
  if (handle === "") {
    throw new Error("channelName must be a non-empty channel handle");
  }
  return `https://www.youtube.com/@${encodeURIComponent(handle)}/videos`;
}

/**
 * Core scraping function: open a channel's videos page in a fresh browser
 * context and extract up to `limit` video cards from it.
 *
 * Each returned item has the shape
 * `{ title, videoId, url, thumbnail, published }`; `videoId` and `thumbnail`
 * are null when the card's link has no `v=` query parameter.
 * NOTE(review): `published` is the text of the FIRST span under
 * `#metadata-line`; verify against the live page that this is the publish
 * date and not another metadata item.
 *
 * @param {string} channelName - Channel handle, with or without a leading "@".
 * @param {number} [limit=31] - Maximum number of videos to return.
 * @returns {Promise<Array<object>>} Extracted video descriptors, in page order.
 * @throws {RangeError} If `limit` is not a non-negative integer.
 * @throws {Error} If the handle is empty, the shared browser has not finished
 *   launching, or navigation / selector waiting fails or times out.
 */
async function scrapeChannelVideos(channelName, limit = DEFAULT_VIDEO_LIMIT) {
  if (!Number.isInteger(limit) || limit < 0) {
    throw new RangeError("limit must be a non-negative integer");
  }
  const url = buildChannelVideosUrl(channelName);

  // The browser is launched asynchronously at startup; fail with a clear
  // message rather than a TypeError if a request arrives before it is ready.
  if (!browser) {
    throw new Error("Browser is not ready yet; please retry shortly");
  }

  const context = await browser.newContext({ userAgent: SCRAPER_USER_AGENT });
  try {
    // newPage() is inside the try so the context is released even if it throws.
    const page = await context.newPage();
    await page.goto(url, { waitUntil: "networkidle", timeout: 20000 });
    await page.waitForSelector("a#video-title-link", { timeout: 10000 });

    // This callback is executed inside the page, so it must be self-contained:
    // it cannot see Node-side variables, which is why `limit` is passed as an
    // explicit argument instead of being captured by closure.
    return await page.$$eval(
      "#dismissible",
      (els, max) =>
        els.slice(0, max).map((el) => {
          const anchor = el.querySelector("a#video-title-link");
          const meta = el.querySelector("#metadata-line span");
          const vidUrl = anchor?.href || "";
          const videoId = vidUrl.match(/v=([^&]+)/)?.[1] || null;
          return {
            title: anchor?.title || anchor?.textContent.trim() || "",
            videoId,
            url: vidUrl,
            thumbnail: videoId
              ? `https://img.youtube.com/vi/${videoId}/maxresdefault.jpg`
              : null,
            published: meta?.textContent.trim() || ""
          };
        }),
      limit
    );
  } finally {
    // Single cleanup point for both the success and the failure path.
    await context.close();
  }
}
// Root route: replies with a small JSON greeting that points callers at the
// documentation route and shows an example scraping URL.
app.get("/", (_request, response) => {
  const welcome = {
    message: "Welcome to the YouTube Video Scraper API",
    docs: "/api",
    example: "/api/video/MrBeast",
  };
  response.json(welcome);
});
// API landing/documentation route: lists the available endpoints as JSON.
// The description reflects what the scraper actually returns (up to 31 video
// cards from the channel's videos page), not a fixed count of 3.
app.get("/api", (req, res) => {
  res.json({
    endpoints: [
      {
        route: "/api/video/:channelName",
        method: "GET",
        description: "Scrape the latest videos (up to 31) for a given YouTube channel"
      }
    ]
  });
});
// Scraping endpoint: takes the channel name from the path, runs the scraper
// and replies with the channel, its videos and an ISO-8601 timestamp.
// Any failure is reported as HTTP 500 with the error's message.
app.get("/api/video/:channelName", async (request, response) => {
  try {
    const { channelName: channel } = request.params;
    const videos = await scrapeChannelVideos(channel);
    const timestamp = new Date().toISOString();
    response.json({ channel, videos, timestamp });
  } catch (failure) {
    const body = { error: failure.message };
    response.status(500).json(body);
  }
});
// Start the HTTP server on PORT and log a line once it is accepting connections.
app.listen(PORT, function onServerReady() {
  const banner = `Server listening on port ${PORT}`;
  console.log(banner);
});