Spaces:
Paused
Paused
fix(websearch): block protocols on playwright (#1579)
Browse files
* fix(websearch): block protocols on playwright
* fix: only allow https
src/lib/server/websearch/scrape/playwright.ts
CHANGED
|
@@ -68,7 +68,18 @@ export async function withPage<T>(
|
|
| 68 |
|
| 69 |
try {
|
| 70 |
const page = await ctx.newPage();
|
| 71 |
-
env.PLAYWRIGHT_ADBLOCKER === "true"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
const res = await page
|
| 74 |
.goto(url, { waitUntil: "load", timeout: parseInt(env.WEBSEARCH_TIMEOUT) })
|
|
@@ -78,9 +89,8 @@ export async function withPage<T>(
|
|
| 78 |
);
|
| 79 |
});
|
| 80 |
|
| 81 |
-
// await needed here so that we don't close the context before the callback is done
|
| 82 |
return await callback(page, res ?? undefined);
|
| 83 |
} finally {
|
| 84 |
-
ctx.close();
|
| 85 |
}
|
| 86 |
}
|
|
|
|
| 68 |
|
| 69 |
try {
|
| 70 |
const page = await ctx.newPage();
|
| 71 |
+
if (env.PLAYWRIGHT_ADBLOCKER === "true") {
|
| 72 |
+
await blocker.enableBlockingInPage(page);
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
await page.route("**", (route, request) => {
|
| 76 |
+
const requestUrl = request.url();
|
| 77 |
+
if (!requestUrl.startsWith("https://")) {
|
| 78 |
+
logger.warn(`Blocked request to: ${requestUrl}`);
|
| 79 |
+
return route.abort();
|
| 80 |
+
}
|
| 81 |
+
return route.continue();
|
| 82 |
+
});
|
| 83 |
|
| 84 |
const res = await page
|
| 85 |
.goto(url, { waitUntil: "load", timeout: parseInt(env.WEBSEARCH_TIMEOUT) })
|
|
|
|
| 89 |
);
|
| 90 |
});
|
| 91 |
|
|
|
|
| 92 |
return await callback(page, res ?? undefined);
|
| 93 |
} finally {
|
| 94 |
+
await ctx.close();
|
| 95 |
}
|
| 96 |
}
|