kraken9054 nsarrazin HF Staff committed on
Commit
99e8e46
·
unverified ·
1 Parent(s): 295d9d9

feat: playwright-adblocked toggle #1298 (#1301)

Browse files

* feat: playwright-adblocked toggle

* lint

* Update src/lib/server/websearch/scrape/playwright.ts

Co-authored-by: Nathan Sarrazin <[email protected]>

* replace regex with equality check

---------

Co-authored-by: Nathan Sarrazin <[email protected]>

.env CHANGED
@@ -27,6 +27,7 @@ SERPSTACK_API_KEY=#your serpstack api key here
27
  SEARCHAPI_KEY=#your searchapi api key here
28
  USE_LOCAL_WEBSEARCH=#set to true to parse google results yourself, overrides other API keys
29
  SEARXNG_QUERY_URL=# where '<query>' will be replaced with query keywords see https://docs.searxng.org/dev/search_api.html eg https://searxng.yourdomain.com/search?q=<query>&engines=duckduckgo,google&format=json
 
30
 
31
  WEBSEARCH_ALLOWLIST=`[]` # if it's defined, allow websites from only this list.
32
  WEBSEARCH_BLOCKLIST=`[]` # if it's defined, block websites from this list.
 
27
  SEARCHAPI_KEY=#your searchapi api key here
28
  USE_LOCAL_WEBSEARCH=#set to true to parse google results yourself, overrides other API keys
29
  SEARXNG_QUERY_URL=# where '<query>' will be replaced with query keywords see https://docs.searxng.org/dev/search_api.html eg https://searxng.yourdomain.com/search?q=<query>&engines=duckduckgo,google&format=json
30
+ PLAYWRIGHT_ADBLOCKER=true
31
 
32
  WEBSEARCH_ALLOWLIST=`[]` # if it's defined, allow websites from only this list.
33
  WEBSEARCH_BLOCKLIST=`[]` # if it's defined, block websites from this list.
src/lib/server/websearch/scrape/playwright.ts CHANGED
@@ -65,7 +65,7 @@ export async function withPage<T>(
65
 
66
  try {
67
  const page = await ctx.newPage();
68
- await blocker.enableBlockingInPage(page);
69
 
70
  const res = await page.goto(url, { waitUntil: "load", timeout: 3500 }).catch(() => {
71
  console.warn(`Failed to load page within 2s: ${url}`);
 
65
 
66
  try {
67
  const page = await ctx.newPage();
68
+ process.env.PLAYWRIGHT_ADBLOCKER === "true" && (await blocker.enableBlockingInPage(page));
69
 
70
  const res = await page.goto(url, { waitUntil: "load", timeout: 3500 }).catch(() => {
71
  console.warn(`Failed to load page within 2s: ${url}`);