Spaces:
Sleeping
Sleeping
Commit
·
9ad60e7
1
Parent(s):
55d7b7b
Changed the Gemini URL-filter prompt and the page.goto wait_until strategy
Browse files
main.py
CHANGED
@@ -42,7 +42,7 @@ async def scrape_visible_text(url):
|
|
42 |
}
|
43 |
)
|
44 |
page = await context.new_page()
|
45 |
-
await page.goto(url, wait_until="
|
46 |
visible_text = await page.evaluate("document.body.innerText")
|
47 |
await browser.close()
|
48 |
return visible_text
|
@@ -117,7 +117,7 @@ async def crawl_web(request: CrawlerRequest):
|
|
117 |
all_links = google_links + quora_links + other_links
|
118 |
|
119 |
# Use Gemini to filter and list relevant URLs
|
120 |
-
prompt = f"Filter the following URLs and list only those that are most relevant to the topic '{topic_title}':\n{all_links}"
|
121 |
model = genai.GenerativeModel("gemini-1.5-pro")
|
122 |
response = model.generate_content(prompt)
|
123 |
filtered_links = response.text.strip().split('\n')
|
|
|
42 |
}
|
43 |
)
|
44 |
page = await context.new_page()
|
45 |
+
await page.goto(url, wait_until="domcontentloaded")
|
46 |
visible_text = await page.evaluate("document.body.innerText")
|
47 |
await browser.close()
|
48 |
return visible_text
|
|
|
117 |
all_links = google_links + quora_links + other_links
|
118 |
|
119 |
# Use Gemini to filter and list relevant URLs
|
120 |
+
prompt = f"Filter the following URLs and list only those that are most relevant to the topic '{topic_title}':\n{all_links}. Response should only contain the array of links with no formatting."
|
121 |
model = genai.GenerativeModel("gemini-1.5-pro")
|
122 |
response = model.generate_content(prompt)
|
123 |
filtered_links = response.text.strip().split('\n')
|