PyQuarX committed on
Commit
bf7ff08
·
verified ·
1 Parent(s): 0f53b6e

Update scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +8 -11
scraper.py CHANGED
@@ -4,26 +4,23 @@ from selenium.webdriver.chrome.options import Options
4
  from bs4 import BeautifulSoup
5
 
6
def scrape_website(website):
    """Load *website* in headless Chromium and return its rendered HTML.

    The driver is always quit, even when navigation fails.
    """
    print("Launching chromium browser...")

    driver_path = "/usr/lib/chromium/chromedriver"  # depends on the Dockerfile layout

    chrome_opts = Options()
    chrome_opts.binary_location = "/usr/bin/chromium"  # required: point at the Chromium binary
    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
        chrome_opts.add_argument(flag)

    # Options belong on the webdriver.Chrome constructor, not on Service().
    browser = webdriver.Chrome(service=Service(driver_path), options=chrome_opts)

    try:
        browser.get(website)
        print("Page Loaded...")
        return browser.page_source
    finally:
        browser.quit()
 
27
 
28
  def extract_body_content(html_content):
29
  soup = BeautifulSoup(html_content,"html.parser")
 
4
  from bs4 import BeautifulSoup
5
 
6
def scrape_website(website):
    """Fetch the fully rendered HTML of *website* with headless Chrome.

    Parameters:
        website (str): URL to load.

    Returns:
        str: the page source as rendered by the driver.

    The driver is always quit via the finally block, even if navigation raises.
    """
    print("Launching chrome browser...")

    chrome_driver_path = "/usr/bin/chromedriver"
    options = webdriver.ChromeOptions()
    # BUG FIX: `options` was previously passed into Service(...), which does not
    # accept an `options` keyword (TypeError at runtime). It must be passed to
    # webdriver.Chrome itself.
    driver = webdriver.Chrome(service=Service(chrome_driver_path), options=options)

    try:
        driver.get(website)
        print("Page Loaded...")
        html = driver.page_source
        return html
    finally:
        driver.quit()
23
+
24
 
25
  def extract_body_content(html_content):
26
  soup = BeautifulSoup(html_content,"html.parser")