scrape-with-ai / scraper.py
PyQuarX's picture
Update scraper.py
3a92801 verified
raw
history blame
850 Bytes
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
def scrape_website(
    website: str,
    *,
    chrome_driver_path: str = "/usr/lib/chromium/chromedriver",
    chrome_binary_path: str = "/usr/bin/chromium",
    page_load_timeout=None,
) -> str:
    """Load *website* in headless Chromium and return the page source.

    A fresh browser session is started for every call and is always shut
    down before returning, even if navigation raises.

    Args:
        website: URL handed to ``driver.get``.
        chrome_driver_path: Path to the chromedriver executable. The default
            is the location this project was written against; it depends on
            how the container image (Dockerfile) installs Chromium.
        chrome_binary_path: Path to the Chromium browser binary, set as
            ``options.binary_location``.
        page_load_timeout: Optional number of seconds passed to
            ``driver.set_page_load_timeout``. ``None`` (the default) leaves
            the driver's own timeout untouched.

    Returns:
        The value of ``driver.page_source`` after navigation.

    Raises:
        Any exception raised by Selenium while starting the browser or
        loading the page is propagated unchanged.
    """
    print("Launching chromium browser...")

    options = Options()
    # Selenium must be told explicitly which browser binary to launch.
    options.binary_location = chrome_binary_path
    for flag in ("--headless", "--no-sandbox", "--disable-dev-shm-usage"):
        options.add_argument(flag)

    # The options object has to be passed to the constructor here, otherwise
    # none of the settings above are applied to the session.
    driver = webdriver.Chrome(
        service=Service(chrome_driver_path),
        options=options,
    )
    try:
        if page_load_timeout is not None:
            driver.set_page_load_timeout(page_load_timeout)
        driver.get(website)
        print("Page Loaded...")
        return driver.page_source
    finally:
        # Always release the browser process, on success or failure.
        driver.quit()