PyQuarX committed on
Commit
646a14d
·
verified ·
1 Parent(s): 578c42d

Update scraper.py

Browse files
Files changed (1) hide show
  1. scraper.py +6 -7
scraper.py CHANGED
@@ -3,16 +3,15 @@ from selenium.webdriver.chrome.service import Service
3
  from bs4 import BeautifulSoup
4
  import os
5
 
 
6
  def scrape_website(website):
7
  print("Launching chrome browser...")
8
 
9
- chrome_driver_path = "../chromedriver"
10
- chrome_binary_path = "../chromium-browser"
11
 
12
- if not os.path.exists(chrome_driver_path):
13
- raise FileNotFoundError(f"Chromedriver not found at {chrome_driver_path}")
14
- if not os.path.exists(chrome_binary_path):
15
- raise FileNotFoundError(f"Chromium not found at {chrome_binary_path}")
16
 
17
  options = webdriver.ChromeOptions()
18
  options.binary_location = chrome_binary_path
@@ -25,12 +24,12 @@ def scrape_website(website):
25
 
26
  try:
27
  driver.get(website)
28
- print("Page Loaded...")
29
  html = driver.page_source
30
  return html
31
  finally:
32
  driver.quit()
33
 
 
34
  def extract_body_content(html_content):
35
  soup = BeautifulSoup(html_content, "html.parser")
36
  body_content = soup.body
 
3
  from bs4 import BeautifulSoup
4
  import os
5
 
6
+
7
  def scrape_website(website):
8
  print("Launching chrome browser...")
9
 
10
+ chrome_driver_path = which("chromedriver")
11
+ chrome_binary_path = which("chromium-browser") or which("chromium")
12
 
13
+ if not chrome_driver_path or not chrome_binary_path:
14
+ raise EnvironmentError("chromedriver or chromium-browser not found in PATH")
 
 
15
 
16
  options = webdriver.ChromeOptions()
17
  options.binary_location = chrome_binary_path
 
24
 
25
  try:
26
  driver.get(website)
 
27
  html = driver.page_source
28
  return html
29
  finally:
30
  driver.quit()
31
 
32
+
33
  def extract_body_content(html_content):
34
  soup = BeautifulSoup(html_content, "html.parser")
35
  body_content = soup.body