import logging
import time
from io import BytesIO

from bs4 import BeautifulSoup
from PIL import Image
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

logger = logging.getLogger(__name__)

# Lookup form that get_vehicle_info() fills in and submits.
INFO_PKB_URL = "https://www.jambisamsat.net/infopkb.html"


def _build_chrome_options(*arguments):
    """Return ChromeOptions with each of *arguments* added as a CLI flag."""
    options = webdriver.ChromeOptions()
    for argument in arguments:
        options.add_argument(argument)
    return options


def _blank_image():
    """Return the 1x1 RGB placeholder image used when a capture fails."""
    return Image.new('RGB', (1, 1))


def take_webdata(url: str):
    """Open *url* in headless Chrome and capture a screenshot and the title.

    Args:
        url: Address of the page to load.

    Returns:
        A ``(image, page_title)`` tuple. ``image`` is a PIL image decoded
        from the browser's PNG screenshot. If the browser raises a
        ``WebDriverException`` at any point, ``image`` is a 1x1 placeholder
        and ``page_title`` is whatever had been read so far (``""`` if the
        failure happened before the title was read).
    """
    options = _build_chrome_options(
        '--headless',
        '--no-sandbox',
        '--disable-dev-shm-usage',
    )

    # Bound up front so the except/finally clauses can never hit an unbound
    # name, even when webdriver.Chrome() itself is what fails.
    wd = None
    page_title = ""
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)  # Adjust the window size here
        wd.get(url)
        # NOTE: an implicit wait only configures element-lookup timeouts;
        # it does not pause before the screenshot below.
        wd.implicitly_wait(5)
        # Get the page title
        page_title = wd.title
        screenshot = wd.get_screenshot_as_png()
    except WebDriverException as exc:
        logger.warning("take_webdata failed for %s: %s", url, exc)
        return _blank_image(), page_title
    finally:
        if wd is not None:
            wd.quit()

    return Image.open(BytesIO(screenshot)), page_title


def scrape_vehicle(page_source: str):
    """Extract the vehicle table and the ``det_pkb`` detail rows from HTML.

    Args:
        page_source: HTML text of the result page.

    Returns:
        A ``(data_kendaraan, rincians)`` tuple.

        ``data_kendaraan`` maps a normalised label (lower-cased, dots
        removed, spaces turned into underscores) taken from the first cell of
        each row of the first ``<table>`` to the text of that row's third
        cell. Rows with fewer than three cells are skipped. It is empty when
        the page has no table.

        ``rincians`` is a list of dicts built from the ``div.row`` elements
        inside ``div#det_pkb`` (the first row is treated as a header). Each
        dict may hold the keys ``"pokok"``, ``"denda"``, ``"total"`` and
        ``"jenis"``; keys whose text is empty are omitted.
    """
    soup = BeautifulSoup(page_source, "html.parser")

    data_kendaraan = {}
    table = soup.find("table")
    # find() returns None when the page has no table (e.g. an error page);
    # treat that as "no vehicle data" instead of raising AttributeError.
    table_rows = table.find_all("tr") if table is not None else []
    for row in table_rows:
        cells = row.find_all("td")
        if len(cells) >= 3:
            key = (
                cells[0]
                .get_text(strip=True)
                .lower()
                .replace(".", "")
                .replace(" ", "_")
            )
            # The third cell is read as the value; the second one is skipped.
            data_kendaraan[key] = cells[2].get_text(strip=True)

    rincians = []
    rincian_div = soup.find("div", id="det_pkb")
    if rincian_div:
        rows = rincian_div.find_all("div", class_="row")
        for row in rows[1:]:  # the first row is the header
            cols = row.find_all("p")
            if len(cols) < 3:
                continue
            jenis = cols[3].get_text(strip=True) if len(cols) > 3 else ""
            rincian = {
                "pokok": cols[0].get_text(strip=True),
                "denda": cols[1].get_text(strip=True),
                "total": cols[2].get_text(strip=True),
                "jenis": jenis.upper(),
            }
            # Drop empty fields; skip the row entirely if nothing is left.
            rincian = {k: v for k, v in rincian.items() if v}
            if rincian:
                rincians.append(rincian)

    return data_kendaraan, rincians


def get_vehicle_info(plate_number: str):
    """Submit *plate_number* to the lookup form and capture the result page.

    The scraped vehicle data is printed to stdout; it is not part of the
    return value.

    Args:
        plate_number: Text typed into the ``no_polisi`` input of the form.

    Returns:
        A ``(image, page_title)`` tuple. ``image`` is a PIL image of the
        result page, taken after the window is resized to the document
        height. If a ``WebDriverException`` (which includes wait timeouts)
        is raised after the browser has started, ``image`` is a 1x1
        placeholder and ``page_title`` is ``""``.

    Raises:
        WebDriverException: If the Chrome session cannot be started; that
            call is made before the guarded section.
    """
    # Configure headless Chrome
    options = _build_chrome_options(
        "--headless",
        "--disable-gpu",
        "--no-sandbox",
    )

    # Path to chromedriver (adjust if needed)
    driver = webdriver.Chrome(options=options)

    # Bound before the try block so the except clause can always return it.
    page_title = ""
    try:
        driver.get(INFO_PKB_URL)
        time.sleep(1)

        # The wait returns the located element, so no second lookup is needed.
        input_field = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "no_polisi"))
        )
        input_field.clear()
        input_field.send_keys(plate_number)

        submit_button = driver.find_element(
            By.CSS_SELECTOR, 'button.btn.btn-primary[type="submit"]'
        )
        submit_button.click()

        # Wait for the new page to load
        WebDriverWait(driver, 10).until(EC.url_contains("infopkb.php"))
        driver.implicitly_wait(3)

        scroll_height = driver.execute_script(
            "return document.body.scrollHeight"
        )
        # force full-page height
        driver.set_window_size(1920, scroll_height + 200)
        time.sleep(1)

        data_kendaraan, rincian = scrape_vehicle(driver.page_source)
        # This print is the only place the scraped data is surfaced.
        print(data_kendaraan, rincian)

        page_title = driver.title
        screenshot = driver.get_screenshot_as_png()
        return Image.open(BytesIO(screenshot)), page_title
    except WebDriverException as exc:
        logger.warning(
            "get_vehicle_info failed for %s: %s", plate_number, exc
        )
        return _blank_image(), page_title
    finally:
        driver.quit()