# Spaces: Sleeping  (appears to be status-banner text captured from the hosting page; not part of the program)
from selenium import webdriver | |
from selenium.common.exceptions import WebDriverException | |
from PIL import Image | |
from io import BytesIO | |
import time, requests | |
from bs4 import BeautifulSoup | |
from selenium.webdriver.common.by import By | |
from selenium.webdriver.support.ui import WebDriverWait | |
from selenium.webdriver.support import expected_conditions as EC | |
def take_webdata(url):
    """Load ``url`` in headless Chrome and return a screenshot and the title.

    Args:
        url: Address handed to ``WebDriver.get``.

    Returns:
        A ``(image, title)`` tuple. On success ``image`` is the PNG
        screenshot opened as a PIL image and ``title`` is the page title.
        If a ``WebDriverException`` is raised, ``image`` is a 1x1 RGB
        placeholder and ``title`` is whatever had been read before the
        failure (an empty string if nothing was read).
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Bind both names before the try block: if webdriver.Chrome() itself
    # raises, the except/finally clauses below must still be able to read
    # them instead of failing with UnboundLocalError.
    wd = None
    page_title = ""
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)  # Adjust the window size here
        wd.get(url)
        # NOTE: implicitly_wait only configures the timeout for element
        # lookups; kept as in the original although no lookup follows.
        wd.implicitly_wait(5)
        page_title = wd.title
        screenshot = wd.get_screenshot_as_png()
    except WebDriverException:
        return Image.new('RGB', (1, 1)), page_title
    finally:
        if wd is not None:
            wd.quit()
    return Image.open(BytesIO(screenshot)), page_title
def _scrape_detail_rows(driver, container_id, label):
    """Collect pokok/denda/total entries from the rows inside ``#container_id``.

    Args:
        driver: Object exposing ``find_elements`` (a Selenium WebDriver).
        container_id: ``id`` of the element whose ``.row`` descendants are read.
        label: Tag used in the debug output (``[ROW <label>]`` / ``[COLS <label>]``).

    Returns:
        A list of dicts with keys ``pokok``, ``denda`` and ``total``. Any
        exception is printed and the entries gathered so far are returned.
    """
    details = []
    try:
        # The first matching row is skipped as the header.
        rows = driver.find_elements(By.CSS_SELECTOR, f"#{container_id} .row")[1:]
        for row in rows:
            print(f"[ROW {label}]", row.text)
            cols = row.find_elements(By.TAG_NAME, "p")
            # Read each cell's text once; every .text access is a WebDriver call.
            texts = [col.text for col in cols]
            print(f"[COLS {label}]", texts)
            if len(texts) >= 3:
                details.append({
                    "pokok": texts[0].strip(),
                    "denda": texts[1].strip(),
                    "total": texts[2].strip(),
                })
    except Exception as e:
        print(f"Gagal parsing {container_id}:", e)
    return details


def scrape_vehicle(driver):
    """Extract vehicle data and billing rows from the page loaded in ``driver``.

    Args:
        driver: Selenium WebDriver already showing the result page.

    Returns:
        A 4-tuple ``(data_kendaraan, total_tagihan, rincians_pkb, rincians_swd)``:

        * ``data_kendaraan`` - dict built from every ``table tr`` with at
          least three ``td`` cells; the key is the first cell's text
          (lower-cased, dots removed, spaces turned into underscores) and
          the value is the third cell's text.
        * ``total_tagihan`` - list of dicts (``pokok``, ``denda``, ``total``,
          ``jenis``) from ``div.row`` elements with at least four ``p``
          children whose text contains none of "Pokok", "Denda", "Total".
        * ``rincians_pkb`` / ``rincians_swd`` - lists of dicts (``pokok``,
          ``denda``, ``total``) from ``#det_pkb`` and ``#det_swd``.

        Each section is parsed best-effort: an exception is printed and the
        section keeps whatever was collected before the failure.
    """
    data_kendaraan = {}
    try:
        for row in driver.find_elements(By.CSS_SELECTOR, "table tr"):
            cols = row.find_elements(By.TAG_NAME, "td")
            if len(cols) >= 3:
                key = cols[0].text.strip().lower().replace(".", "").replace(" ", "_")
                data_kendaraan[key] = cols[2].text.strip()
    except Exception as e:
        print("Gagal parsing tabel:", e)

    total_tagihan = []
    try:
        for row in driver.find_elements(By.CSS_SELECTOR, "div.row"):
            # One remote read per row instead of up to four.
            row_text = row.text
            print("[ROW TOTAL]", row_text)
            # Rows mentioning these words are treated as headers and skipped.
            if any(word in row_text for word in ("Pokok", "Denda", "Total")):
                continue
            cols = row.find_elements(By.TAG_NAME, "p")
            texts = [col.text for col in cols]
            print("[COLS TOTAL]", texts)
            if len(texts) >= 4:
                total_tagihan.append({
                    "pokok": texts[0].strip(),
                    "denda": texts[1].strip(),
                    "total": texts[2].strip(),
                    "jenis": texts[3].strip(),
                })
    except Exception as e:
        print("Gagal parsing total tagihan:", e)

    rincians_pkb = _scrape_detail_rows(driver, "det_pkb", "PKB")
    rincians_swd = _scrape_detail_rows(driver, "det_swd", "SWD")
    return data_kendaraan, total_tagihan, rincians_pkb, rincians_swd
def _parse_detail_rows(soup, container_id):
    """Return pokok/denda/total dicts from the rows of ``div#container_id``.

    The first ``div.row`` inside the container is skipped as the header;
    rows with fewer than three ``p`` children are ignored. An absent
    container yields an empty list.
    """
    details = []
    container = soup.find("div", id=container_id)
    if container is None:
        return details
    for row in container.find_all("div", class_="row")[1:]:  # skip header
        cols = row.find_all("p")
        if len(cols) >= 3:
            details.append({
                "pokok": cols[0].get_text(strip=True),
                "denda": cols[1].get_text(strip=True),
                "total": cols[2].get_text(strip=True),
            })
    return details


def get_vehicle_info_bs4(nopol: str, timeout: float = 15):
    """Fetch and parse the vehicle tax page for ``nopol`` without a browser.

    Args:
        nopol: Value sent as the ``no_polisi`` query parameter.
        timeout: Seconds passed to ``requests.get``. Without a timeout the
            request may block indefinitely if the server never answers.

    Returns:
        A 4-tuple ``(data_kendaraan, total_tagihan, rincians_pkb, rincians_swd)``:

        * ``data_kendaraan`` - dict built from the first ``table``; for each
          row with at least three ``td`` cells the key is the first cell's
          text (lower-cased, dots removed, spaces turned into underscores)
          and the value is the third cell's text.
        * ``total_tagihan`` - list of dicts (``pokok``, ``denda``, ``total``,
          ``jenis``) from ``div.row`` elements outside ``det_pkb`` and
          ``det_swd`` that have at least four ``p`` children and whose text
          contains none of "POKOK", "DENDA", "TOTAL".
        * ``rincians_pkb`` / ``rincians_swd`` - lists of dicts (``pokok``,
          ``denda``, ``total``) from ``div#det_pkb`` and ``div#det_swd``.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            exceeds ``timeout``.
    """
    response = requests.get(
        "https://www.jambisamsat.net/infopkb.php",
        params={"no_polisi": nopol},
        timeout=timeout,
    )
    soup = BeautifulSoup(response.content, "html.parser")

    # === 1. Vehicle data ===
    data_kendaraan = {}
    table = soup.find("table")
    if table is not None:
        for row in table.find_all("tr"):
            cols = row.find_all("td")
            if len(cols) >= 3:
                key = cols[0].get_text(strip=True).lower().replace(".", "").replace(" ", "_")
                data_kendaraan[key] = cols[2].get_text(strip=True)

    # === 2. Bill totals (div.row outside det_pkb and det_swd) ===
    total_tagihan = []
    for row in soup.find_all("div", class_="row"):
        if row.find_parent(id="det_pkb") or row.find_parent(id="det_swd"):
            continue
        row_text = row.text
        # Rows mentioning these words are treated as headers and skipped.
        if any(word in row_text for word in ("POKOK", "DENDA", "TOTAL")):
            continue
        ps = row.find_all("p")
        if len(ps) >= 4:
            total_tagihan.append({
                "pokok": ps[0].get_text(strip=True),
                "denda": ps[1].get_text(strip=True),
                "total": ps[2].get_text(strip=True),
                "jenis": ps[3].get_text(strip=True),
            })

    # === 3. PKB details / 4. SWDKLLJ details ===
    rincians_pkb = _parse_detail_rows(soup, "det_pkb")
    rincians_swd = _parse_detail_rows(soup, "det_swd")
    return data_kendaraan, total_tagihan, rincians_pkb, rincians_swd
def get_vehicle_info(driver, plate_number: str):
    """Submit ``plate_number`` on the lookup form and scrape the result page.

    The function drives the browser through the form at ``infopkb.html``,
    waits for the ``infopkb.php`` result page, expands the PKB and SWD
    detail sections and delegates the parsing to ``scrape_vehicle``.

    Args:
        driver: Selenium WebDriver to use. It is always quit before this
            function returns, so the caller must not reuse it afterwards.
        plate_number: Text typed into the ``no_polisi`` input.

    Returns:
        The 4-tuple ``(data_kendaraan, total_tagihan, rincians_pkb,
        rincians_swd)`` produced by ``scrape_vehicle``. If a
        ``WebDriverException`` occurs (this includes the timeouts raised by
        ``WebDriverWait``), the error is printed and an empty result of the
        same shape, ``({}, [], [], [])``, is returned.
    """
    try:
        driver.get("https://www.jambisamsat.net/infopkb.html")
        time.sleep(1)
        print("[GET URL]")

        # The wait hands back the located element, so no second lookup is needed.
        input_field = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "no_polisi"))
        )
        input_field.clear()
        input_field.send_keys(plate_number)

        submit_button = driver.find_element(
            By.CSS_SELECTOR, 'button.btn.btn-primary[type="submit"]'
        )
        submit_button.click()
        print("BUTTON CLICKED")

        # Wait for the new page to load
        WebDriverWait(driver, 10).until(EC.url_contains("infopkb.php"))
        print("PHP LOADED")

        driver.implicitly_wait(3)
        scroll_height = driver.execute_script("return document.body.scrollHeight")
        driver.set_window_size(1920, scroll_height + 200)  # force full-page height

        # Expand both detail sections before scraping them.
        for button_id in ("show_det_pkb", "show_det_swd"):
            button = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.ID, button_id))
            )
            button.click()
        print("SCRIPT EXECUTED")

        time.sleep(1)
        return scrape_vehicle(driver)
    except WebDriverException as e:
        # The original handler referenced an undefined ``page_title`` and
        # returned a 2-tuple; return the same shape as the success path.
        print("Gagal mengambil data kendaraan:", e)
        return {}, [], [], []
    finally:
        driver.quit()