terra1 / app.py
flatindo's picture
Update app.py
035990c
raw
history blame
1.2 kB
import gradio as gr
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from PIL import Image
from io import BytesIO
def web_scrape(url):
    """Load *url* in headless Chrome and return the page's first ``<meta>`` tag.

    Args:
        url: Address of the page to open.

    Returns:
        The outer HTML of the first ``<meta>`` element as a string, or the
        literal string ``"error handle website"`` when Selenium raises a
        ``WebDriverException`` at any step (starting the browser, loading
        the page, or locating the element).
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Bind the name before the ``try`` so the ``finally`` clause can test it
    # even when ``webdriver.Chrome`` itself fails to start.
    wd = None
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)  # Adjust the window size here
        wd.get(url)
        wd.implicitly_wait(10)
        # "tag name" is the locator strategy string behind ``By.TAG_NAME``.
        # The ``find_element_by_*`` helpers were removed in Selenium 4.3,
        # while this call form works on both Selenium 3 and 4.
        meta_element = wd.find_element("tag name", "meta")
        # Read the markup while the session is still alive: a WebElement is
        # unusable once ``quit()`` has run, and the caller expects text.
        return meta_element.get_attribute("outerHTML")
    except WebDriverException:
        return "error handle website"
    finally:
        if wd is not None:
            wd.quit()
# Gradio front end: one URL text box as input, one text box showing whatever
# web_scrape returns.
# NOTE(review): the title and description mention the <body> tag, while
# web_scrape looks up a <meta> element -- confirm which one is intended.
url_box = gr.inputs.Textbox(
    label="Website URL",
    default="https://stock.adobe.com/stock-photo/id/621214874",
)
content_box = gr.outputs.Textbox(label="Web Content")

iface = gr.Interface(
    fn=web_scrape,
    inputs=url_box,
    outputs=content_box,
    title="Web Scraping with Selenium (Body Tag)",
    description="Scrape the content of a website's <body> tag using Selenium.",
)

# Start the web UI as soon as the script is run.
iface.launch()