# terra1 / app.py
# flatindo's picture
# Update app.py
# 453a236
# raw
# history blame
# 1.2 kB
import gradio as gr
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from PIL import Image
from io import BytesIO
def web_scrape(url):
    """Open *url* in headless Chrome and return a meta element's content.

    The page is loaded in a headless Chrome session, the first element whose
    ``name`` attribute equals ``"meta"`` is located, and the value of its
    ``content`` attribute is returned.

    Args:
        url: Address of the page to load.

    Returns:
        The ``content`` attribute of the located element, or the string
        ``"error handle website"`` if Selenium raises a
        ``WebDriverException`` at any point (browser start-up, navigation,
        or element lookup).
    """
    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Bind the name before entering the try block: if starting Chrome fails,
    # the finally clause must still be able to test it without raising
    # UnboundLocalError (which would mask the handled error).
    wd = None
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)  # Adjust the window size here
        wd.get(url)
        # Applies to the element lookup below: wait up to 10 s for it to appear.
        wd.implicitly_wait(10)
        # find_element(by, value) is available in both Selenium 3 and 4;
        # the find_element_by_* helpers no longer exist in Selenium 4.3+.
        # "name" is the locator strategy that matches the name attribute.
        page_content = wd.find_element("name", "meta")
        return page_content.get_attribute("content")
    except WebDriverException:
        return "error handle website"
    finally:
        # Always shut the browser down so Chrome processes do not pile up.
        if wd is not None:
            wd.quit()
# Gradio front end: a single URL text box in, a single text box out.
# The title/description describe what web_scrape actually returns (the
# ``content`` attribute of the element named "meta"), not the <body> tag.
iface = gr.Interface(
    fn=web_scrape,
    # gr.Textbox(value=...) replaces the legacy gr.inputs.Textbox(default=...)
    # and gr.outputs.Textbox component namespaces.
    inputs=gr.Textbox(
        label="Website URL",
        value="https://stock.adobe.com/stock-photo/id/621214874",
    ),
    outputs=gr.Textbox(label="Web Content"),
    title="Web Scraping with Selenium (Meta Content)",
    description="Fetch the content attribute of the element named \"meta\" on a web page using Selenium.",
)
iface.launch()