# Spaces: Sleeping
# (Presumably page-header residue from the site hosting this script; not part of the program.)
import gradio as gr | |
from selenium import webdriver | |
from selenium.common.exceptions import WebDriverException | |
from PIL import Image | |
from io import BytesIO | |
def web_scrape(url):
    """Load *url* in headless Chrome and return a "meta" element's content.

    The page is opened in a fresh headless Chrome session, the first element
    whose ``name`` attribute equals ``"meta"`` is located, and the value of
    its ``content`` attribute is returned.

    Args:
        url: Address of the page to load.

    Returns:
        The value of the element's ``content`` attribute, or the string
        ``"error handle website"`` if Selenium raises a
        ``WebDriverException`` at any point (browser start-up, navigation,
        or element lookup).
    """
    # Imported here so this function is self-contained with respect to the
    # file's import header.
    from selenium.webdriver.common.by import By

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Bind the name before the try block: if Chrome fails to start, the
    # cleanup in ``finally`` must not trip over an undefined variable.
    wd = None
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)  # Adjust the window size here
        wd.get(url)
        # Let element lookups poll for up to 10 seconds before giving up.
        wd.implicitly_wait(10)
        # NOTE(review): this matches an element with name="meta", not a
        # <meta> tag -- confirm that this is the intended target.
        page_content = wd.find_element(By.NAME, "meta")
        return page_content.get_attribute("content")
    except WebDriverException:
        return "error handle website"
    finally:
        # Always shut the browser down, but only if it was actually started.
        if wd is not None:
            wd.quit()
# Build the Gradio UI: a single URL text box feeding web_scrape, with the
# returned string shown in a text box.
# NOTE(review): the title/description below mention the <body> tag, while
# web_scrape returns the "content" attribute of an element named "meta" --
# confirm which one is intended.
_url_input = gr.inputs.Textbox(
    label="Website URL",
    default="https://stock.adobe.com/stock-photo/id/621214874",
)
_content_output = gr.outputs.Textbox(label="Web Content")

iface = gr.Interface(
    fn=web_scrape,
    inputs=_url_input,
    outputs=_content_output,
    title="Web Scraping with Selenium (Body Tag)",
    description="Scrape the content of a website's <body> tag using Selenium.",
)

# Start the web server for the interface.
iface.launch()