Spaces:
Sleeping
Sleeping
File size: 1,199 Bytes
e0b4e38 de01cb6 5c579db 4d75a6f 850d477 5c579db f150bb6 5c579db 0184583 05d5483 ecabf86 05d5483 5c579db ecabf86 5c579db 850d477 fb6b3ad 850d477 de01cb6 f150bb6 850d477 5c579db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
import gradio as gr
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from PIL import Image
from io import BytesIO
def web_scrape(url):
    """Return the ``content`` attribute of the first ``<meta>`` element at *url*.

    A headless Chrome session is started for each call, pointed at *url*,
    and always shut down again before the function returns.

    Args:
        url: Address of the page to load.

    Returns:
        The value of the ``content`` attribute of the first ``<meta>``
        element found (``None`` if that element has no such attribute), or
        the string ``"error handle website"`` when Selenium raises a
        ``WebDriverException`` (browser start-up failure, navigation error,
        no ``<meta>`` element on the page, ...).
    """
    # Local import: the `find_element_by_*` helpers were removed in
    # Selenium 4, and the `By` locator API works on both 3.x and 4.x.
    from selenium.webdriver.common.by import By

    options = webdriver.ChromeOptions()
    options.add_argument('--headless')
    # NOTE(review): the next two flags are presumably there for running
    # Chrome inside a container -- confirm against the deployment target.
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')

    # Bind the name before `try` so the `finally` clause can tell whether a
    # browser was actually started; otherwise a failure in `Chrome(...)`
    # would surface as an UnboundLocalError during cleanup.
    wd = None
    try:
        wd = webdriver.Chrome(options=options)
        wd.set_window_size(1080, 720)  # Adjust the window size here
        wd.get(url)
        # Allow up to 10 seconds for the element lookup below to succeed.
        wd.implicitly_wait(10)
        meta_element = wd.find_element(By.TAG_NAME, "meta")
        return meta_element.get_attribute("content")
    except WebDriverException:
        return "error handle website"
    finally:
        if wd is not None:
            wd.quit()
# Gradio UI: one text box for the URL, one text box for the scraped value.
# Components are created with `gr.Textbox` and `value=`; the older
# `gr.inputs` / `gr.outputs` namespaces and the `default=` keyword are
# deprecated and no longer exist in current Gradio releases.
iface = gr.Interface(
    fn=web_scrape,
    inputs=gr.Textbox(label="Website URL", value="https://stock.adobe.com/stock-photo/id/621214874"),
    outputs=gr.Textbox(label="Web Content"),
    title="Web Scraping with Selenium (Meta Tag)",
    description="Scrape the content attribute of a website's first meta tag using Selenium.",
)
# Start the web server for the interface.
iface.launch()