Spaces:
Running
Running
File size: 10,138 Bytes
4c3fe29 35ae779 3819331 35ae779 31d5b37 3819331 31d5b37 35ae779 15fdb32 35ae779 31d5b37 35ae779 31d5b37 35ae779 7f2bf6a 35ae779 31d5b37 35ae779 31d5b37 7f2bf6a 35ae779 3819331 35ae779 7f2bf6a 31d5b37 35ae779 7f2bf6a 35ae779 7f2bf6a 35ae779 7f2bf6a 35ae779 7f2bf6a 35ae779 7f2bf6a 35ae779 7f2bf6a 3819331 35ae779 7f2bf6a 35ae779 7f2bf6a 35ae779 7f2bf6a 35ae779 15fdb32 7f2bf6a 35ae779 7f2bf6a 35ae779 7f2bf6a 35ae779 7f2bf6a 35ae779 7f2bf6a 35ae779 7f2bf6a 35ae779 7f2bf6a 35ae779 3819331 31d5b37 35ae779 7f2bf6a 3819331 35ae779 31d5b37 7f2bf6a 35ae779 3819331 15fdb32 31d5b37 15fdb32 31d5b37 3819331 15fdb32 3819331 31d5b37 15fdb32 35ae779 31d5b37 7f2bf6a 35ae779 7f2bf6a 35ae779 3819331 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
import os
import subprocess
import sys

# Download the browser binaries Playwright drives before anything tries to
# launch one. The command is run through the current interpreter (no shell) so
# it targets the same environment the app runs in. A failure is reported but
# is not fatal here: the browsers may already be installed.
_playwright_install = subprocess.run(
    [sys.executable, "-m", "playwright", "install"],
    check=False,
)
if _playwright_install.returncode != 0:
    print(
        "⚠️ 'playwright install' exited with code "
        f"{_playwright_install.returncode}; launching a browser may fail."
    )
# app.py (Final version with async API)
import gradio as gr
from playwright.async_api import async_playwright, Error as PlaywrightError
from bs4 import BeautifulSoup
import urllib.parse
import datetime
import atexit
import re
import os
from itertools import cycle
import uuid
# --- 1. ASYNC GLOBAL RESOURCES ---
# Process-wide Playwright handles. All three start out unset and are assigned
# by `startup()`.
P = BROWSER = REVOLVER = None

# Live Playwright objects for every open tab, keyed by tab id:
# { tab_id: { "context": PlaywrightContext, "page": PlaywrightPage } }
LIVE_CONTEXTS = {}
# --- 2. PLAIN DATA STATE CLASSES (Unchanged) ---
class TabState:
    """Plain-data record describing what a single tab currently shows.

    Attributes are filled with placeholder values at construction time and
    overwritten once the tab has navigated somewhere.
    """

    def __init__(self, tab_id, proxy_used="Direct Connection"):
        # Identity and routing information.
        self.id = tab_id
        self.proxy_used = proxy_used
        # Placeholder page data shown until the first navigation completes.
        self.url = "about:blank"
        self.title = "New Tab"
        self.parsed_text = "Welcome! Navigate to a URL or search to get started."
        self.links = []
class BrowserState:
    """Plain-data container holding the open tabs and which one is active."""

    def __init__(self):
        self.tabs = []
        self.active_tab_id = None

    def get_active_tab(self):
        """Return the tab whose id equals `active_tab_id`, or None if no tab matches."""
        for candidate in self.tabs:
            if candidate.id == self.active_tab_id:
                return candidate
        return None
class CredentialRevolver:
    """Round-robin dispenser of proxy configurations.

    The proxy list is given as text with one proxy per line, in the form
    ``[scheme://][user:password@]host[:port]``. Each valid line becomes a dict
    with the keys ``server``, ``username`` and ``password``.
    """

    def __init__(self, proxy_string: str):
        self.proxies = self._parse_proxies(proxy_string)
        # `cycle` over an empty list would make `next()` raise, so no cycler
        # is created when there is nothing to hand out.
        self.proxy_cycler = cycle(self.proxies) if self.proxies else None

    def _parse_proxies(self, proxy_string: str):
        """Parse the proxy list text into a list of proxy dicts.

        Blank lines and lines that cannot be parsed (no host, malformed or
        out-of-range port, invalid IPv6 literal) are skipped.
        """
        proxies = []
        for raw_line in (proxy_string or "").splitlines():
            entry = raw_line.strip()
            if not entry:
                continue
            # urlparse only recognises the netloc after "//". Add it when the
            # entry has no scheme, but leave entries such as "socks5://..."
            # alone so their scheme is kept.
            target = entry if "://" in entry else f"//{entry}"
            try:
                parsed = urllib.parse.urlparse(target)
                host = parsed.hostname
                port = parsed.port  # raises ValueError on a bad port
            except ValueError:
                continue
            if not host:
                continue
            if ":" in host:
                # `hostname` strips the brackets from IPv6 literals.
                host = f"[{host}]"
            server = f"{parsed.scheme or 'http'}://{host}"
            if port is not None:
                server += f":{port}"
            proxies.append({
                "server": server,
                "username": parsed.username,
                "password": parsed.password,
            })
        return proxies

    def get_next(self):
        """Return the next proxy dict in rotation, or None when no proxies are configured."""
        return next(self.proxy_cycler) if self.proxy_cycler else None

    def count(self):
        """Return the number of proxies that were parsed successfully."""
        return len(self.proxies)
# --- 3. ASYNC LOGIC ---
# All functions interacting with Playwright are now `async def`
async def _fetch_and_update_tab_state(tab_state: TabState, url: str):
    """Navigate the tab's live page to `url` and refresh `tab_state` from the result.

    On success the tab's url, title, text content and link list are replaced
    with what the loaded page contains. If Playwright reports an error, the
    tab is put into an "Error" state that carries the error message instead.

    Returns:
        A multi-line, human-readable log of what happened.

    Raises:
        KeyError: if `tab_state.id` has no entry in `LIVE_CONTEXTS`.
    """
    log = f"▶️ Navigating to {url}..."
    live_page = LIVE_CONTEXTS[tab_state.id]["page"]
    try:
        await live_page.goto(url, wait_until='domcontentloaded', timeout=30000)
        # Read the URL back from the page: it may differ from the requested one.
        tab_state.url = live_page.url
        tab_state.title = await live_page.title() or "No Title"
        log += f"\n✅ Arrived at: {tab_state.url}"

        html_content = await live_page.content()
        soup = BeautifulSoup(html_content, 'lxml')
        # Remove non-content elements before extracting text and links.
        for unwanted in soup(["script", "style", "nav", "footer"]):
            unwanted.extract()
        tab_state.parsed_text = soup.get_text(separator='\n', strip=True)

        links = []
        for anchor in soup.find_all('a', href=True):
            href = anchor['href'].strip()
            if not href:
                continue
            # Resolve relative and protocol-relative hrefs against the page
            # URL first, then keep only web links. Filtering on the raw href
            # would throw away every relative link.
            try:
                absolute_url = urllib.parse.urljoin(tab_state.url, href)
                scheme = urllib.parse.urlparse(absolute_url).scheme
            except ValueError:
                continue
            if scheme not in ("http", "https"):
                continue
            links.append({
                'text': anchor.get_text(strip=True) or "[No Link Text]",
                'url': absolute_url,
            })
        tab_state.links = links
        log += f"\n🔗 Found {len(tab_state.links)} links."
    except PlaywrightError as e:
        error_message = str(e)
        tab_state.title = "Error"
        tab_state.url = url
        tab_state.parsed_text = f"❌ Failed to load page.\n\nError: {error_message}"
        tab_state.links = []
        log += f"\n❌ {error_message}"
    return log
async def handle_action(browser_state: "BrowserState", action: str, value=None):
    """Apply one user action to `browser_state` and return the result.

    Supported actions:
        "new_tab"    - open a new browser context/page (using the next proxy
                       from REVOLVER, if any) and make it the active tab.
        "go"         - navigate the active tab to `value` if it looks like a
                       URL (has scheme and host), otherwise search for it.
        "click"      - follow the link with index `value` on the active tab.
        "close_tab"  - close the active tab unless it is the only one.
        "switch_tab" - make the tab with id `value` the active tab.

    Any other action, or an action that needs an active tab when there is
    none, leaves the state untouched and yields an empty log.

    Returns:
        A `(browser_state, log)` tuple; `browser_state` is the same object
        that was passed in, mutated in place.
    """
    log = ""
    active_tab_state = browser_state.get_active_tab()

    if action == "new_tab":
        tab_id = str(uuid.uuid4())
        proxy_config = REVOLVER.get_next()
        context = await BROWSER.new_context(proxy=proxy_config)
        try:
            page = await context.new_page()
        except Exception:
            # Do not leave an orphaned context behind if the page cannot be created.
            await context.close()
            raise
        LIVE_CONTEXTS[tab_id] = {"context": context, "page": page}
        new_tab = TabState(tab_id, proxy_used=proxy_config['server'] if proxy_config else "Direct")
        browser_state.tabs.append(new_tab)
        browser_state.active_tab_id = tab_id
        log = await _fetch_and_update_tab_state(new_tab, "https://www.whatsmyip.org/")

    elif action == "go" and active_tab_state:
        query = (value or "").strip()
        if not query:
            log = "Please enter a URL or search term."
        else:
            parsed = urllib.parse.urlparse(query)
            if parsed.scheme and parsed.netloc:
                url = query
            else:
                url = f"https://duckduckgo.com/html/?q={urllib.parse.quote_plus(query)}"
            log = await _fetch_and_update_tab_state(active_tab_state, url)

    elif action == "click" and active_tab_state:
        # Only the number conversion is guarded, so that navigation failures
        # are not misreported as a bad link number.
        try:
            link_index = int(value)
        except (TypeError, ValueError, OverflowError):
            log = "Please enter a valid number to click."
        else:
            if 0 <= link_index < len(active_tab_state.links):
                target_url = active_tab_state.links[link_index]['url']
                log = await _fetch_and_update_tab_state(active_tab_state, target_url)
            else:
                log = "Invalid link number."

    elif action == "close_tab" and active_tab_state:
        if len(browser_state.tabs) > 1:
            tab_to_close_id = browser_state.active_tab_id
            tab_index = browser_state.tabs.index(active_tab_state)
            browser_state.tabs.pop(tab_index)
            # Activate the tab to the left, or the new first tab if the
            # closed one was the leftmost.
            new_index = tab_index - 1 if tab_index > 0 else 0
            browser_state.active_tab_id = browser_state.tabs[new_index].id
            resources = LIVE_CONTEXTS.pop(tab_to_close_id, None)
            if resources is not None:
                await resources['context'].close()
            log = "💣 Tab closed."
        else:
            log = "Cannot close the last tab."

    elif action == "switch_tab" and value is not None:
        # Ignore ids that do not belong to an open tab; accepting them would
        # leave the browser without an active tab.
        if any(tab.id == value for tab in browser_state.tabs):
            browser_state.active_tab_id = value
            log = "Switched to tab."
        else:
            log = "Unknown tab."

    return browser_state, log
def update_ui_components(browser_state: BrowserState):  # This function is not async
    """Build the component updates that display the active tab.

    Returns a dict keyed by the `page_content`, `url_textbox`, `links_display`
    and `tab_selector` components. When there is no active tab the dict holds
    placeholder values instead.
    """
    current_tab = browser_state.get_active_tab()
    if not current_tab:
        return {
            page_content: gr.Markdown("No active tabs."),
            url_textbox: "",
            links_display: "",
            tab_selector: gr.Radio(choices=[]),
        }

    # One (label, value) pair per open tab for the radio selector.
    tab_choices = []
    for position, tab in enumerate(browser_state.tabs):
        label = f"Tab {position}: {tab.title[:25]}... (via {tab.proxy_used})"
        tab_choices.append((label, tab.id))

    # Numbered list of at most the first 25 links.
    if current_tab.links:
        link_lines = [
            f"{number}. [{entry['text'][:80]}]({entry['url']})"
            for number, entry in enumerate(current_tab.links[:25])
        ]
        links_body = '\n'.join(link_lines)
    else:
        links_body = "_No links found._"
    links_md = "### 🔗 Links on Page\n" + links_body

    # Page header followed by at most the first 2000 characters of text.
    page_md = f"# {current_tab.title}\n**URL:** {current_tab.url}\n\n---\n\n{current_tab.parsed_text[:2000]}..."

    return {
        page_content: gr.Markdown(page_md),
        url_textbox: gr.Textbox(value=current_tab.url),
        links_display: gr.Markdown(links_md),
        tab_selector: gr.Radio(choices=tab_choices, value=current_tab.id, label="Active Tabs"),
    }
# --- 4. GRADIO UI AND STARTUP/SHUTDOWN EVENTS ---
async def startup():
    """Start Playwright, launch headless Firefox and load the proxy list.

    Sets the module globals `P`, `BROWSER` and `REVOLVER`. The proxy list is
    read from the `PROXY_LIST` environment variable (empty if unset).

    If the browser cannot be launched, the Playwright driver that was just
    started is stopped again and the exception is re-raised.

    NOTE(review): Playwright's async objects appear to be bound to the event
    loop they were created on, so this should be awaited on the same loop that
    later drives the pages — confirm against the Playwright documentation.
    """
    global P, BROWSER, REVOLVER
    print("🚀 App starting up...")
    P = await async_playwright().start()
    try:
        BROWSER = await P.firefox.launch(headless=True)
    except Exception:
        # Do not leave the driver process running without a browser.
        await P.stop()
        P = None
        raise
    proxy_list_str = os.getenv("PROXY_LIST", "")
    REVOLVER = CredentialRevolver(proxy_list_str)
    print(f"✅ Playwright started. {REVOLVER.count()} proxies loaded.")
async def shutdown():
    """Close the browser and stop Playwright, if they were started.

    The module globals `BROWSER` and `P` are reset to None and `LIVE_CONTEXTS`
    is emptied, so no stale handles remain after shutdown. Playwright is
    stopped even if closing the browser raises.
    """
    global P, BROWSER
    print("🧹 App shutting down...")
    # Detach the handles first so nothing else picks up a closing browser.
    browser, driver = BROWSER, P
    BROWSER = P = None
    LIVE_CONTEXTS.clear()
    try:
        if browser:
            await browser.close()
    finally:
        if driver:
            await driver.stop()
    print("✅ Playwright stopped.")
import asyncio

# Guards the one-time Playwright start-up. It is created lazily, inside the
# event loop that serves requests, rather than at import time.
_STARTUP_LOCK = None


async def _ensure_playwright_started():
    """Start Playwright on first use, from within the request-serving event loop.

    Starting it beforehand with `asyncio.run(startup())` would create the
    browser on a temporary loop that is closed before any handler runs.
    """
    global _STARTUP_LOCK
    if BROWSER is not None:
        return
    if _STARTUP_LOCK is None:
        _STARTUP_LOCK = asyncio.Lock()
    async with _STARTUP_LOCK:
        # Re-check: another request may have finished start-up while this one waited.
        if BROWSER is None:
            await startup()


with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
    browser_state = gr.State(BrowserState())
    gr.Markdown("# 🛰️ Real Browser Demo (Async API)")

    with gr.Row():
        with gr.Column(scale=3):
            url_textbox = gr.Textbox(label="URL or Search Term", interactive=True)
            go_btn = gr.Button("Go", variant="primary")
            with gr.Accordion("Page Content (Text Only)", open=True):
                page_content = gr.Markdown("Loading...")
            log_display = gr.Textbox(label="Status Log", interactive=False)
        with gr.Column(scale=1):
            with gr.Row():
                new_tab_btn = gr.Button("➕ New Tab")
                close_tab_btn = gr.Button("❌ Close Tab")
            tab_selector = gr.Radio(choices=[], label="Active Tabs", interactive=True)
            with gr.Accordion("Clickable Links", open=True):
                links_display = gr.Markdown("...")
                with gr.Row():
                    click_num_box = gr.Number(label="Link #", scale=1, minimum=0, step=1)
                    click_btn = gr.Button("Click Link", scale=2)

    all_outputs = [page_content, url_textbox, links_display, tab_selector, log_display]
    # Every handler may also replace the session state, so it is always an output.
    handler_outputs = [browser_state, *all_outputs]

    async def master_handler(current_state, action, value):
        """Run `action` and return one dict of updates keyed by output component."""
        await _ensure_playwright_started()
        new_state, log = await handle_action(current_state, action, value)
        ui_updates = update_ui_components(new_state)
        ui_updates[log_display] = log
        ui_updates[browser_state] = new_state
        return ui_updates

    def _bind_action(action, takes_value=True):
        """Return an event handler that forwards to `master_handler` with `action` fixed.

        Event listeners only pass the input component values, so the action
        name has to be bound here rather than supplied by the listener.
        """
        if takes_value:
            async def _on_event(current_state, value):
                return await master_handler(current_state, action, value)
        else:
            async def _on_event(current_state):
                return await master_handler(current_state, action, None)
        return _on_event

    # Initial load - create the first tab for this session.
    async def initial_load(s):
        return await master_handler(s, "new_tab", None)

    demo.load(initial_load, inputs=[browser_state], outputs=handler_outputs)

    # Event listeners - Gradio awaits async handlers itself.
    go_btn.click(_bind_action("go"), [browser_state, url_textbox], handler_outputs, show_progress="full")
    url_textbox.submit(_bind_action("go"), [browser_state, url_textbox], handler_outputs, show_progress="full")
    click_btn.click(_bind_action("click"), [browser_state, click_num_box], handler_outputs, show_progress="full")
    new_tab_btn.click(_bind_action("new_tab", takes_value=False), [browser_state], handler_outputs, show_progress="full")
    close_tab_btn.click(_bind_action("close_tab", takes_value=False), [browser_state], handler_outputs)
    tab_selector.input(_bind_action("switch_tab"), [browser_state, tab_selector], handler_outputs)

# `Blocks` offers no start-up hook that runs inside the serving event loop, so
# Playwright is started lazily by the first handler call (see
# `_ensure_playwright_started`) instead of before launch.
demo.launch()