Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
import os
|
2 |
os.system("playwright install")
|
3 |
-
# app.py (Final, Working Async Version with
|
4 |
|
5 |
import gradio as gr
|
6 |
from playwright.async_api import async_playwright, Error as PlaywrightError
|
@@ -18,32 +18,49 @@ REVOLVER = None
|
|
18 |
LIVE_CONTEXTS = {} # { tab_id: { "context": PlaywrightContext, "page": PlaywrightPage } }
|
19 |
APP_STARTED = False
|
20 |
|
21 |
-
# --- 2. PLAIN DATA STATE CLASSES (
|
22 |
class TabState:
|
|
|
23 |
def __init__(self, tab_id, proxy_used="Direct Connection"):
|
24 |
-
self.id
|
25 |
-
self.
|
|
|
|
|
|
|
|
|
26 |
|
27 |
class BrowserState:
|
28 |
-
|
29 |
-
def
|
|
|
|
|
|
|
|
|
|
|
30 |
|
31 |
class CredentialRevolver:
|
|
|
32 |
def __init__(self, proxy_string: str):
|
33 |
self.proxies = self._parse_proxies(proxy_string)
|
34 |
if self.proxies: self.proxy_cycler = cycle(self.proxies)
|
35 |
else: self.proxy_cycler = None
|
36 |
def _parse_proxies(self, proxy_string: str):
|
37 |
-
proxies = []
|
38 |
for line in proxy_string.strip().splitlines():
|
39 |
-
try:
|
|
|
|
|
|
|
40 |
except: pass
|
41 |
return proxies
|
42 |
-
def get_next(self):
|
43 |
-
|
|
|
|
|
44 |
|
45 |
-
# --- 3. ASYNC LOGIC
|
46 |
async def _fetch_and_update_tab_state(tab_state: TabState, url: str):
|
|
|
47 |
log = f"▶️ Navigating to {url}..."; live_page = LIVE_CONTEXTS[tab_state.id]["page"]
|
48 |
try:
|
49 |
await live_page.goto(url, wait_until='domcontentloaded', timeout=30000)
|
@@ -61,6 +78,7 @@ async def _fetch_and_update_tab_state(tab_state: TabState, url: str):
|
|
61 |
return log
|
62 |
|
63 |
async def handle_action(browser_state: BrowserState, action: str, value=None):
|
|
|
64 |
log = ""; active_tab_state = browser_state.get_active_tab()
|
65 |
if action == "new_tab":
|
66 |
tab_id, proxy_config = str(uuid.uuid4()), REVOLVER.get_next()
|
@@ -90,7 +108,21 @@ async def handle_action(browser_state: BrowserState, action: str, value=None):
|
|
90 |
browser_state.active_tab_id = value; log = f"Switched to tab."
|
91 |
return browser_state, log
|
92 |
|
93 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
|
95 |
browser_state = gr.State(BrowserState())
|
96 |
gr.Markdown("# 🛰️ Real Browser Demo (Final Working Version)")
|
@@ -107,7 +139,6 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
|
|
107 |
links_display = gr.Markdown("...");
|
108 |
with gr.Row(): click_num_box = gr.Number(label="Link #", scale=1, minimum=0, step=1); click_btn = gr.Button("Click Link", scale=2)
|
109 |
|
110 |
-
# This order must be consistent
|
111 |
all_outputs = [page_content, url_textbox, links_display, tab_selector, log_display]
|
112 |
|
113 |
async def master_handler(current_state, action, value=None):
|
@@ -121,7 +152,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
|
|
121 |
new_state, log = await handle_action(current_state, action, value)
|
122 |
ui_updates = update_ui_components(new_state)
|
123 |
|
124 |
-
# **
|
125 |
# We must return a tuple with a value for EACH output component, in the correct order.
|
126 |
return (
|
127 |
new_state, # 1. For the browser_state output
|
@@ -129,29 +160,16 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
|
|
129 |
ui_updates[url_textbox], # 3.
|
130 |
ui_updates[links_display], # 4.
|
131 |
ui_updates[tab_selector], # 5.
|
132 |
-
|
133 |
)
|
134 |
|
135 |
-
#
|
136 |
-
|
137 |
-
|
138 |
-
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
return await master_handler(state, "go", value)
|
143 |
-
|
144 |
-
async def on_click_link(state, value):
|
145 |
-
return await master_handler(state, "click", value)
|
146 |
-
|
147 |
-
async def on_new_tab(state):
|
148 |
-
return await master_handler(state, "new_tab", None)
|
149 |
-
|
150 |
-
async def on_close_tab(state):
|
151 |
-
return await master_handler(state, "close_tab", None)
|
152 |
-
|
153 |
-
async def on_switch_tab(state, value):
|
154 |
-
return await master_handler(state, "switch_tab", value)
|
155 |
|
156 |
# Wire up the new, clean event handlers
|
157 |
demo.load(on_load, inputs=[browser_state], outputs=[browser_state, *all_outputs])
|
|
|
1 |
import os
|
2 |
os.system("playwright install")
|
3 |
+
# app.py (Final, Working Async Version with All Bugs Fixed)
|
4 |
|
5 |
import gradio as gr
|
6 |
from playwright.async_api import async_playwright, Error as PlaywrightError
|
|
|
18 |
LIVE_CONTEXTS = {} # { tab_id: { "context": PlaywrightContext, "page": PlaywrightPage } }
|
19 |
APP_STARTED = False
|
20 |
|
21 |
+
# --- 2. PLAIN DATA STATE CLASSES (Copyable) ---
|
22 |
class TabState:
    """A plain data class representing a tab's state. Fully copyable.

    Only plain Python values are stored here; the live Playwright objects for
    a tab are looked up separately in ``LIVE_CONTEXTS`` using ``self.id``.
    """

    def __init__(self, tab_id, proxy_used="Direct Connection"):
        """Create a blank tab.

        Args:
            tab_id: Identifier of the tab (also the key into ``LIVE_CONTEXTS``).
            proxy_used: Human-readable label of the proxy this tab goes through.
        """
        self.id = tab_id
        self.url = "about:blank"
        self.title = "New Tab"
        self.parsed_text = "Welcome! Navigate to a URL or search to get started."
        # Filled in after a navigation; the UI reads link['text'] and link['url'].
        self.links = []
        self.proxy_used = proxy_used
|
31 |
|
32 |
class BrowserState:
    """A plain data class representing the browser's overall state."""

    def __init__(self):
        """Start with no tabs and no active tab."""
        self.tabs = []
        self.active_tab_id = None

    def get_active_tab(self):
        """Return the tab whose ``id`` equals ``active_tab_id``.

        Returns:
            The matching tab object, or ``None`` when no tab is active or the
            active id does not match any tab in ``self.tabs``.
        """
        if not self.active_tab_id:
            return None
        return next((t for t in self.tabs if t.id == self.active_tab_id), None)
|
40 |
|
41 |
class CredentialRevolver:
    """Manages a rotating list of proxies parsed from a multi-line string.

    Each non-blank line describes one proxy, either as
    ``[user:password@]host[:port]`` or with an explicit scheme such as
    ``socks5://host:port``. Lines that cannot be parsed are skipped.
    """

    def __init__(self, proxy_string: str):
        """Parse ``proxy_string`` and prepare an endless round-robin over it."""
        self.proxies = self._parse_proxies(proxy_string)
        # cycle() over an empty list would raise StopIteration on next(),
        # so "no proxies" is represented by None instead.
        self.proxy_cycler = cycle(self.proxies) if self.proxies else None

    def _parse_proxies(self, proxy_string: str):
        """Turn the raw proxy list into ``{"server", "username", "password"}`` dicts.

        Args:
            proxy_string: Newline-separated proxy entries; ``None`` or an empty
                string yields an empty list.

        Returns:
            A list of dicts, one per valid line, in input order.
        """
        proxies = []
        for raw_line in (proxy_string or "").splitlines():
            line = raw_line.strip()
            if not line:
                # A blank line is not a proxy; never emit "http://None:None".
                continue
            # Only scheme-less entries need the "//" prefix that makes
            # urlparse treat the text as a network location.
            target = line if "://" in line else f"//{line}"
            try:
                parsed = urllib.parse.urlparse(target)
                host = parsed.hostname
                port = parsed.port  # raises ValueError on a non-numeric port
            except ValueError:
                continue
            if not host:
                continue
            server = f"{parsed.scheme or 'http'}://{host}"
            if port is not None:
                server += f":{port}"
            proxies.append({
                "server": server,
                "username": parsed.username,
                "password": parsed.password,
            })
        return proxies

    def get_next(self):
        """Return the next proxy dict in rotation, or ``None`` if there are none."""
        return next(self.proxy_cycler) if self.proxy_cycler else None

    def count(self):
        """Return the number of successfully parsed proxies."""
        return len(self.proxies)
|
60 |
|
61 |
+
# --- 3. CORE ASYNC LOGIC ---
|
62 |
async def _fetch_and_update_tab_state(tab_state: TabState, url: str):
|
63 |
+
"""Uses Playwright to navigate and BeautifulSoup to parse, updating the TabState object."""
|
64 |
log = f"▶️ Navigating to {url}..."; live_page = LIVE_CONTEXTS[tab_state.id]["page"]
|
65 |
try:
|
66 |
await live_page.goto(url, wait_until='domcontentloaded', timeout=30000)
|
|
|
78 |
return log
|
79 |
|
80 |
async def handle_action(browser_state: BrowserState, action: str, value=None):
|
81 |
+
"""Modifies the state based on user actions."""
|
82 |
log = ""; active_tab_state = browser_state.get_active_tab()
|
83 |
if action == "new_tab":
|
84 |
tab_id, proxy_config = str(uuid.uuid4()), REVOLVER.get_next()
|
|
|
108 |
browser_state.active_tab_id = value; log = f"Switched to tab."
|
109 |
return browser_state, log
|
110 |
|
111 |
+
# **BUG FIX 1: NameError Fix**
|
112 |
+
# This function is now defined before it is called by the master_handler.
|
113 |
+
def update_ui_components(browser_state: BrowserState):
    """Generates a dictionary of UI updates from the state. Not async.

    The keys are the Gradio components ``page_content``, ``url_textbox``,
    ``links_display`` and ``tab_selector``; these are module-level names that
    must exist by the time this function is *called* (not when it is defined).

    Args:
        browser_state: The state to render.

    Returns:
        A dict mapping each of the four components to its new value.
    """
    active_tab = browser_state.get_active_tab()
    if not active_tab:
        return {
            page_content: gr.Markdown("No active tabs."),
            url_textbox: "",
            links_display: "",
            tab_selector: gr.Radio(choices=[]),
        }

    # (label, value) pairs: the label is shown to the user, the tab id is the value.
    tab_choices = [
        (f"Tab {i}: {t.title[:25]}... (via {t.proxy_used})", t.id)
        for i, t in enumerate(browser_state.tabs)
    ]

    if active_tab.links:
        # Only the first 25 links are listed; the printed index is the link number.
        links_body = '\n'.join(
            f"{i}. [{link['text'][:80]}]({link['url']})"
            for i, link in enumerate(active_tab.links[:25])
        )
    else:
        links_body = "_No links found._"
    links_md = "### 🔗 Links on Page\n" + links_body

    return {
        page_content: gr.Markdown(f"# {active_tab.title}\n**URL:** {active_tab.url}\n\n---\n\n{active_tab.parsed_text[:2000]}..."),
        url_textbox: gr.Textbox(value=active_tab.url),
        links_display: gr.Markdown(links_md),
        tab_selector: gr.Radio(choices=tab_choices, value=active_tab.id, label="Active Tabs"),
    }
|
124 |
+
|
125 |
+
# --- 4. GRADIO UI AND EVENT HANDLING ---
|
126 |
with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
|
127 |
browser_state = gr.State(BrowserState())
|
128 |
gr.Markdown("# 🛰️ Real Browser Demo (Final Working Version)")
|
|
|
139 |
links_display = gr.Markdown("...");
|
140 |
with gr.Row(): click_num_box = gr.Number(label="Link #", scale=1, minimum=0, step=1); click_btn = gr.Button("Click Link", scale=2)
|
141 |
|
|
|
142 |
all_outputs = [page_content, url_textbox, links_display, tab_selector, log_display]
|
143 |
|
144 |
async def master_handler(current_state, action, value=None):
|
|
|
152 |
new_state, log = await handle_action(current_state, action, value)
|
153 |
ui_updates = update_ui_components(new_state)
|
154 |
|
155 |
+
# **BUG FIX 2: ValueError Fix**
|
156 |
# We must return a tuple with a value for EACH output component, in the correct order.
|
157 |
return (
|
158 |
new_state, # 1. For the browser_state output
|
|
|
160 |
ui_updates[url_textbox], # 3.
|
161 |
ui_updates[links_display], # 4.
|
162 |
ui_updates[tab_selector], # 5.
|
163 |
+
log # 6. The log textbox is now updated correctly
|
164 |
)
|
165 |
|
166 |
+
# One thin coroutine wrapper per UI event, each delegating to master_handler.
# These must be real `async def` functions: a plain lambda that merely returns
# master_handler(...) is a sync callable (inspect.iscoroutinefunction is False)
# whose result is an un-awaited coroutine object rather than the output tuple.
# NOTE(review): Gradio is understood to choose between awaiting a handler and
# running it in a worker thread based on that check - confirm for the pinned version.
async def on_load(s):
    """Open the initial tab when the page loads."""
    return await master_handler(s, "new_tab", None)


async def on_go_click(s, v):
    """Navigate the active tab using the value `v` passed by the UI."""
    return await master_handler(s, "go", v)


async def on_click_link(s, v):
    """Dispatch a "click" action with the value `v` passed by the UI."""
    return await master_handler(s, "click", v)


async def on_new_tab(s):
    """Open a new tab."""
    return await master_handler(s, "new_tab", None)


async def on_close_tab(s):
    """Dispatch a "close_tab" action."""
    return await master_handler(s, "close_tab", None)


async def on_switch_tab(s, v):
    """Dispatch a "switch_tab" action with the value `v` passed by the UI."""
    return await master_handler(s, "switch_tab", v)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
|
174 |
# Wire up the new, clean event handlers
|
175 |
demo.load(on_load, inputs=[browser_state], outputs=[browser_state, *all_outputs])
|