broadfield-dev committed on
Commit
d164b37
·
verified ·
1 Parent(s): f2b00a1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +39 -52
app.py CHANGED
@@ -11,39 +11,27 @@ from itertools import cycle
11
  import uuid
12
 
13
  # --- 1. GLOBAL RESOURCES & STATE ---
14
- # These are initialized on the first request to be compatible with Spaces.
15
  P = None
16
  BROWSER = None
17
  REVOLVER = None
18
- LIVE_CONTEXTS = {} # { tab_id: { "context": PlaywrightContext, "page": PlaywrightPage } }
19
  APP_STARTED = False
20
 
21
- # --- 2. PLAIN DATA STATE CLASSES (Copyable) ---
22
  class TabState:
23
- """A plain data class representing a tab's state. Fully copyable."""
24
  def __init__(self, tab_id, proxy_used="Direct Connection"):
25
- self.id = tab_id
26
- self.url = "about:blank"
27
- self.title = "New Tab"
28
- self.parsed_text = "Welcome! Navigate to a URL or search to get started."
29
- self.links = []
30
- self.proxy_used = proxy_used
31
 
32
  class BrowserState:
33
- """A plain data class representing the browser's overall state."""
34
- def __init__(self):
35
- self.tabs = []
36
- self.active_tab_id = None
37
- def get_active_tab(self):
38
- if not self.active_tab_id: return None
39
- return next((t for t in self.tabs if t.id == self.active_tab_id), None)
40
 
41
  class CredentialRevolver:
42
- """Manages a rotating list of proxies from an environment variable."""
43
  def __init__(self, proxy_string: str):
44
  self.proxies = self._parse_proxies(proxy_string)
45
- if self.proxies: self.proxy_cycler = cycle(self.proxies)
46
- else: self.proxy_cycler = None
47
  def _parse_proxies(self, proxy_string: str):
48
  proxies = []
49
  for line in proxy_string.strip().splitlines():
@@ -53,42 +41,38 @@ class CredentialRevolver:
53
  proxies.append({"server": server, "username": parsed.username, "password": parsed.password})
54
  except: pass
55
  return proxies
56
- def get_next(self):
57
- return next(self.proxy_cycler) if self.proxy_cycler else None
58
- def count(self):
59
- return len(self.proxies)
60
 
61
  # --- 3. CORE ASYNC LOGIC ---
62
  async def _fetch_and_update_tab_state(tab_state: TabState, url: str):
63
- """Uses Playwright to navigate and BeautifulSoup to parse, updating the TabState object."""
64
  log = f"▶️ Navigating to {url}..."; live_page = LIVE_CONTEXTS[tab_state.id]["page"]
65
  try:
66
  await live_page.goto(url, wait_until='domcontentloaded', timeout=30000)
67
  tab_state.url = live_page.url; tab_state.title = await live_page.title() or "No Title"
68
  log += f"\n✅ Arrived at: {tab_state.url}"
69
  html_content = await live_page.content(); soup = BeautifulSoup(html_content, 'lxml')
70
- for script in soup(["script", "style", "nav", "footer"]): script.extract()
71
  tab_state.parsed_text = soup.get_text(separator='\n', strip=True)
72
  tab_state.links = [{'text': link.get_text(strip=True) or "[No Link Text]", 'url': urllib.parse.urljoin(tab_state.url, link['href'])} for link in soup.find_all('a', href=True) if link.get('href', '').startswith('http')]
73
  log += f"\n🔗 Found {len(tab_state.links)} links."
74
- except PlaywrightError as e:
75
- error_message = str(e); tab_state.title = "Error"; tab_state.url = url
76
  tab_state.parsed_text = f"❌ Failed to load page.\n\nError: {error_message}"
77
  tab_state.links = []; log += f"\n❌ {error_message}"
78
  return log
79
 
80
  async def handle_action(browser_state: BrowserState, action: str, value=None):
81
- """Modifies the state based on user actions."""
82
  log = ""; active_tab_state = browser_state.get_active_tab()
83
  if action == "new_tab":
84
  tab_id, proxy_config = str(uuid.uuid4()), REVOLVER.get_next()
85
  context = await BROWSER.new_context(proxy=proxy_config)
86
  page = await context.new_page()
87
  LIVE_CONTEXTS[tab_id] = {"context": context, "page": page}
88
- new_tab = TabState(tab_id, proxy_used=proxy_config['server'] if proxy_config else "Direct")
89
  browser_state.tabs.append(new_tab); browser_state.active_tab_id = tab_id
90
  log = await _fetch_and_update_tab_state(new_tab, "https://www.whatsmyip.org/")
91
- elif action == "go" and active_tab_state:
92
  url = value if (urllib.parse.urlparse(value).scheme and urllib.parse.urlparse(value).netloc) else f"https://duckduckgo.com/html/?q={urllib.parse.quote_plus(value)}"
93
  log = await _fetch_and_update_tab_state(active_tab_state, url)
94
  elif action == "click" and active_tab_state and value is not None:
@@ -108,13 +92,10 @@ async def handle_action(browser_state: BrowserState, action: str, value=None):
108
  browser_state.active_tab_id = value; log = f"Switched to tab."
109
  return browser_state, log
110
 
111
- # **BUG FIX 1: NameError Fix**
112
- # This function is now defined before it is called by the master_handler.
113
  def update_ui_components(browser_state: BrowserState):
114
- """Generates a dictionary of UI updates from the state. Not async."""
115
  active_tab = browser_state.get_active_tab()
116
  if not active_tab: return {page_content: gr.Markdown("No active tabs."), url_textbox: "", links_display: "", tab_selector: gr.Radio(choices=[])}
117
- tab_choices = [(f"Tab {i}: {t.title[:25]}... (via {t.proxy_used})", t.id) for i, t in enumerate(browser_state.tabs)]
118
  links_md = "### 🔗 Links on Page\n" + ('\n'.join(f"{i}. [{link['text'][:80]}]({link['url']})" for i, link in enumerate(active_tab.links[:25])) if active_tab.links else "_No links found._")
119
  return {
120
  page_content: gr.Markdown(f"# {active_tab.title}\n**URL:** {active_tab.url}\n\n---\n\n{active_tab.parsed_text[:2000]}..."),
@@ -126,7 +107,7 @@ def update_ui_components(browser_state: BrowserState):
126
  with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
127
  browser_state = gr.State(BrowserState())
128
  gr.Markdown("# 🛰️ Real Browser Demo (Final Working Version)")
129
- # UI Layout is the same...
130
  with gr.Row():
131
  with gr.Column(scale=3):
132
  url_textbox = gr.Textbox(label="URL or Search Term", interactive=True); go_btn = gr.Button("Go", variant="primary")
@@ -152,26 +133,32 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
152
  new_state, log = await handle_action(current_state, action, value)
153
  ui_updates = update_ui_components(new_state)
154
 
155
- # **BUG FIX 2: ValueError Fix**
156
- # We must return a tuple with a value for EACH output component, in the correct order.
157
  return (
158
- new_state, # 1. For the browser_state output
159
- ui_updates[page_content], # 2.
160
- ui_updates[url_textbox], # 3.
161
- ui_updates[links_display], # 4.
162
- ui_updates[tab_selector], # 5.
163
- log # 6. The log textbox is now updated correctly
164
  )
165
 
166
- # Define simple async lambdas for each event to ensure clean wiring
167
- on_load = lambda s: master_handler(s, "new_tab", None)
168
- on_go_click = lambda s, v: master_handler(s, "go", v)
169
- on_click_link = lambda s, v: master_handler(s, "click", v)
170
- on_new_tab = lambda s: master_handler(s, "new_tab", None)
171
- on_close_tab = lambda s: master_handler(s, "close_tab", None)
172
- on_switch_tab = lambda s, v: master_handler(s, "switch_tab", v)
 
 
 
 
 
 
 
 
 
173
 
174
- # Wire up the new, clean event handlers
175
  demo.load(on_load, inputs=[browser_state], outputs=[browser_state, *all_outputs])
176
  go_btn.click(on_go_click, [browser_state, url_textbox], [browser_state, *all_outputs], show_progress="full")
177
  url_textbox.submit(on_go_click, [browser_state, url_textbox], [browser_state, *all_outputs], show_progress="full")
 
11
  import uuid
12
 
13
  # --- 1. GLOBAL RESOURCES & STATE ---
14
+ # Initialized on the first request to be compatible with Spaces.
15
  P = None
16
  BROWSER = None
17
  REVOLVER = None
18
+ LIVE_CONTEXTS = {}
19
  APP_STARTED = False
20
 
21
+ # --- 2. PLAIN DATA STATE CLASSES ---
22
  class TabState:
 
23
  def __init__(self, tab_id, proxy_used="Direct Connection"):
24
+ self.id, self.url, self.title = tab_id, "about:blank", "New Tab"
25
+ self.parsed_text, self.links, self.proxy_used = "Welcome!", [], proxy_used
 
 
 
 
26
 
27
  class BrowserState:
28
+ def __init__(self): self.tabs, self.active_tab_id = [], None
29
+ def get_active_tab(self): return next((t for t in self.tabs if t.id == self.active_tab_id), None)
 
 
 
 
 
30
 
31
  class CredentialRevolver:
 
32
  def __init__(self, proxy_string: str):
33
  self.proxies = self._parse_proxies(proxy_string)
34
+ self.proxy_cycler = cycle(self.proxies) if self.proxies else None
 
35
  def _parse_proxies(self, proxy_string: str):
36
  proxies = []
37
  for line in proxy_string.strip().splitlines():
 
41
  proxies.append({"server": server, "username": parsed.username, "password": parsed.password})
42
  except: pass
43
  return proxies
44
+ def get_next(self): return next(self.proxy_cycler) if self.proxy_cycler else None
45
+ def count(self): return len(self.proxies) if self.proxies else 0
 
 
46
 
47
  # --- 3. CORE ASYNC LOGIC ---
48
  async def _fetch_and_update_tab_state(tab_state: TabState, url: str):
 
49
  log = f"▶️ Navigating to {url}..."; live_page = LIVE_CONTEXTS[tab_state.id]["page"]
50
  try:
51
  await live_page.goto(url, wait_until='domcontentloaded', timeout=30000)
52
  tab_state.url = live_page.url; tab_state.title = await live_page.title() or "No Title"
53
  log += f"\n✅ Arrived at: {tab_state.url}"
54
  html_content = await live_page.content(); soup = BeautifulSoup(html_content, 'lxml')
55
+ for el in soup(["script", "style", "nav", "footer", "aside"]): el.extract()
56
  tab_state.parsed_text = soup.get_text(separator='\n', strip=True)
57
  tab_state.links = [{'text': link.get_text(strip=True) or "[No Link Text]", 'url': urllib.parse.urljoin(tab_state.url, link['href'])} for link in soup.find_all('a', href=True) if link.get('href', '').startswith('http')]
58
  log += f"\n🔗 Found {len(tab_state.links)} links."
59
+ except Exception as e:
60
+ error_message = str(e).splitlines()[0]; tab_state.title = "Error"; tab_state.url = url
61
  tab_state.parsed_text = f"❌ Failed to load page.\n\nError: {error_message}"
62
  tab_state.links = []; log += f"\n❌ {error_message}"
63
  return log
64
 
65
  async def handle_action(browser_state: BrowserState, action: str, value=None):
 
66
  log = ""; active_tab_state = browser_state.get_active_tab()
67
  if action == "new_tab":
68
  tab_id, proxy_config = str(uuid.uuid4()), REVOLVER.get_next()
69
  context = await BROWSER.new_context(proxy=proxy_config)
70
  page = await context.new_page()
71
  LIVE_CONTEXTS[tab_id] = {"context": context, "page": page}
72
+ new_tab = TabState(tab_id, proxy_used=proxy_config['server'].split('@')[-1] if proxy_config else "Direct")
73
  browser_state.tabs.append(new_tab); browser_state.active_tab_id = tab_id
74
  log = await _fetch_and_update_tab_state(new_tab, "https://www.whatsmyip.org/")
75
+ elif action == "go" and active_tab_state and value:
76
  url = value if (urllib.parse.urlparse(value).scheme and urllib.parse.urlparse(value).netloc) else f"https://duckduckgo.com/html/?q={urllib.parse.quote_plus(value)}"
77
  log = await _fetch_and_update_tab_state(active_tab_state, url)
78
  elif action == "click" and active_tab_state and value is not None:
 
92
  browser_state.active_tab_id = value; log = f"Switched to tab."
93
  return browser_state, log
94
 
 
 
95
  def update_ui_components(browser_state: BrowserState):
 
96
  active_tab = browser_state.get_active_tab()
97
  if not active_tab: return {page_content: gr.Markdown("No active tabs."), url_textbox: "", links_display: "", tab_selector: gr.Radio(choices=[])}
98
+ tab_choices = [(f"Tab {i}: {t.title[:25]}... ({t.proxy_used})", t.id) for i, t in enumerate(browser_state.tabs)]
99
  links_md = "### 🔗 Links on Page\n" + ('\n'.join(f"{i}. [{link['text'][:80]}]({link['url']})" for i, link in enumerate(active_tab.links[:25])) if active_tab.links else "_No links found._")
100
  return {
101
  page_content: gr.Markdown(f"# {active_tab.title}\n**URL:** {active_tab.url}\n\n---\n\n{active_tab.parsed_text[:2000]}..."),
 
107
  with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
108
  browser_state = gr.State(BrowserState())
109
  gr.Markdown("# 🛰️ Real Browser Demo (Final Working Version)")
110
+
111
  with gr.Row():
112
  with gr.Column(scale=3):
113
  url_textbox = gr.Textbox(label="URL or Search Term", interactive=True); go_btn = gr.Button("Go", variant="primary")
 
133
  new_state, log = await handle_action(current_state, action, value)
134
  ui_updates = update_ui_components(new_state)
135
 
 
 
136
  return (
137
+ new_state,
138
+ ui_updates[page_content],
139
+ ui_updates[url_textbox],
140
+ ui_updates[links_display],
141
+ ui_updates[tab_selector],
142
+ log
143
  )
144
 
145
+ # Fix for Gradio's async return-value error:
146
+ # each event listener is its own `async def` that awaits master_handler,
147
+ # so Gradio receives the tuple of return values, one per output component.
148
+
149
async def on_load(state):
    """Run the "new_tab" action when the app loads."""
    return await master_handler(state, "new_tab", None)


async def on_go_click(state, value):
    """Run the "go" action with the submitted URL or search term."""
    return await master_handler(state, "go", value)


async def on_click_link(state, value):
    """Run the "click" action with the given value."""
    return await master_handler(state, "click", value)


async def on_new_tab(state):
    """Run the "new_tab" action."""
    return await master_handler(state, "new_tab", None)


async def on_close_tab(state):
    """Run the "close_tab" action."""
    return await master_handler(state, "close_tab", None)


async def on_switch_tab(state, value):
    """Run the "switch_tab" action with the selected tab id."""
    return await master_handler(state, "switch_tab", value)
161
 
 
162
  demo.load(on_load, inputs=[browser_state], outputs=[browser_state, *all_outputs])
163
  go_btn.click(on_go_click, [browser_state, url_textbox], [browser_state, *all_outputs], show_progress="full")
164
  url_textbox.submit(on_go_click, [browser_state, url_textbox], [browser_state, *all_outputs], show_progress="full")