broadfield-dev committed on
Commit
006e72f
·
verified ·
1 Parent(s): 35ae779

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -78
app.py CHANGED
@@ -1,9 +1,9 @@
1
  import os
2
  os.system("playwright install")
3
- # app.py (Final version with async API)
4
 
5
  import gradio as gr
6
- from playwright.async_api import async_playwright, Error as PlaywrightError
7
  from bs4 import BeautifulSoup
8
  import urllib.parse
9
  import datetime
@@ -13,14 +13,26 @@ import os
13
  from itertools import cycle
14
  import uuid
15
 
16
- # --- 1. ASYNC GLOBAL RESOURCES ---
17
- # We will initialize these in a Gradio startup event.
18
- P = None
19
- BROWSER = None
20
- REVOLVER = None
21
- LIVE_CONTEXTS = {} # { tab_id: { "context": PlaywrightContext, "page": PlaywrightPage } }
 
22
 
23
- # --- 2. PLAIN DATA STATE CLASSES (Unchanged) ---
 
 
 
 
 
 
 
 
 
 
 
24
  class TabState:
25
  def __init__(self, tab_id, proxy_used="Direct Connection"):
26
  self.id, self.url, self.title = tab_id, "about:blank", "New Tab"
@@ -31,7 +43,9 @@ class BrowserState:
31
  def __init__(self): self.tabs, self.active_tab_id = [], None
32
  def get_active_tab(self): return next((t for t in self.tabs if t.id == self.active_tab_id), None)
33
 
34
- class CredentialRevolver: # Unchanged
 
 
35
  def __init__(self, proxy_string: str):
36
  self.proxies = self._parse_proxies(proxy_string)
37
  if self.proxies: self.proxy_cycler = cycle(self.proxies)
@@ -45,18 +59,16 @@ class CredentialRevolver: # Unchanged
45
  def get_next(self): return next(self.proxy_cycler) if self.proxy_cycler else None
46
  def count(self): return len(self.proxies)
47
 
48
- # --- 3. ASYNC LOGIC ---
49
- # All functions interacting with Playwright are now `async def`
50
 
51
- async def _fetch_and_update_tab_state(tab_state: TabState, url: str):
52
- log = f"▶️ Navigating to {url}..."
53
- live_page = LIVE_CONTEXTS[tab_state.id]["page"]
54
  try:
55
- await live_page.goto(url, wait_until='domcontentloaded', timeout=30000)
56
- tab_state.url = live_page.url
57
- tab_state.title = await live_page.title() or "No Title"
58
  log += f"\n✅ Arrived at: {tab_state.url}"
59
- html_content = await live_page.content()
60
  soup = BeautifulSoup(html_content, 'lxml')
61
  for script in soup(["script", "style", "nav", "footer"]): script.extract()
62
  tab_state.parsed_text = soup.get_text(separator='\n', strip=True)
@@ -68,26 +80,26 @@ async def _fetch_and_update_tab_state(tab_state: TabState, url: str):
68
  tab_state.links = []; log += f"\n❌ {error_message}"
69
  return log
70
 
71
- async def handle_action(browser_state: BrowserState, action: str, value=None):
72
  log = ""; active_tab_state = browser_state.get_active_tab()
73
  if action == "new_tab":
74
- tab_id = str(uuid.uuid4())
75
- proxy_config = REVOLVER.get_next()
76
- context = await BROWSER.new_context(proxy=proxy_config)
77
- page = await context.new_page()
78
  LIVE_CONTEXTS[tab_id] = {"context": context, "page": page}
79
  new_tab = TabState(tab_id, proxy_used=proxy_config['server'] if proxy_config else "Direct")
80
  browser_state.tabs.append(new_tab)
81
  browser_state.active_tab_id = tab_id
82
- log = await _fetch_and_update_tab_state(new_tab, "https://www.whatsmyip.org/")
83
  elif action == "go" and active_tab_state:
84
  url = value if (urllib.parse.urlparse(value).scheme and urllib.parse.urlparse(value).netloc) else f"https://duckduckgo.com/html/?q={urllib.parse.quote_plus(value)}"
85
- log = await _fetch_and_update_tab_state(active_tab_state, url)
86
  elif action == "click" and active_tab_state and value is not None:
87
  try:
88
  link_index = int(value)
89
  if 0 <= link_index < len(active_tab_state.links):
90
- log = await _fetch_and_update_tab_state(active_tab_state, active_tab_state.links[link_index]['url'])
 
91
  else: log = "Invalid link number."
92
  except: log = "Please enter a valid number to click."
93
  elif action == "close_tab" and active_tab_state:
@@ -98,14 +110,14 @@ async def handle_action(browser_state: BrowserState, action: str, value=None):
98
  new_index = tab_index - 1 if tab_index > 0 else 0
99
  browser_state.active_tab_id = browser_state.tabs[new_index].id
100
  resources = LIVE_CONTEXTS.pop(tab_to_close_id)
101
- await resources['context'].close()
102
  log = f"💣 Tab closed."
103
  else: log = "Cannot close the last tab."
104
  elif action == "switch_tab" and value is not None:
105
  browser_state.active_tab_id = value; log = f"Switched to tab."
106
  return browser_state, log
107
 
108
- def update_ui_components(browser_state: BrowserState): # This function is not async
109
  active_tab = browser_state.get_active_tab()
110
  if not active_tab: return {page_content: gr.Markdown("No active tabs."), url_textbox: "", links_display: "", tab_selector: gr.Radio(choices=[])}
111
  tab_choices = [(f"Tab {i}: {t.title[:25]}... (via {t.proxy_used})", t.id) for i, t in enumerate(browser_state.tabs)]
@@ -116,26 +128,11 @@ def update_ui_components(browser_state: BrowserState): # This function is not as
116
  tab_selector: gr.Radio(choices=tab_choices, value=active_tab.id, label="Active Tabs"),
117
  }
118
 
119
- # --- 4. GRADIO UI AND STARTUP/SHUTDOWN EVENTS ---
120
- async def startup():
121
- global P, BROWSER, REVOLVER
122
- print("🚀 App starting up...")
123
- P = await async_playwright().start()
124
- BROWSER = await P.firefox.launch(headless=True)
125
- proxy_list_str = os.getenv("PROXY_LIST", "")
126
- REVOLVER = CredentialRevolver(proxy_list_str)
127
- print(f"✅ Playwright started. {REVOLVER.count()} proxies loaded.")
128
-
129
- async def shutdown():
130
- print("🧹 App shutting down...")
131
- if BROWSER: await BROWSER.close()
132
- if P: await P.stop()
133
- print("✅ Playwright stopped.")
134
-
135
  with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
136
  browser_state = gr.State(BrowserState())
137
- gr.Markdown("# 🛰️ Real Browser Demo (Async API)")
138
- # UI Layout is the same...
139
  with gr.Row():
140
  with gr.Column(scale=3):
141
  url_textbox = gr.Textbox(label="URL or Search Term", interactive=True)
@@ -146,44 +143,29 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
146
  with gr.Row(): new_tab_btn = gr.Button("➕ New Tab"); close_tab_btn = gr.Button("❌ Close Tab")
147
  tab_selector = gr.Radio(choices=[], label="Active Tabs", interactive=True)
148
  with gr.Accordion("Clickable Links", open=True):
149
- links_display = gr.Markdown("...")
150
  with gr.Row(): click_num_box = gr.Number(label="Link #", scale=1, minimum=0, step=1); click_btn = gr.Button("Click Link", scale=2)
151
-
152
  all_outputs = [page_content, url_textbox, links_display, tab_selector, log_display]
153
 
154
- async def master_handler(current_state, action, value):
155
- new_state, log = await handle_action(current_state, action, value)
156
- ui_updates = update_ui_components(new_state)
157
- ui_updates[log_display] = log
158
  return new_state, ui_updates
159
 
160
- # Initial Load - create first tab
161
- async def initial_load(s):
162
- _, ui_updates = await master_handler(s, "new_tab", None)
 
163
  return ui_updates
164
-
165
  demo.load(initial_load, inputs=[browser_state], outputs=list(all_outputs))
166
 
167
- # Event listeners - Gradio automatically awaits async functions
168
- go_btn.click(master_handler, [browser_state, url_textbox], [browser_state, *all_outputs], fn_name="go", show_progress="full")
169
- url_textbox.submit(master_handler, [browser_state, url_textbox], [browser_state, *all_outputs], fn_name="go", show_progress="full")
170
- click_btn.click(master_handler, [browser_state, click_num_box], [browser_state, *all_outputs], fn_name="click", show_progress="full")
171
- new_tab_btn.click(master_handler, [browser_state], [browser_state, *all_outputs], fn_name="new_tab", show_progress="full")
172
- close_tab_btn.click(master_handler, [browser_state], [browser_state, *all_outputs], fn_name="close_tab")
173
- tab_selector.input(master_handler, [browser_state, tab_selector], [browser_state, *all_outputs], fn_name="switch_tab")
174
-
175
- # The `startup` and `shutdown` events are not available in standard `Blocks`.
176
- # We manage this by doing the startup inside the first `load` event.
177
- # The global setup is the best way for Spaces.
178
- # A small tweak to make it work without official startup events:
179
- # The startup logic is moved to the global scope but needs an async context.
180
- # We will use the existing startup/shutdown logic and assume the Gradio version supports it or handles it gracefully.
181
- # The best practice would be to use a framework that has explicit startup/shutdown events like FastAPI.
182
- # For Gradio Spaces, the singleton pattern with async calls in handlers is the way to go.
183
-
184
- # Let's adjust for standard Gradio deployment.
185
- # We'll run startup manually before launching.
186
- import asyncio
187
- asyncio.run(startup())
188
 
189
  demo.launch()
 
1
  import os
2
  os.system("playwright install")
3
+ # app.py (Synchronous, state-separated, and Gradio API fix)
4
 
5
  import gradio as gr
6
+ from playwright.sync_api import sync_playwright, Error as PlaywrightError
7
  from bs4 import BeautifulSoup
8
  import urllib.parse
9
  import datetime
 
13
  from itertools import cycle
14
  import uuid
15
 
16
+ # --- 1. LIVE RESOURCES (Global, Non-copyable) ---
17
# Start the Playwright driver and launch one shared headless Firefox instance.
# `p` and `browser` are module-level globals used by the tab handlers and by
# the exit-time cleanup.
try:
    p = sync_playwright().start()
    browser = p.firefox.launch(headless=True, timeout=60000)
    print("✅ Playwright browser launched successfully.")
except Exception as e:
    print(f"❌ Could not launch Playwright browser. Original error: {e}")
    # Exit with a non-zero status so the hosting environment sees the failure.
    # The bare `exit()` helper reports status 0 and is only injected by the
    # `site` module, so it is not guaranteed to exist.
    raise SystemExit(1) from e
23
 
24
+ LIVE_CONTEXTS = {} # { tab_id: { "context": PlaywrightContext, "page": PlaywrightPage } }
25
+
26
def cleanup():
    """Release every live Playwright resource; registered to run at exit.

    Closes each browser context stored in ``LIVE_CONTEXTS``, then the shared
    browser, then the Playwright driver. Cleanup is best-effort: an error
    while releasing one resource is printed and the remaining resources are
    still released.
    """
    print(f"🧹 Cleaning up: Closing {len(LIVE_CONTEXTS)} browser contexts...")
    # Iterate over a snapshot so the registry can be emptied afterwards.
    for tab_id, resources in list(LIVE_CONTEXTS.items()):
        try:
            # NOTE(review): the original guarded this with context.is_closed();
            # Playwright documents is_closed() for Page, and it does not appear
            # to exist on BrowserContext -- confirm. Calling close() directly
            # and catching errors avoids depending on it.
            resources["context"].close()
        except Exception as e:
            print(f"⚠️ Could not close context for tab {tab_id}: {e}")
    LIVE_CONTEXTS.clear()

    # Shut down the browser and the driver independently so a failure in the
    # first does not leave the second running.
    for closer in (browser.close, p.stop):
        try:
            closer()
        except Exception as e:
            print(f"⚠️ Error during browser shutdown: {e}")


atexit.register(cleanup)
33
+
34
+
35
+ # --- 2. STATE DATA (Plain, Copyable Classes) ---
36
  class TabState:
37
  def __init__(self, tab_id, proxy_used="Direct Connection"):
38
  self.id, self.url, self.title = tab_id, "about:blank", "New Tab"
 
43
  def __init__(self): self.tabs, self.active_tab_id = [], None
44
  def get_active_tab(self): return next((t for t in self.tabs if t.id == self.active_tab_id), None)
45
 
46
+
47
+ # --- 3. LOGIC ---
48
+ class CredentialRevolver:
49
  def __init__(self, proxy_string: str):
50
  self.proxies = self._parse_proxies(proxy_string)
51
  if self.proxies: self.proxy_cycler = cycle(self.proxies)
 
59
  def get_next(self): return next(self.proxy_cycler) if self.proxy_cycler else None
60
  def count(self): return len(self.proxies)
61
 
62
+ proxy_list_str = os.getenv("PROXY_LIST", "")
63
+ revolver = CredentialRevolver(proxy_list_str)
64
 
65
+ def _fetch_and_update_tab_state(tab_state: TabState, url: str):
66
+ log = f"▶️ Navigating to {url}..."; live_page = LIVE_CONTEXTS[tab_state.id]["page"]
 
67
  try:
68
+ live_page.goto(url, wait_until='domcontentloaded', timeout=30000)
69
+ tab_state.url, tab_state.title = live_page.url, live_page.title() or "No Title"
 
70
  log += f"\n✅ Arrived at: {tab_state.url}"
71
+ html_content = live_page.content()
72
  soup = BeautifulSoup(html_content, 'lxml')
73
  for script in soup(["script", "style", "nav", "footer"]): script.extract()
74
  tab_state.parsed_text = soup.get_text(separator='\n', strip=True)
 
80
  tab_state.links = []; log += f"\n❌ {error_message}"
81
  return log
82
 
83
+ def handle_action(browser_state: BrowserState, action: str, value=None):
84
  log = ""; active_tab_state = browser_state.get_active_tab()
85
  if action == "new_tab":
86
+ tab_id, proxy_config = str(uuid.uuid4()), revolver.get_next()
87
+ context = browser.new_context(proxy=proxy_config)
88
+ page = context.new_page()
 
89
  LIVE_CONTEXTS[tab_id] = {"context": context, "page": page}
90
  new_tab = TabState(tab_id, proxy_used=proxy_config['server'] if proxy_config else "Direct")
91
  browser_state.tabs.append(new_tab)
92
  browser_state.active_tab_id = tab_id
93
+ log = _fetch_and_update_tab_state(new_tab, "https://www.whatsmyip.org/")
94
  elif action == "go" and active_tab_state:
95
  url = value if (urllib.parse.urlparse(value).scheme and urllib.parse.urlparse(value).netloc) else f"https://duckduckgo.com/html/?q={urllib.parse.quote_plus(value)}"
96
+ log = _fetch_and_update_tab_state(active_tab_state, url)
97
  elif action == "click" and active_tab_state and value is not None:
98
  try:
99
  link_index = int(value)
100
  if 0 <= link_index < len(active_tab_state.links):
101
+ link_url = active_tab_state.links[link_index]['url']
102
+ log = _fetch_and_update_tab_state(active_tab_state, link_url)
103
  else: log = "Invalid link number."
104
  except: log = "Please enter a valid number to click."
105
  elif action == "close_tab" and active_tab_state:
 
110
  new_index = tab_index - 1 if tab_index > 0 else 0
111
  browser_state.active_tab_id = browser_state.tabs[new_index].id
112
  resources = LIVE_CONTEXTS.pop(tab_to_close_id)
113
+ if not resources['context'].is_closed(): resources['context'].close()
114
  log = f"💣 Tab closed."
115
  else: log = "Cannot close the last tab."
116
  elif action == "switch_tab" and value is not None:
117
  browser_state.active_tab_id = value; log = f"Switched to tab."
118
  return browser_state, log
119
 
120
+ def update_ui_components(browser_state: BrowserState):
121
  active_tab = browser_state.get_active_tab()
122
  if not active_tab: return {page_content: gr.Markdown("No active tabs."), url_textbox: "", links_display: "", tab_selector: gr.Radio(choices=[])}
123
  tab_choices = [(f"Tab {i}: {t.title[:25]}... (via {t.proxy_used})", t.id) for i, t in enumerate(browser_state.tabs)]
 
128
  tab_selector: gr.Radio(choices=tab_choices, value=active_tab.id, label="Active Tabs"),
129
  }
130
 
131
+ # --- Gradio UI Layout ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
132
  with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
133
  browser_state = gr.State(BrowserState())
134
+ gr.Markdown("# 🛰️ Real Browser Demo (Synchronous Fix)")
135
+ gr.Markdown(f"This demo runs a real headless browser. **{revolver.count()} proxies loaded**.")
136
  with gr.Row():
137
  with gr.Column(scale=3):
138
  url_textbox = gr.Textbox(label="URL or Search Term", interactive=True)
 
143
  with gr.Row(): new_tab_btn = gr.Button("➕ New Tab"); close_tab_btn = gr.Button("❌ Close Tab")
144
  tab_selector = gr.Radio(choices=[], label="Active Tabs", interactive=True)
145
  with gr.Accordion("Clickable Links", open=True):
146
+ links_display = gr.Markdown("...");
147
  with gr.Row(): click_num_box = gr.Number(label="Link #", scale=1, minimum=0, step=1); click_btn = gr.Button("Click Link", scale=2)
148
+
149
  all_outputs = [page_content, url_textbox, links_display, tab_selector, log_display]
150
 
151
# NOTE: this section belongs inside the `with gr.Blocks(...) as demo:` block;
# the listeners below are attached to components created in that context.

def master_handler(current_state, action, value=None):
    """Run one browser action and compute the resulting UI updates.

    Args:
        current_state: The session's BrowserState.
        action: Action name understood by handle_action ("new_tab", "go",
            "click", "close_tab" or "switch_tab").
        value: Optional payload for the action (URL or search text, link
            number, or tab id).

    Returns:
        A ``(new_state, ui_updates)`` tuple, where ``ui_updates`` maps output
        components to their new values, including the log text.
    """
    new_state, log = handle_action(current_state, action, value)
    ui_updates = update_ui_components(new_state)
    ui_updates[log_display] = log
    return new_state, ui_updates


def _listener_for(action):
    """Build a Gradio callback bound to a single action name.

    Gradio passes only the values of the listed input components, so the
    action name cannot arrive as an argument; it is captured here instead.
    The callback returns one dict keyed by output component, including the
    state component, so it matches `[browser_state, *all_outputs]`.
    """
    def listener(current_state, value=None):
        new_state, ui_updates = master_handler(current_state, action, value)
        ui_updates[browser_state] = new_state
        return ui_updates
    return listener


def initial_load(s):
    """Open the first tab when the page loads and return the UI updates."""
    return _listener_for("new_tab")(s)


state_and_outputs = [browser_state, *all_outputs]

demo.load(initial_load, inputs=[browser_state], outputs=state_and_outputs)

# Event listeners: each one is bound to its action name via _listener_for,
# replacing the unsupported `fn_name` argument.
go_btn.click(_listener_for("go"), [browser_state, url_textbox], state_and_outputs, show_progress="full")
url_textbox.submit(_listener_for("go"), [browser_state, url_textbox], state_and_outputs, show_progress="full")
click_btn.click(_listener_for("click"), [browser_state, click_num_box], state_and_outputs, show_progress="full")
new_tab_btn.click(_listener_for("new_tab"), [browser_state], state_and_outputs, show_progress="full")
close_tab_btn.click(_listener_for("close_tab"), [browser_state], state_and_outputs)
tab_selector.input(_listener_for("switch_tab"), [browser_state, tab_selector], state_and_outputs)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
170
 
171
  demo.launch()