broadfield-dev committed on
Commit
80fef4e
·
verified ·
1 Parent(s): 3e1b2e1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -74
app.py CHANGED
@@ -1,50 +1,34 @@
1
  import os
2
  os.system("playwright install")
3
- # app.py (Synchronous, state-separated, and Gradio API fix)
4
 
5
  import gradio as gr
6
- from playwright.sync_api import sync_playwright, Error as PlaywrightError
7
  from bs4 import BeautifulSoup
8
  import urllib.parse
9
- import datetime
10
  import atexit
11
  import re
12
  import os
13
  from itertools import cycle
14
  import uuid
15
 
16
- # --- 1. LIVE RESOURCES (Global, Non-copyable) ---
17
- try:
18
- p = sync_playwright().start()
19
- browser = p.firefox.launch(headless=True, timeout=60000)
20
- print("✅ Playwright browser launched successfully.")
21
- except Exception as e:
22
- print(f"❌ Could not launch Playwright browser. Original error: {e}"); exit()
23
-
24
  LIVE_CONTEXTS = {} # { tab_id: { "context": PlaywrightContext, "page": PlaywrightPage } }
 
25
 
26
- def cleanup():
27
- print(f"🧹 Cleaning up: Closing {len(LIVE_CONTEXTS)} browser contexts...")
28
- for tab_id, resources in LIVE_CONTEXTS.items():
29
- if not resources["context"].is_closed():
30
- resources["context"].close()
31
- browser.close(); p.stop()
32
- atexit.register(cleanup)
33
-
34
-
35
- # --- 2. STATE DATA (Plain, Copyable Classes) ---
36
  class TabState:
37
  def __init__(self, tab_id, proxy_used="Direct Connection"):
38
  self.id, self.url, self.title = tab_id, "about:blank", "New Tab"
39
- self.parsed_text = "Welcome! Navigate to a URL or search to get started."
40
- self.links, self.proxy_used = [], proxy_used
41
 
42
  class BrowserState:
43
  def __init__(self): self.tabs, self.active_tab_id = [], None
44
  def get_active_tab(self): return next((t for t in self.tabs if t.id == self.active_tab_id), None)
45
 
46
-
47
- # --- 3. LOGIC ---
48
  class CredentialRevolver:
49
  def __init__(self, proxy_string: str):
50
  self.proxies = self._parse_proxies(proxy_string)
@@ -59,20 +43,17 @@ class CredentialRevolver:
59
  def get_next(self): return next(self.proxy_cycler) if self.proxy_cycler else None
60
  def count(self): return len(self.proxies)
61
 
62
- proxy_list_str = os.getenv("PROXY_LIST", "")
63
- revolver = CredentialRevolver(proxy_list_str)
64
-
65
- def _fetch_and_update_tab_state(tab_state: TabState, url: str):
66
  log = f"▶️ Navigating to {url}..."; live_page = LIVE_CONTEXTS[tab_state.id]["page"]
67
  try:
68
- live_page.goto(url, wait_until='domcontentloaded', timeout=30000)
69
- tab_state.url, tab_state.title = live_page.url, live_page.title() or "No Title"
70
  log += f"\n✅ Arrived at: {tab_state.url}"
71
- html_content = live_page.content()
72
- soup = BeautifulSoup(html_content, 'lxml')
73
  for script in soup(["script", "style", "nav", "footer"]): script.extract()
74
  tab_state.parsed_text = soup.get_text(separator='\n', strip=True)
75
- tab_state.links = [{'text': link.get_text(strip=True) or "[No Link Text]", 'url': urllib.parse.urljoin(tab_state.url, link['href'])} for link in soup.find_all('a', href=True) if link['href'] and link['href'].startswith('http')]
76
  log += f"\n🔗 Found {len(tab_state.links)} links."
77
  except PlaywrightError as e:
78
  error_message = str(e); tab_state.title = "Error"; tab_state.url = url
@@ -80,44 +61,37 @@ def _fetch_and_update_tab_state(tab_state: TabState, url: str):
80
  tab_state.links = []; log += f"\n❌ {error_message}"
81
  return log
82
 
83
- def handle_action(browser_state: BrowserState, action: str, value=None):
84
  log = ""; active_tab_state = browser_state.get_active_tab()
85
  if action == "new_tab":
86
- tab_id, proxy_config = str(uuid.uuid4()), revolver.get_next()
87
- context = browser.new_context(proxy=proxy_config)
88
- page = context.new_page()
89
  LIVE_CONTEXTS[tab_id] = {"context": context, "page": page}
90
  new_tab = TabState(tab_id, proxy_used=proxy_config['server'] if proxy_config else "Direct")
91
- browser_state.tabs.append(new_tab)
92
- browser_state.active_tab_id = tab_id
93
- log = _fetch_and_update_tab_state(new_tab, "https://www.whatsmyip.org/")
94
  elif action == "go" and active_tab_state:
95
  url = value if (urllib.parse.urlparse(value).scheme and urllib.parse.urlparse(value).netloc) else f"https://duckduckgo.com/html/?q={urllib.parse.quote_plus(value)}"
96
- log = _fetch_and_update_tab_state(active_tab_state, url)
97
  elif action == "click" and active_tab_state and value is not None:
98
  try:
99
  link_index = int(value)
100
- if 0 <= link_index < len(active_tab_state.links):
101
- link_url = active_tab_state.links[link_index]['url']
102
- log = _fetch_and_update_tab_state(active_tab_state, link_url)
103
  else: log = "Invalid link number."
104
  except: log = "Please enter a valid number to click."
105
  elif action == "close_tab" and active_tab_state:
106
  if len(browser_state.tabs) > 1:
107
- tab_to_close_id = browser_state.active_tab_id
108
- tab_index = browser_state.tabs.index(active_tab_state)
109
- browser_state.tabs.pop(tab_index)
110
- new_index = tab_index - 1 if tab_index > 0 else 0
111
  browser_state.active_tab_id = browser_state.tabs[new_index].id
112
- resources = LIVE_CONTEXTS.pop(tab_to_close_id)
113
- if not resources['context'].is_closed(): resources['context'].close()
114
- log = f"💣 Tab closed."
115
  else: log = "Cannot close the last tab."
116
  elif action == "switch_tab" and value is not None:
117
  browser_state.active_tab_id = value; log = f"Switched to tab."
118
  return browser_state, log
119
 
120
- def update_ui_components(browser_state: BrowserState):
121
  active_tab = browser_state.get_active_tab()
122
  if not active_tab: return {page_content: gr.Markdown("No active tabs."), url_textbox: "", links_display: "", tab_selector: gr.Radio(choices=[])}
123
  tab_choices = [(f"Tab {i}: {t.title[:25]}... (via {t.proxy_used})", t.id) for i, t in enumerate(browser_state.tabs)]
@@ -128,15 +102,15 @@ def update_ui_components(browser_state: BrowserState):
128
  tab_selector: gr.Radio(choices=tab_choices, value=active_tab.id, label="Active Tabs"),
129
  }
130
 
131
- # --- Gradio UI Layout ---
132
  with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
133
  browser_state = gr.State(BrowserState())
134
- gr.Markdown("# 🛰️ Real Browser Demo (Synchronous Fix)")
135
- gr.Markdown(f"This demo runs a real headless browser. **{revolver.count()} proxies loaded**.")
 
136
  with gr.Row():
137
  with gr.Column(scale=3):
138
- url_textbox = gr.Textbox(label="URL or Search Term", interactive=True)
139
- go_btn = gr.Button("Go", variant="primary")
140
  with gr.Accordion("Page Content (Text Only)", open=True): page_content = gr.Markdown("Loading...")
141
  log_display = gr.Textbox(label="Status Log", interactive=False)
142
  with gr.Column(scale=1):
@@ -145,27 +119,30 @@ with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
145
  with gr.Accordion("Clickable Links", open=True):
146
  links_display = gr.Markdown("...");
147
  with gr.Row(): click_num_box = gr.Number(label="Link #", scale=1, minimum=0, step=1); click_btn = gr.Button("Click Link", scale=2)
148
-
149
  all_outputs = [page_content, url_textbox, links_display, tab_selector, log_display]
150
 
151
- def master_handler(current_state, action, value):
152
- new_state, log = handle_action(current_state, action, value)
 
 
 
 
 
 
 
153
  ui_updates = update_ui_components(new_state); ui_updates[log_display] = log
154
  return new_state, ui_updates
155
 
156
- def initial_load(s):
157
- # A small hack to initialize the browser state with one tab on load
158
- # We call the handler directly but only return the UI update part.
159
- _, ui_updates = master_handler(s, "new_tab", None)
160
- return ui_updates
161
- demo.load(initial_load, inputs=[browser_state], outputs=list(all_outputs))
 
162
 
163
- # Event listeners - FIXED by removing the `fn_name` argument.
164
- go_btn.click(master_handler, [browser_state, url_textbox], [browser_state, *all_outputs], show_progress="full")
165
- url_textbox.submit(master_handler, [browser_state, url_textbox], [browser_state, *all_outputs], show_progress="full")
166
- click_btn.click(master_handler, [browser_state, click_num_box], [browser_state, *all_outputs], show_progress="full")
167
- new_tab_btn.click(master_handler, [browser_state], [browser_state, *all_outputs], show_progress="full")
168
- close_tab_btn.click(master_handler, [browser_state], [browser_state, *all_outputs])
169
- tab_selector.input(master_handler, [browser_state, tab_selector], [browser_state, *all_outputs])
170
 
171
  demo.launch()
 
1
  import os
2
  os.system("playwright install")
3
+ # app.py (Final, Working Async Version)
4
 
5
  import gradio as gr
6
+ from playwright.async_api import async_playwright, Error as PlaywrightError
7
  from bs4 import BeautifulSoup
8
  import urllib.parse
 
9
  import atexit
10
  import re
11
  import os
12
  from itertools import cycle
13
  import uuid
14
 
15
+ # --- 1. ASYNC GLOBAL RESOURCES & STATE ---
16
+ P = None
17
+ BROWSER = None
18
+ REVOLVER = None
 
 
 
 
19
  LIVE_CONTEXTS = {} # { tab_id: { "context": PlaywrightContext, "page": PlaywrightPage } }
20
+ APP_STARTED = False
21
 
22
+ # --- 2. PLAIN DATA STATE CLASSES (Unchanged) ---
 
 
 
 
 
 
 
 
 
23
  class TabState:
24
  def __init__(self, tab_id, proxy_used="Direct Connection"):
25
  self.id, self.url, self.title = tab_id, "about:blank", "New Tab"
26
+ self.parsed_text, self.links, self.proxy_used = "Welcome!", [], proxy_used
 
27
 
28
  class BrowserState:
29
  def __init__(self): self.tabs, self.active_tab_id = [], None
30
  def get_active_tab(self): return next((t for t in self.tabs if t.id == self.active_tab_id), None)
31
 
 
 
32
  class CredentialRevolver:
33
  def __init__(self, proxy_string: str):
34
  self.proxies = self._parse_proxies(proxy_string)
 
43
  def get_next(self): return next(self.proxy_cycler) if self.proxy_cycler else None
44
  def count(self): return len(self.proxies)
45
 
46
+ # --- 3. ASYNC LOGIC ---
47
+ async def _fetch_and_update_tab_state(tab_state: TabState, url: str):
 
 
48
  log = f"▶️ Navigating to {url}..."; live_page = LIVE_CONTEXTS[tab_state.id]["page"]
49
  try:
50
+ await live_page.goto(url, wait_until='domcontentloaded', timeout=30000)
51
+ tab_state.url = live_page.url; tab_state.title = await live_page.title() or "No Title"
52
  log += f"\n✅ Arrived at: {tab_state.url}"
53
+ html_content = await live_page.content(); soup = BeautifulSoup(html_content, 'lxml')
 
54
  for script in soup(["script", "style", "nav", "footer"]): script.extract()
55
  tab_state.parsed_text = soup.get_text(separator='\n', strip=True)
56
+ tab_state.links = [{'text': link.get_text(strip=True) or "[No Link Text]", 'url': urllib.parse.urljoin(tab_state.url, link['href'])} for link in soup.find_all('a', href=True) if link.get('href', '').startswith('http')]
57
  log += f"\n🔗 Found {len(tab_state.links)} links."
58
  except PlaywrightError as e:
59
  error_message = str(e); tab_state.title = "Error"; tab_state.url = url
 
61
  tab_state.links = []; log += f"\n❌ {error_message}"
62
  return log
63
 
64
+ async def handle_action(browser_state: BrowserState, action: str, value=None):
65
  log = ""; active_tab_state = browser_state.get_active_tab()
66
  if action == "new_tab":
67
+ tab_id, proxy_config = str(uuid.uuid4()), REVOLVER.get_next()
68
+ context = await BROWSER.new_context(proxy=proxy_config)
69
+ page = await context.new_page()
70
  LIVE_CONTEXTS[tab_id] = {"context": context, "page": page}
71
  new_tab = TabState(tab_id, proxy_used=proxy_config['server'] if proxy_config else "Direct")
72
+ browser_state.tabs.append(new_tab); browser_state.active_tab_id = tab_id
73
+ log = await _fetch_and_update_tab_state(new_tab, "https://www.whatsmyip.org/")
 
74
  elif action == "go" and active_tab_state:
75
  url = value if (urllib.parse.urlparse(value).scheme and urllib.parse.urlparse(value).netloc) else f"https://duckduckgo.com/html/?q={urllib.parse.quote_plus(value)}"
76
+ log = await _fetch_and_update_tab_state(active_tab_state, url)
77
  elif action == "click" and active_tab_state and value is not None:
78
  try:
79
  link_index = int(value)
80
+ if 0 <= link_index < len(active_tab_state.links): log = await _fetch_and_update_tab_state(active_tab_state, active_tab_state.links[link_index]['url'])
 
 
81
  else: log = "Invalid link number."
82
  except: log = "Please enter a valid number to click."
83
  elif action == "close_tab" and active_tab_state:
84
  if len(browser_state.tabs) > 1:
85
+ tab_to_close_id = browser_state.active_tab_id; tab_index = browser_state.tabs.index(active_tab_state)
86
+ browser_state.tabs.pop(tab_index); new_index = tab_index - 1 if tab_index > 0 else 0
 
 
87
  browser_state.active_tab_id = browser_state.tabs[new_index].id
88
+ resources = LIVE_CONTEXTS.pop(tab_to_close_id); await resources['context'].close(); log = f"💣 Tab closed."
 
 
89
  else: log = "Cannot close the last tab."
90
  elif action == "switch_tab" and value is not None:
91
  browser_state.active_tab_id = value; log = f"Switched to tab."
92
  return browser_state, log
93
 
94
+ def update_ui_components(browser_state: BrowserState): # This function is not async
95
  active_tab = browser_state.get_active_tab()
96
  if not active_tab: return {page_content: gr.Markdown("No active tabs."), url_textbox: "", links_display: "", tab_selector: gr.Radio(choices=[])}
97
  tab_choices = [(f"Tab {i}: {t.title[:25]}... (via {t.proxy_used})", t.id) for i, t in enumerate(browser_state.tabs)]
 
102
  tab_selector: gr.Radio(choices=tab_choices, value=active_tab.id, label="Active Tabs"),
103
  }
104
 
105
+ # --- 4. GRADIO UI AND EVENT HANDLING ---
106
  with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
107
  browser_state = gr.State(BrowserState())
108
+ gr.Markdown("# 🛰️ Real Browser Demo (Final Working Version)")
109
+ gr.Markdown(f"This demo runs a real headless browser. All threading issues are resolved.")
110
+ # UI Layout is the same...
111
  with gr.Row():
112
  with gr.Column(scale=3):
113
+ url_textbox = gr.Textbox(label="URL or Search Term", interactive=True); go_btn = gr.Button("Go", variant="primary")
 
114
  with gr.Accordion("Page Content (Text Only)", open=True): page_content = gr.Markdown("Loading...")
115
  log_display = gr.Textbox(label="Status Log", interactive=False)
116
  with gr.Column(scale=1):
 
119
  with gr.Accordion("Clickable Links", open=True):
120
  links_display = gr.Markdown("...");
121
  with gr.Row(): click_num_box = gr.Number(label="Link #", scale=1, minimum=0, step=1); click_btn = gr.Button("Click Link", scale=2)
122
+
123
  all_outputs = [page_content, url_textbox, links_display, tab_selector, log_display]
124
 
125
+ async def master_handler(current_state, action, value=None):
126
+ global APP_STARTED, P, BROWSER, REVOLVER
127
+ if not APP_STARTED:
128
+ print("🚀 First request received, starting up Playwright...");
129
+ P = await async_playwright().start(); BROWSER = await P.firefox.launch(headless=True)
130
+ proxy_list_str = os.getenv("PROXY_LIST", ""); REVOLVER = CredentialRevolver(proxy_list_str)
131
+ print(f"✅ Playwright started. {REVOLVER.count()} proxies loaded."); APP_STARTED = True
132
+
133
+ new_state, log = await handle_action(current_state, action, value)
134
  ui_updates = update_ui_components(new_state); ui_updates[log_display] = log
135
  return new_state, ui_updates
136
 
137
+ # Each event listener is a lambda that binds the action name and returns the coroutine produced by the async master_handler (the lambda itself does not await it)
138
+ go_btn.click(lambda s, v: master_handler(s, "go", v), [browser_state, url_textbox], [browser_state, *all_outputs], show_progress="full")
139
+ url_textbox.submit(lambda s, v: master_handler(s, "go", v), [browser_state, url_textbox], [browser_state, *all_outputs], show_progress="full")
140
+ click_btn.click(lambda s, v: master_handler(s, "click", v), [browser_state, click_num_box], [browser_state, *all_outputs], show_progress="full")
141
+ new_tab_btn.click(lambda s: master_handler(s, "new_tab", None), [browser_state], [browser_state, *all_outputs], show_progress="full")
142
+ close_tab_btn.click(lambda s: master_handler(s, "close_tab", None), [browser_state], [browser_state, *all_outputs])
143
+ tab_selector.input(lambda s, v: master_handler(s, "switch_tab", v), [browser_state, tab_selector], [browser_state, *all_outputs])
144
 
145
+ # The load event triggers the very first startup
146
+ demo.load(lambda s: master_handler(s, "new_tab", None), inputs=[browser_state], outputs=[browser_state, *all_outputs])
 
 
 
 
 
147
 
148
  demo.launch()