File size: 13,375 Bytes
3819331
 
 
31d5b37
 
 
3819331
31d5b37
 
15fdb32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31d5b37
 
15fdb32
31d5b37
 
15fdb32
 
 
 
 
31d5b37
 
15fdb32
31d5b37
 
 
15fdb32
3819331
 
15fdb32
 
 
 
 
31d5b37
 
 
15fdb32
31d5b37
 
15fdb32
 
 
31d5b37
 
15fdb32
3819331
31d5b37
 
 
3819331
 
15fdb32
3819331
31d5b37
 
15fdb32
31d5b37
 
 
 
 
 
 
 
 
 
15fdb32
 
31d5b37
 
 
 
 
 
 
 
 
 
15fdb32
31d5b37
15fdb32
31d5b37
3819331
31d5b37
3819331
31d5b37
 
15fdb32
31d5b37
3819331
15fdb32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3819331
 
31d5b37
15fdb32
31d5b37
3819331
 
 
31d5b37
15fdb32
31d5b37
 
3819331
 
31d5b37
15fdb32
31d5b37
3819331
31d5b37
 
 
15fdb32
31d5b37
15fdb32
3819331
15fdb32
31d5b37
 
 
15fdb32
31d5b37
 
 
 
 
 
15fdb32
 
 
 
3819331
31d5b37
 
15fdb32
 
31d5b37
3819331
31d5b37
 
 
3819331
 
 
15fdb32
31d5b37
15fdb32
 
31d5b37
 
 
 
 
15fdb32
 
 
 
 
 
 
 
 
 
3819331
15fdb32
3819331
15fdb32
31d5b37
15fdb32
31d5b37
15fdb32
 
3819331
 
15fdb32
 
31d5b37
15fdb32
31d5b37
3819331
15fdb32
3819331
31d5b37
 
15fdb32
 
31d5b37
15fdb32
31d5b37
 
 
 
 
 
 
 
 
3819331
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
# app.py

import gradio as gr
from playwright.sync_api import sync_playwright, Error as PlaywrightError
from bs4 import BeautifulSoup
import urllib.parse
import datetime
import atexit
import re
import os
from itertools import cycle

# --- NEW: Credential Revolver Class ---
class CredentialRevolver:
    """Hand out proxy configurations in round-robin order.

    The proxy list is parsed once from a multi-line string (one proxy per
    line). Each parsed proxy is a dict with the keys ``server``, ``username``
    and ``password``. When no usable proxy is found, :meth:`get_next` returns
    ``None`` so callers can fall back to a direct connection.
    """

    def __init__(self, proxy_string: str):
        """Parse *proxy_string* and prepare the round-robin iterator."""
        self.proxies = self._parse_proxies(proxy_string)
        if self.proxies:
            self.proxy_cycler = cycle(self.proxies)
            print(f"✅ CredentialRevolver initialized with {len(self.proxies)} proxies.")
        else:
            self.proxy_cycler = None
            print("⚠️ CredentialRevolver initialized with no proxies. Using direct connection.")

    def _parse_proxies(self, proxy_string: str):
        """Parse a multi-line string of proxies into a list of dicts.

        Accepted line formats (blank lines are ignored)::

            scheme://user:pass@host:port
            user:pass@host:port
            host:port

        The scheme defaults to ``http``; credentials and port are optional.
        Lines without a host or with a non-numeric/out-of-range port are
        skipped and reported by line number only, so that credentials never
        end up in the log.
        """
        proxies = []
        for line_number, raw_line in enumerate(proxy_string.splitlines(), start=1):
            line = raw_line.strip()
            if not line:
                continue
            try:
                # urlparse only recognises the authority part after "//".
                # Prepend it for scheme-less lines only: adding it in front of
                # "http://..." would turn the scheme itself into the host.
                candidate = line if "://" in line else f"//{line}"
                parsed = urllib.parse.urlparse(candidate)
                host = parsed.hostname
                if not host:
                    raise ValueError("missing host")
                port = parsed.port  # raises ValueError for an invalid port
            except ValueError as e:
                print(f"Could not parse proxy line {line_number}. Error: {e}")
                continue

            if ":" in host:
                # .hostname strips the brackets from IPv6 literals; restore them.
                host = f"[{host}]"
            server = f"{parsed.scheme or 'http'}://{host}"
            if port is not None:
                server += f":{port}"
            proxies.append({
                "server": server,
                "username": parsed.username,
                "password": parsed.password,
            })
        return proxies

    def get_next(self):
        """Return the next proxy dict in round-robin order, or None if there are no proxies."""
        if self.proxy_cycler:
            return next(self.proxy_cycler)
        return None  # No proxy

    def count(self):
        """Return the number of successfully parsed proxies."""
        return len(self.proxies)

# --- GLOBAL PLAYWRIGHT AND REVOLVER SETUP ---
# Start Playwright and launch one shared headless Firefox at import time.
# `p` and `browser` are module-level because the tab code and cleanup() use them.
try:
    p = sync_playwright().start()
    browser = p.firefox.launch(headless=True, timeout=60000)
    print("✅ Playwright browser launched successfully.")
except Exception as e:
    print(f"❌ Could not launch Playwright browser: {e}")
    # Exit with a non-zero status so the failure is visible to whatever
    # started the process. The bare exit() helper comes from the `site`
    # module (not guaranteed to exist) and reports status 0.
    raise SystemExit(1)

# Load proxies from Hugging Face Secrets (environment variable)
proxy_list_str = os.getenv("PROXY_LIST", "")
revolver = CredentialRevolver(proxy_list_str)

def cleanup():
    """Close the shared browser and stop Playwright; registered to run at interpreter exit."""
    print("🧹 Cleaning up: Closing Playwright browser...")
    try:
        browser.close()
    finally:
        # Stop the Playwright driver even if closing the browser failed.
        p.stop()
atexit.register(cleanup)


# --- Core Browser Logic (Upgraded with Proxy Contexts) ---

class Tab:
    """State for one browser tab: its context, its page, and the last parsed result.

    Args:
        context: The browser context that owns ``page``; closed by :meth:`close`.
        page: The page object used for navigation within ``context``.
        proxy_used: Human-readable description of the proxy, kept for display/logging.
    """

    def __init__(self, context, page, proxy_used):
        self.context = context  # The isolated browser context (has the proxy)
        self.page = page        # The Playwright page object within the context
        self.proxy_used = proxy_used  # Info for logging
        self.title = "New Tab"
        self.url = "about:blank"
        self.parsed_text = "Welcome! Navigate to a URL or search to get started."
        self.links = []
        # Closed state is tracked here instead of being queried from the context.
        # NOTE(review): the Playwright Python docs list is_closed() on Page but
        # not on BrowserContext — confirm against the installed version.
        self._closed = False

    def close(self):
        """Close the underlying context (and with it the page); safe to call more than once."""
        if self._closed:
            return
        self._closed = True
        self.context.close()

class RealBrowser:
    """Manage a list of tabs and the index of the active one.

    Each tab is opened in its own context created on the module-level
    ``browser``, using the next proxy handed out by the module-level
    ``revolver``. All public methods return a human-readable status log string.
    """

    def __init__(self):
        self.tabs = []
        self.active_tab_index = -1
        self.new_tab()  # Start with one tab

    def _get_active_tab(self):
        """Return the active tab, or None when the index does not point at a tab."""
        if not 0 <= self.active_tab_index < len(self.tabs):
            return None
        return self.tabs[self.active_tab_index]

    def _parse_current_page(self, tab):
        """Refresh ``tab``'s url, title, text and links from the page as it is now.

        Does not navigate. Returns the log lines describing what was found;
        Playwright errors propagate to the caller.
        """
        tab.url = tab.page.url
        tab.title = tab.page.title() or "No Title"
        log = f"\n✅ Arrived at: {tab.url}"
        log += f"\n📄 Title: {tab.title}"

        soup = BeautifulSoup(tab.page.content(), 'lxml')
        # Drop elements whose text is not page content before extracting text.
        for element in soup(["script", "style", "nav", "footer"]):
            element.extract()
        tab.parsed_text = soup.get_text(separator='\n', strip=True)

        links = []
        for anchor in soup.find_all('a', href=True):
            absolute_url = urllib.parse.urljoin(tab.url, anchor['href'])
            # Keep web links only; this also excludes javascript:/mailto: targets.
            if absolute_url.startswith(('http://', 'https://')):
                link_text = anchor.get_text(strip=True) or "[No Link Text]"
                links.append({'text': link_text, 'url': absolute_url})
        tab.links = links
        log += f"\n🔗 Found {len(tab.links)} links."
        return log

    def _record_failure(self, tab, url, error):
        """Put ``tab`` into its error state for ``url`` and return the matching log line."""
        error_message = str(error)
        tab.title = "Error"
        tab.url = url
        tab.parsed_text = f"❌ Failed to load page.\n\nError: {error_message}"
        tab.links = []
        return f"\n❌ {error_message}"

    def _fetch_and_parse(self, tab, url):
        """Navigate ``tab`` to ``url`` and parse the result; returns the status log."""
        log = f"▶️ Navigating to {url}..."
        try:
            tab.page.goto(url, wait_until='domcontentloaded', timeout=30000)
            log += self._parse_current_page(tab)
        except PlaywrightError as e:
            log += self._record_failure(tab, url, e)
        return log

    def _history_move(self, method_name, blocked_message):
        """Run ``page.go_back`` / ``page.go_forward`` and parse the page it lands on.

        Playwright documents these calls as returning None when there is no
        history entry to move to, so a None response with an unchanged URL is
        reported as ``blocked_message``. The landed page is parsed in place —
        navigating to it again would load it twice and add a history entry.
        """
        tab = self._get_active_tab()
        if not tab:
            return blocked_message
        url_before = tab.page.url
        try:
            response = getattr(tab.page, method_name)(wait_until='domcontentloaded')
        except PlaywrightError as e:
            return f"▶️ Navigating to {url_before}..." + self._record_failure(tab, url_before, e)
        if response is None and tab.page.url == url_before:
            return blocked_message

        log = f"▶️ Navigating to {tab.page.url}..."
        try:
            log += self._parse_current_page(tab)
        except PlaywrightError as e:
            log += self._record_failure(tab, tab.page.url, e)
        return log

    def go(self, term_or_url):
        """Open ``term_or_url`` in the active tab.

        Input with both a scheme and a host is treated as a URL; anything else
        is sent to DuckDuckGo's HTML search as a query.
        """
        tab = self._get_active_tab()
        if not tab:
            return "No active tab."
        target = (term_or_url or "").strip()
        if not target:
            return "Please enter a URL or search term."
        parsed_url = urllib.parse.urlparse(target)
        if parsed_url.scheme and parsed_url.netloc:
            url = target
        else:
            url = f"https://duckduckgo.com/html/?q={urllib.parse.quote_plus(target)}"
        return self._fetch_and_parse(tab, url)

    def new_tab(self):
        """Open a tab in a fresh context using the next proxy and make it active.

        On failure the half-created tab is removed again, the previous active
        tab is restored, and the new context is closed.
        """
        proxy_config = revolver.get_next()
        previous_index = self.active_tab_index
        log = ""
        context = None
        tab = None

        try:
            # Create a new context with the proxy settings
            context = browser.new_context(proxy=proxy_config)
            page = context.new_page()

            proxy_info = proxy_config['server'] if proxy_config else "Direct Connection"
            log += f"✨ New tab opened.\n🔒 Using proxy: {proxy_info}"

            tab = Tab(context, page, proxy_info)
            self.tabs.append(tab)
            self.active_tab_index = len(self.tabs) - 1

            # Navigate to a default page
            log += "\n" + self.go("https://www.whatsmyip.org/")
        except Exception as e:
            log += f"\n❌ Failed to create new tab/context: {e}"
            if tab is not None and tab in self.tabs:
                self.tabs.remove(tab)
                self.active_tab_index = previous_index
            if context is not None:
                try:
                    context.close()
                except Exception as close_error:
                    log += f"\n⚠️ Could not close the failed context: {close_error}"
        return log

    def close_tab(self):
        """Close the active tab (never the last one) and activate a neighbour."""
        if len(self.tabs) <= 1:
            return "Cannot close the last tab."

        tab_to_close = self.tabs.pop(self.active_tab_index)
        # Fix the index before closing so the state stays consistent even if
        # closing the context raises.
        if self.active_tab_index >= len(self.tabs):
            self.active_tab_index = len(self.tabs) - 1
        tab_to_close.close()  # Closes the context and with it the page
        return f"💣 Tab closed. Switched to Tab {self.active_tab_index}."

    def back(self):
        """Go one step back in the active tab's history."""
        return self._history_move("go_back", "Cannot go back.")

    def forward(self):
        """Go one step forward in the active tab's history."""
        return self._history_move("go_forward", "Cannot go forward.")

    def refresh(self):
        """Reload the active tab once and re-parse it."""
        tab = self._get_active_tab()
        if not tab:
            return "No active tab."
        url = tab.page.url
        log = f"▶️ Navigating to {url}..."
        try:
            tab.page.reload(wait_until='domcontentloaded')
            log += self._parse_current_page(tab)
        except PlaywrightError as e:
            log += self._record_failure(tab, url, e)
        return log

    def switch_tab(self, tab_label):
        """Activate the tab named by a label of the form ``"Tab <index>: ..."``."""
        try:
            index = int(tab_label.split(":")[0].replace("Tab", "").strip())
        except (AttributeError, ValueError):
            # Not a string, or no integer where the index should be.
            return "Invalid tab format."
        if 0 <= index < len(self.tabs):
            self.active_tab_index = index
            return f"Switched to Tab {index}."
        return "Invalid tab index."

# --- Gradio UI and Event Handlers (mostly unchanged, but with proxy info) ---
def update_ui_components(browser_state: RealBrowser):
    """Build the Gradio update mapping that mirrors the browser's current state.

    Returns a dict keyed by the page content, URL box, links panel and tab
    selector components. With no active tab the panels are emptied; otherwise
    they show the active tab's title, URL, the first 2000 characters of its
    text, up to 25 numbered links, and one selector label per open tab.
    """
    current = browser_state._get_active_tab()
    if not current:
        # Handle case where all tabs are closed
        return {
            page_content: gr.Markdown("No active tabs. Please create a new one."),
            url_textbox: "",
            links_display: "",
            tab_selector: gr.Radio(choices=[], label="Active Tabs"),
        }

    # One label per tab; the proxy host (without scheme/credentials) is shown for clarity.
    labels = []
    for position, tab in enumerate(browser_state.tabs):
        if tab.proxy_used != 'Direct Connection':
            route = tab.proxy_used.split('//')[1].split('@')[-1]
        else:
            route = 'Direct'
        labels.append(f"Tab {position}: {tab.title[:30]}... (via {route})")
    selected_label = labels[browser_state.active_tab_index]

    md_parts = ["### 🔗 Links on Page\n"]
    if current.links:
        for number, entry in enumerate(current.links[:25]):
            md_parts.append(f"{number}. [{entry['text'][:80]}]({entry['url']})\n")
    else:
        md_parts.append("_No links found._")
    links_md = "".join(md_parts)

    page_md = f"# {current.title}\n**URL:** {current.url}\n\n---\n\n{current.parsed_text[:2000]}..."
    return {
        page_content: gr.Markdown(page_md),
        url_textbox: gr.Textbox(value=current.url),
        links_display: gr.Markdown(links_md),
        tab_selector: gr.Radio(choices=labels, value=selected_label, label="Active Tabs"),
    }

# The handle_action function remains the same as it's a generic dispatcher
def handle_action(browser_state, action, value=None):
    """Run one UI action on ``browser_state`` and return the Gradio updates.

    Args:
        browser_state: The browser object whose methods carry out the action.
        action: One of "go", "click", "back", "forward", "refresh",
            "new_tab", "close_tab" or "switch_tab".
        value: Argument for "go" (URL or search term), "click" (link number)
            and "switch_tab" (tab label); ignored by the other actions.

    Returns:
        The mapping from ``update_ui_components`` plus the status log entry.
    """
    no_argument_actions = {
        "back": browser_state.back,
        "forward": browser_state.forward,
        "refresh": browser_state.refresh,
        "new_tab": browser_state.new_tab,
        "close_tab": browser_state.close_tab,
    }

    if action == "go":
        log = browser_state.go(value)
    elif action == "click":
        tab = browser_state._get_active_tab()
        # Only the number conversion is guarded: errors raised while following
        # the link must not be reported as a bad link number.
        try:
            link_index = int(value)
        except (TypeError, ValueError, OverflowError):
            log = "Please enter a valid number to click."
        else:
            if tab and 0 <= link_index < len(tab.links):
                log = browser_state.go(tab.links[link_index]['url'])
            else:
                log = "Invalid link number."
    elif action == "switch_tab":
        log = browser_state.switch_tab(value)
    elif action in no_argument_actions:
        log = no_argument_actions[action]()
    else:
        log = "Unknown action."

    return {**update_ui_components(browser_state), log_display: gr.Textbox(log)}

# The Gradio Blocks layout remains the same
with gr.Blocks(theme=gr.themes.Soft(), title="Real Browser Demo") as demo:
    # Shared state object plus the page header.
    browser_state = gr.State(RealBrowser())
    gr.Markdown("# 🛰️ Real Browser Demo (with Proxy Revolver)")
    gr.Markdown(f"Type a URL or search term. This demo runs a real headless browser with **{revolver.count()} proxies loaded**.")

    with gr.Row():
        # Left column: navigation controls, page text and the status log.
        with gr.Column(scale=3):
            with gr.Row():
                back_btn = gr.Button("◀ Back")
                forward_btn = gr.Button("▶ Forward")
                refresh_btn = gr.Button("🔄 Refresh")
            url_textbox = gr.Textbox(label="URL or Search Term", interactive=True)
            go_btn = gr.Button("Go", variant="primary")
            with gr.Accordion("Page Content (Text Only)", open=True):
                page_content = gr.Markdown("Loading...")
            log_display = gr.Textbox(label="Status Log", interactive=False)
        # Right column: tab management and the numbered link list.
        with gr.Column(scale=1):
            with gr.Row():
                new_tab_btn = gr.Button("➕ New Tab")
                close_tab_btn = gr.Button("❌ Close Tab")
            tab_selector = gr.Radio(choices=[], label="Active Tabs", interactive=True)
            with gr.Accordion("Clickable Links", open=True):
                links_display = gr.Markdown("...")
                with gr.Row():
                    click_num_box = gr.Number(label="Link #", scale=1, minimum=0, step=1)
                    click_btn = gr.Button("Click Link", scale=2)

    # Every handler refreshes the same five components.
    all_outputs = [page_content, url_textbox, links_display, tab_selector, log_display]

    # Initial render when the page loads.
    demo.load(
        lambda s: {**update_ui_components(s), log_display: f"🚀 Browser Initialized! {revolver.count()} proxies loaded. A new tab has been opened to check the IP."},
        inputs=[browser_state],
        outputs=all_outputs,
    )

    # Actions that take the URL box or link number as their argument.
    go_btn.click(
        lambda s, v: handle_action(s, "go", v),
        [browser_state, url_textbox],
        all_outputs,
        show_progress="full",
    )
    url_textbox.submit(
        lambda s, v: handle_action(s, "go", v),
        [browser_state, url_textbox],
        all_outputs,
        show_progress="full",
    )
    click_btn.click(
        lambda s, v: handle_action(s, "click", v),
        [browser_state, click_num_box],
        all_outputs,
        show_progress="full",
    )

    # Actions that need only the browser state.
    back_btn.click(
        lambda s: handle_action(s, "back"),
        [browser_state],
        all_outputs,
        show_progress="full",
    )
    forward_btn.click(
        lambda s: handle_action(s, "forward"),
        [browser_state],
        all_outputs,
        show_progress="full",
    )
    refresh_btn.click(
        lambda s: handle_action(s, "refresh"),
        [browser_state],
        all_outputs,
        show_progress="full",
    )
    new_tab_btn.click(
        lambda s: handle_action(s, "new_tab"),
        [browser_state],
        all_outputs,
        show_progress="full",
    )
    close_tab_btn.click(
        lambda s: handle_action(s, "close_tab"),
        [browser_state],
        all_outputs,
    )

    # Tab selection passes the chosen label through to switch_tab.
    tab_selector.input(
        lambda s, v: handle_action(s, "switch_tab", v),
        [browser_state, tab_selector],
        all_outputs,
    )

demo.launch()