Spaces:

Firoj112
/

WebAgents_

Running

App Files Files Community

Firoj112 committed on May 5

Commit

e9ed5be

verified ·

1 Parent(s): d175522

Create scrape_text.py

Browse files

Files changed (1) hide show

tools/scrape_text.py +52 -0

tools/scrape_text.py ADDED Viewed

	@@ -0,0 +1,52 @@

+from smolagents.tools import Tool
+from helium import S
+from selenium.webdriver.common.by import By
+import json
def scrape_text(driver, selector: str = "p", extract_table: bool = False) -> str:
    """Scrape text or table data from elements matching a CSS selector.

    Args:
        driver: Selenium WebDriver instance positioned on the page to scrape.
        selector: CSS selector to target elements (default: 'p' for paragraphs).
        extract_table: If True, treat each matched element as a table and
            return its rows as JSON instead of plain text.

    Returns:
        Always a string:
        * text mode: the non-empty, stripped texts of the matched elements
          joined with newlines, or "No text found for selector";
        * table mode: a JSON array with one entry per non-empty table, each a
          list of rows, each row a list of cell strings; or
          "No tables found for selector" / "No table data found";
        * on any error: "Failed to scrape with selector <selector>: <error>".
    """
    # Deliberately broad: the result is handed back to an agent as text, so
    # any failure is reported as a message instead of propagating.
    try:
        elements = driver.find_elements(By.CSS_SELECTOR, selector)
        if extract_table:
            if not elements:
                return "No tables found for selector"
            table_data = [rows for rows in map(_table_rows, elements) if rows]
            return json.dumps(table_data) if table_data else "No table data found"
        # Read `.text` once per element; each access is a separate driver call.
        stripped = (element.text.strip() for element in elements)
        text_list = [text for text in stripped if text]
        return "\n".join(text_list) if text_list else "No text found for selector"
    except Exception as e:
        return f"Failed to scrape with selector {selector}: {e}"


def _table_rows(table):
    """Return the non-blank rows of *table* as lists of stripped cell texts."""
    rows = []
    for row in table.find_elements(By.TAG_NAME, "tr"):
        # "th, td" selects header and data cells together in document order,
        # so a row that mixes both (e.g. a leading row-header <th>) is complete.
        cells = [
            cell.text.strip()
            for cell in row.find_elements(By.CSS_SELECTOR, "th, td")
        ]
        # Keep empty cells so columns stay aligned; skip only fully blank rows.
        if any(cells):
            rows.append(cells)
    return rows
# Build the module-level `tool` object that wraps scrape_text.
# NOTE(review): the "str"/"bool" type names and the `function=` keyword are
# passed through exactly as written; confirm they match what the installed
# smolagents `Tool` class accepts.
tool = Tool(
    function=scrape_text,
    name="scrape_text",
    description="Scrapes text or table data from elements matching a CSS selector on the current page.",
    output_type="str",
    inputs={
        "selector": {
            "type": "str",
            "default": "p",
            "description": "CSS selector to target elements",
        },
        "extract_table": {
            "type": "bool",
            "default": False,
            "description": "If True, extract table data as JSON",
        },
    },
)