Spaces:
Paused
Paused
| from smolagents.tools import Tool | |
| import cv2 | |
| import numpy as np | |
| import os | |
def detect_elements(screenshot_path: str, element_type: str = "table") -> str:
    """
    Detect table-like structures or text boxes in a screenshot using OpenCV.

    The image is converted to grayscale, blurred, and run through Canny edge
    detection; the bounding rectangle of every external contour is then
    filtered by area and aspect ratio according to ``element_type``. An
    annotated copy of the image (green boxes for tables, red for text boxes)
    is written next to the input as ``<name>_detected<ext>``.

    Args:
        screenshot_path (str): Path to the screenshot
        element_type (str): Type of element to detect ('table', 'textbox') (default: 'table')

    Returns:
        str: A JSON object with keys ``"detections"`` (list of
        ``{"type", "bbox": [x, y, w, h]}``) and ``"output_image"`` (path of the
        annotated copy) when at least one element matched; otherwise a
        plain-text message (``"No elements detected"``,
        ``"Screenshot not found: ..."`` or ``"Failed to detect elements: ..."``).
        Errors are reported through the return value, never raised.
    """
    # Imported here because the module's import block does not provide json,
    # which previously made every successful detection end in a NameError.
    import json

    try:
        if not os.path.exists(screenshot_path):
            return f"Screenshot not found: {screenshot_path}"

        # Read and preprocess image
        image = cv2.imread(screenshot_path)
        if image is None:
            # cv2.imread signals an unreadable/non-image file by returning None
            # rather than raising, so it has to be checked explicitly.
            return f"Failed to detect elements: could not read image: {screenshot_path}"
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        blurred = cv2.GaussianBlur(gray, (5, 5), 0)
        edges = cv2.Canny(blurred, 50, 150)

        # Detect contours
        contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        detections = []
        for contour in contours:
            x, y, w, h = cv2.boundingRect(contour)
            area = w * h
            aspect_ratio = w / h if h > 0 else 0
            # Filter for tables (rectangular, large area)
            if element_type == "table" and area > 10000 and 0.5 < aspect_ratio < 2.0:
                detections.append({"type": "table", "bbox": [x, y, w, h]})
            # Filter for text boxes (narrow, horizontal)
            elif element_type == "textbox" and area > 500 and aspect_ratio > 2.0:
                detections.append({"type": "textbox", "bbox": [x, y, w, h]})

        # Build the output path from the real file extension. A plain
        # str.replace(".png", ...) leaves non-PNG paths unchanged (so the
        # annotated copy would overwrite the original screenshot) and also
        # rewrites ".png" occurring inside directory names.
        root, ext = os.path.splitext(screenshot_path)
        output_path = f"{root}_detected{ext or '.png'}"

        # Draw bounding boxes on a copy of the image
        output_image = image.copy()
        for detection in detections:
            x, y, w, h = detection["bbox"]
            color = (0, 255, 0) if detection["type"] == "table" else (0, 0, 255)
            cv2.rectangle(output_image, (x, y), (x + w, y + h), color, 2)

        # cv2.imwrite reports failure through its return value.
        if not cv2.imwrite(output_path, output_image):
            return f"Failed to detect elements: could not write {output_path}"

        if not detections:
            return "No elements detected"
        return json.dumps({
            "detections": detections,
            "output_image": output_path,
        })
    except Exception as e:
        # Tool boundary: callers expect a string result, so any unexpected
        # failure is reported as text instead of propagating.
        return f"Failed to detect elements: {str(e)}"
# Register the tool
#
# smolagents documents custom tools as subclasses of ``Tool`` that declare
# ``name``, ``description``, ``inputs`` and ``output_type`` as class attributes
# and implement ``forward``; input/output types use names such as "string"
# (not "str"), and inputs that may be omitted are flagged "nullable".
# NOTE(review): confirm against the smolagents version pinned for this project.
class DetectElementsTool(Tool):
    """Expose ``detect_elements`` to agents as the ``detect_elements`` tool."""

    name = "detect_elements"
    description = "Detects table-like structures or text boxes in a screenshot using OpenCV."
    inputs = {
        "screenshot_path": {"type": "string", "description": "Path to the screenshot"},
        "element_type": {
            "type": "string",
            "description": "Type: 'table' or 'textbox' (default: 'table')",
            "nullable": True,
        },
    }
    output_type = "string"

    def forward(self, screenshot_path: str, element_type: str = "table") -> str:
        """Run the detection; a missing or null ``element_type`` means 'table'."""
        return detect_elements(screenshot_path, element_type or "table")


# Module-level instance kept under the original name so existing imports of
# ``tool`` continue to work.
tool = DetectElementsTool()