Spaces:

Firoj112
/

WebAgents_

Running

App Files Files Community

WebAgents_ / tools /detect_elements.py

Firoj112

Update tools/detect_elements.py

f018237 verified 9 days ago

raw

history blame contribute delete

3.01 kB

	from smolagents.tools import Tool
	import cv2
	import numpy as np
	import os
	import json
	from typing import Optional
	import logging

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)

	class DetectElementsTool(Tool):
	    """Locate table-like regions or text boxes in a screenshot with OpenCV.

	    The tool runs Canny edge detection on a blurred grayscale version of the
	    screenshot, takes the bounding rectangle of every external contour, and
	    keeps the rectangles whose area and width/height ratio match the requested
	    element type. Matches are drawn on a copy of the image that is written
	    alongside the original with a ``_detected`` suffix.
	    """

	    name = "detect_elements"
	    description = "Detects table-like structures or text boxes in a screenshot using OpenCV."
	    inputs = {
	        "screenshot_path": {"type": "string", "nullable": True, "description": "Path to the screenshot"},
	        "element_type": {"type": "string", "default": "table", "nullable": False, "description": "Type: 'table' or 'textbox'"}
	    }
	    output_type = "string"

	    def __init__(self, driver=None):
	        """Create the tool.

	        Args:
	            driver: Optional driver object. It is stored on the instance but
	                not used by this tool.
	        """
	        super().__init__()
	        self.driver = driver  # Store driver for consistency, even if unused
	        self.is_initialized = True  # No dependency on driver, so always True
	        logger.debug("DetectElementsTool initialized: is_initialized=%s", self.is_initialized)

	    def forward(self, screenshot_path: Optional[str], element_type: str = "table") -> str:
	        """Detect elements of ``element_type`` in the image at ``screenshot_path``.

	        Args:
	            screenshot_path: Path of the screenshot to analyse. ``None`` or an
	                empty string is reported as an error message.
	            element_type: ``"table"`` keeps bounding boxes with area
	                (``w * h``) above 10000 and width/height ratio strictly
	                between 0.5 and 2.0; ``"textbox"`` keeps boxes with area
	                above 500 and ratio above 2.0.

	        Returns:
	            A JSON string with ``detections`` (a list of
	            ``{"type": ..., "bbox": [x, y, w, h]}`` entries) and
	            ``output_image`` (path of the annotated copy); the text
	            ``"No elements detected"`` when nothing matched; or a
	            human-readable error message. Failures are returned as text
	            rather than raised.
	        """
	        if not self.is_initialized:
	            return "Error: DetectElementsTool is not initialized"
	        # The input schema marks the path as nullable, so reject a missing
	        # value explicitly instead of letting os.path.exists(None) raise.
	        if not screenshot_path:
	            return "Screenshot path was not provided"
	        # Fail fast on a misspelled type rather than silently matching nothing.
	        if element_type not in ("table", "textbox"):
	            return f"Unsupported element_type: {element_type!r} (expected 'table' or 'textbox')"
	        try:
	            if not os.path.exists(screenshot_path):
	                return f"Screenshot not found: {screenshot_path}"

	            # cv2.imread signals an unreadable or non-image file by returning
	            # None instead of raising, so check before processing.
	            image = cv2.imread(screenshot_path)
	            if image is None:
	                return f"Failed to read screenshot as an image: {screenshot_path}"

	            # Preprocess: grayscale -> blur -> Canny edges
	            gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	            blurred = cv2.GaussianBlur(gray, (5, 5), 0)
	            edges = cv2.Canny(blurred, 50, 150)

	            # Only outermost contours are considered (RETR_EXTERNAL)
	            contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	            detections = []

	            for contour in contours:
	                x, y, w, h = cv2.boundingRect(contour)
	                area = w * h
	                aspect_ratio = w / h if h > 0 else 0

	                if element_type == "table":
	                    # Tables: large and roughly square-ish
	                    matched = area > 10000 and 0.5 < aspect_ratio < 2.0
	                else:
	                    # Text boxes: smaller, clearly wider than tall
	                    matched = area > 500 and aspect_ratio > 2.0
	                if matched:
	                    detections.append({"type": element_type, "bbox": [x, y, w, h]})

	            if not detections:
	                return "No elements detected"

	            # Build the output name from the file extension so that non-.png
	            # screenshots are never overwritten and ".png" elsewhere in the
	            # path (e.g. in a directory name) is left alone. A path without
	            # an extension gets ".png" so OpenCV can pick an encoder.
	            root, ext = os.path.splitext(screenshot_path)
	            output_path = f"{root}_detected{ext or '.png'}"

	            # Draw bounding boxes on a copy of the image (colors are BGR):
	            # green for tables, red for text boxes.
	            color = (0, 255, 0) if element_type == "table" else (0, 0, 255)
	            output_image = image.copy()
	            for detection in detections:
	                x, y, w, h = detection["bbox"]
	                cv2.rectangle(output_image, (x, y), (x + w, y + h), color, 2)

	            # cv2.imwrite reports failure through its return value.
	            if not cv2.imwrite(output_path, output_image):
	                return f"Failed to write annotated image: {output_path}"

	            return json.dumps({
	                "detections": detections,
	                "output_image": output_path
	            })
	        except Exception as e:
	            # Tool boundary: report the failure as text, but keep the
	            # traceback in the log for debugging.
	            logger.exception("Element detection failed for %s", screenshot_path)
	            return f"Failed to detect elements: {str(e)}"