Spaces:
Sleeping
Sleeping
Andy Lee
commited on
Commit
·
749ea04
1
Parent(s):
d140f7d
feat: use keyboards to interact
Browse files- config.py +7 -55
- geo_bot.py +178 -71
- main.py +120 -201
- mapcrunch_controller.py +110 -141
config.py
CHANGED
@@ -8,54 +8,13 @@ MAPCRUNCH_URL = "https://www.mapcrunch.com"
|
|
8 |
# UI element selectors
|
9 |
SELECTORS = {
|
10 |
"go_button": "#go-button",
|
11 |
-
"options_button": "#options-button",
|
12 |
-
"stealth_checkbox": "#stealth",
|
13 |
-
"urban_checkbox": "#cities",
|
14 |
-
"indoor_checkbox": "#inside",
|
15 |
-
"tour_checkbox": "#tour",
|
16 |
-
"auto_checkbox": "#auto",
|
17 |
"pano_container": "#pano",
|
18 |
-
"map_container": "#map",
|
19 |
"address_element": "#address",
|
20 |
-
"confirm_button": "#confirm-button", # Will be determined dynamically
|
21 |
-
"country_list": "#countrylist",
|
22 |
-
"continent_links": "#continents a",
|
23 |
-
}
|
24 |
-
|
25 |
-
# MapCrunch collection options
|
26 |
-
MAPCRUNCH_OPTIONS = {
|
27 |
-
"urban_only": True, # Show urban areas only
|
28 |
-
"exclude_indoor": True, # Exclude indoor views
|
29 |
-
"stealth_mode": False, # Hide location info during gameplay
|
30 |
-
"tour_mode": False, # 360 degree tour
|
31 |
-
"auto_mode": False, # Automatic slideshow
|
32 |
-
"selected_countries": None, # None means all, or list like ['us', 'gb', 'jp']
|
33 |
-
"selected_continents": None, # None means all, or list like [1, 2] # 1=N.America, 2=Europe, etc
|
34 |
}
|
35 |
|
36 |
# Data collection settings
|
37 |
DATA_COLLECTION_CONFIG = {
|
38 |
-
"
|
39 |
-
"thumbnail_size": (320, 240), # Thumbnail dimensions
|
40 |
-
"save_full_screenshots": False, # Save full resolution screenshots (storage intensive)
|
41 |
-
"extract_address": True, # Extract address/location name
|
42 |
-
"wait_after_go": 3, # Seconds to wait after clicking Go
|
43 |
-
"retry_on_failure": True, # Retry if location fails
|
44 |
-
"max_retries": 3, # Max retries per location
|
45 |
-
}
|
46 |
-
|
47 |
-
# Reference points for coordinate calibration (used in pyautogui coordinate system)
|
48 |
-
REFERENCE_POINTS = {
|
49 |
-
"kodiak": {"lat": 57.7916, "lon": -152.4083},
|
50 |
-
"hobart": {"lat": -42.8833, "lon": 147.3355},
|
51 |
-
}
|
52 |
-
|
53 |
-
# Selenium settings
|
54 |
-
SELENIUM_CONFIG = {
|
55 |
-
"headless": False,
|
56 |
-
"window_size": (1920, 1080),
|
57 |
-
"implicit_wait": 10,
|
58 |
-
"page_load_timeout": 30,
|
59 |
}
|
60 |
|
61 |
# Model configurations
|
@@ -66,27 +25,20 @@ MODELS_CONFIG = {
|
|
66 |
},
|
67 |
"claude-3.5-sonnet": {
|
68 |
"class": "ChatAnthropic",
|
69 |
-
"model_name": "claude-3-5-sonnet-
|
70 |
},
|
71 |
"gemini-1.5-pro": {
|
72 |
"class": "ChatGoogleGenerativeAI",
|
73 |
-
"model_name": "gemini-1.5-pro",
|
|
|
|
|
|
|
|
|
74 |
},
|
75 |
-
}
|
76 |
-
|
77 |
-
# Benchmark settings
|
78 |
-
BENCHMARK_CONFIG = {
|
79 |
-
"rounds_per_model": 50,
|
80 |
-
"data_collection_samples": 200,
|
81 |
-
"screenshot_delay": 2,
|
82 |
-
"click_delay": 1,
|
83 |
}
|
84 |
|
85 |
# Data paths
|
86 |
DATA_PATHS = {
|
87 |
"golden_labels": "data/golden_labels.json",
|
88 |
-
"screenshots": "data/screenshots/",
|
89 |
-
"thumbnails": "data/thumbnails/",
|
90 |
"results": "results/",
|
91 |
-
"screen_regions": "screen_regions.yaml", # Keep for backward compatibility
|
92 |
}
|
|
|
8 |
# UI element selectors
|
9 |
SELECTORS = {
|
10 |
"go_button": "#go-button",
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
"pano_container": "#pano",
|
|
|
12 |
"address_element": "#address",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
}
|
14 |
|
15 |
# Data collection settings
|
16 |
DATA_COLLECTION_CONFIG = {
|
17 |
+
"wait_after_go": 3,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
}
|
19 |
|
20 |
# Model configurations
|
|
|
25 |
},
|
26 |
"claude-3.5-sonnet": {
|
27 |
"class": "ChatAnthropic",
|
28 |
+
"model_name": "claude-3-5-sonnet-20240620",
|
29 |
},
|
30 |
"gemini-1.5-pro": {
|
31 |
"class": "ChatGoogleGenerativeAI",
|
32 |
+
"model_name": "gemini-1.5-pro-latest",
|
33 |
+
},
|
34 |
+
"gemini-2.5-pro": {
|
35 |
+
"class": "ChatGoogleGenerativeAI",
|
36 |
+
"model_name": "gemini-2.5-pro-preview-06-05",
|
37 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
38 |
}
|
39 |
|
40 |
# Data paths
|
41 |
DATA_PATHS = {
|
42 |
"golden_labels": "data/golden_labels.json",
|
|
|
|
|
43 |
"results": "results/",
|
|
|
44 |
}
|
geo_bot.py
CHANGED
@@ -1,11 +1,10 @@
|
|
1 |
-
# geo_bot.py (Final Streamlined Version)
|
2 |
-
|
3 |
-
from io import BytesIO
|
4 |
import base64
|
|
|
5 |
import re
|
6 |
-
from
|
7 |
-
from
|
8 |
|
|
|
9 |
from langchain_core.messages import HumanMessage, BaseMessage
|
10 |
from langchain_openai import ChatOpenAI
|
11 |
from langchain_anthropic import ChatAnthropic
|
@@ -13,24 +12,52 @@ from langchain_google_genai import ChatGoogleGenerativeAI
|
|
13 |
|
14 |
from mapcrunch_controller import MapCrunchController
|
15 |
|
16 |
-
|
17 |
-
|
18 |
-
First describe the relevant details in the image to do it.
|
19 |
-
List some regions and places where it could be.
|
20 |
-
Choose the most likely Country and City or Specific Location.
|
21 |
-
At the end, in the last line apart from the previous reasoning, write the Latitude and Longitude from that guessed location
|
22 |
-
using the following format, making sure that the coords are valid floats, without anything else and making sure to be consistent with the format:
|
23 |
-
Lat: XX.XXXX, Lon: XX.XXXX
|
24 |
-
"""
|
25 |
|
|
|
|
|
|
|
26 |
|
27 |
-
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
|
|
|
|
|
32 |
def __init__(
|
33 |
-
self,
|
|
|
|
|
|
|
|
|
34 |
):
|
35 |
self.model = model(model=model_name)
|
36 |
self.model_name = model_name
|
@@ -42,87 +69,167 @@ class GeoBot:
|
|
42 |
@staticmethod
|
43 |
def pil_to_base64(image: Image) -> str:
|
44 |
buffered = BytesIO()
|
|
|
45 |
image.save(buffered, format="PNG")
|
46 |
return base64.b64encode(buffered.getvalue()).decode("utf-8")
|
47 |
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
for
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
content.append(
|
53 |
{
|
54 |
"type": "image_url",
|
55 |
-
"image_url": {"url": f"data:image/png;base64,{
|
56 |
}
|
57 |
)
|
58 |
-
return HumanMessage(content=content)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
59 |
|
60 |
-
def
|
61 |
-
|
62 |
-
|
63 |
-
"""
|
64 |
try:
|
65 |
content = response.content.strip()
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
|
76 |
-
|
|
|
77 |
|
78 |
-
|
|
|
79 |
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
84 |
return None
|
85 |
|
86 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
87 |
|
88 |
-
|
89 |
-
|
90 |
-
return None
|
91 |
|
92 |
-
|
93 |
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
|
|
|
|
|
|
99 |
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
106 |
return None
|
107 |
|
108 |
def analyze_image(self, image: Image) -> Optional[Tuple[float, float]]:
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
|
118 |
-
|
|
|
119 |
|
120 |
-
|
121 |
-
|
122 |
-
|
|
|
|
|
123 |
|
124 |
def close(self):
|
125 |
-
"""Cleans up resources."""
|
126 |
if self.controller:
|
127 |
self.controller.close()
|
128 |
|
|
|
|
|
|
|
|
|
1 |
import base64
|
2 |
+
import json
|
3 |
import re
|
4 |
+
from io import BytesIO
|
5 |
+
from typing import Tuple, List, Optional, Dict, Any, Type
|
6 |
|
7 |
+
from PIL import Image
|
8 |
from langchain_core.messages import HumanMessage, BaseMessage
|
9 |
from langchain_openai import ChatOpenAI
|
10 |
from langchain_anthropic import ChatAnthropic
|
|
|
12 |
|
13 |
from mapcrunch_controller import MapCrunchController
|
14 |
|
15 |
+
AGENT_PROMPT_TEMPLATE = """
|
16 |
+
**Mission:** You are an expert geo-location agent. Your goal is to find clues to determine your location within a limited number of steps.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
|
18 |
+
**Current Status:**
|
19 |
+
- **Remaining Steps: {remaining_steps}**
|
20 |
+
- **Available Actions This Turn: {available_actions}**
|
21 |
|
22 |
+
---
|
23 |
+
**Core Principles of an Expert Player:**
|
24 |
+
|
25 |
+
1. **Final Step Rule:** If `remaining_steps` is **exactly 1**, this is your last action and it **MUST be `GUESS`**. Do not use your final step for exploration.
|
26 |
+
2. **Be Decisive:** If you find a key clue (a specific address, a unique landmark, or text identifying a city/region), make a `GUESS` immediately. Don't waste steps.
|
27 |
+
3. **Efficient Exploration:**
|
28 |
+
- At intersections or when the view is unpromising, **pan first** to see all directions before moving.
|
29 |
+
- If a path looks barren, don't get stuck moving forward. It's often smarter to turn around (using `PAN` or `MOVE_BACKWARD`).
|
30 |
+
4. **Understand Your Path (The Arrow Heuristic):** The navigation arrows on the ground show the two directions of the **road**. `MOVE_FORWARD` follows the arrow that appears **physically higher on your screen**. `MOVE_BACKWARD` follows the lower arrow. Use this to navigate predictably.
|
31 |
+
|
32 |
+
---
|
33 |
+
**Context & Task:**
|
34 |
+
You will receive a sequence of images from your journey. The last image is your **CURRENT** view. Analyze the full history and your current view, apply the Core Principles, and decide your next action.
|
35 |
+
|
36 |
+
**Action History:**
|
37 |
+
{history_text}
|
38 |
|
39 |
+
**JSON Output Format:**
|
40 |
+
Your response MUST be a valid JSON object wrapped in ```json ... ```.
|
41 |
+
- For exploration: `{{"reasoning": "...", "action_details": {{"action": "ACTION_NAME"}} }}`
|
42 |
+
- For the final guess: `{{"reasoning": "...", "action_details": {{"action": "GUESS", "lat": <float>, "lon": <float>}} }}`
|
43 |
+
"""
|
44 |
+
|
45 |
+
BENCHMARK_PROMPT = """
|
46 |
+
Analyze the image and determine its geographic coordinates.
|
47 |
+
1. Describe visual clues.
|
48 |
+
2. Suggest potential regions.
|
49 |
+
3. State your most probable location.
|
50 |
+
4. Provide coordinates in the last line in this exact format: `Lat: XX.XXXX, Lon: XX.XXXX`
|
51 |
+
"""
|
52 |
|
53 |
+
|
54 |
+
class GeoBot:
|
55 |
def __init__(
|
56 |
+
self,
|
57 |
+
model: Type,
|
58 |
+
model_name: str,
|
59 |
+
use_selenium: bool = True,
|
60 |
+
headless: bool = False,
|
61 |
):
|
62 |
self.model = model(model=model_name)
|
63 |
self.model_name = model_name
|
|
|
69 |
@staticmethod
|
70 |
def pil_to_base64(image: Image) -> str:
|
71 |
buffered = BytesIO()
|
72 |
+
image.thumbnail((1024, 1024))
|
73 |
image.save(buffered, format="PNG")
|
74 |
return base64.b64encode(buffered.getvalue()).decode("utf-8")
|
75 |
|
76 |
+
def _create_message_with_history(
|
77 |
+
self, prompt: str, image_b64_list: List[str]
|
78 |
+
) -> List[HumanMessage]:
|
79 |
+
"""Creates a message for the LLM that includes text and a sequence of images."""
|
80 |
+
content = [{"type": "text", "text": prompt}]
|
81 |
+
# Add the JSON format instructions right after the main prompt text
|
82 |
+
content.append(
|
83 |
+
{
|
84 |
+
"type": "text",
|
85 |
+
"text": '\n**JSON Output Format:**\nYour response MUST be a valid JSON object wrapped in ```json ... ```.\n- For exploration: `{{"reasoning": "...", "action_details": {{"action": "ACTION_NAME"}} }}`\n- For the final guess: `{{"reasoning": "...", "action_details": {{"action": "GUESS", "lat": <float>, "lon": <float>}} }}`',
|
86 |
+
}
|
87 |
+
)
|
88 |
+
|
89 |
+
for b64_string in image_b64_list:
|
90 |
content.append(
|
91 |
{
|
92 |
"type": "image_url",
|
93 |
+
"image_url": {"url": f"data:image/png;base64,{b64_string}"},
|
94 |
}
|
95 |
)
|
96 |
+
return [HumanMessage(content=content)]
|
97 |
+
|
98 |
+
def _create_llm_message(self, prompt: str, image_b64: str) -> List[HumanMessage]:
|
99 |
+
"""Original method for single-image analysis (benchmark)."""
|
100 |
+
return [
|
101 |
+
HumanMessage(
|
102 |
+
content=[
|
103 |
+
{"type": "text", "text": prompt},
|
104 |
+
{
|
105 |
+
"type": "image_url",
|
106 |
+
"image_url": {"url": f"data:image/png;base64,{image_b64}"},
|
107 |
+
},
|
108 |
+
]
|
109 |
+
)
|
110 |
+
]
|
111 |
|
112 |
+
def _parse_agent_response(self, response: BaseMessage) -> Optional[Dict[str, Any]]:
|
113 |
+
"""
|
114 |
+
Robustly parses JSON from the LLM response, handling markdown code blocks.
|
115 |
+
"""
|
116 |
try:
|
117 |
content = response.content.strip()
|
118 |
+
match = re.search(r"```json\s*(\{.*?\})\s*```", content, re.DOTALL)
|
119 |
+
if match:
|
120 |
+
json_str = match.group(1)
|
121 |
+
else:
|
122 |
+
json_str = content
|
123 |
+
return json.loads(json_str)
|
124 |
+
except (json.JSONDecodeError, AttributeError) as e:
|
125 |
+
print(f"Invalid JSON from LLM: {e}\nFull response was:\n{response.content}")
|
126 |
+
return None
|
127 |
|
128 |
+
def run_agent_loop(self, max_steps: int = 10) -> Optional[Tuple[float, float]]:
|
129 |
+
history: List[Dict[str, Any]] = []
|
130 |
|
131 |
+
for step in range(max_steps, 0, -1):
|
132 |
+
print(f"\n--- Step {max_steps - step + 1}/{max_steps} ---")
|
133 |
|
134 |
+
self.controller.setup_clean_environment()
|
135 |
+
|
136 |
+
screenshot_bytes = self.controller.take_street_view_screenshot()
|
137 |
+
if not screenshot_bytes:
|
138 |
+
print("Failed to take screenshot. Ending agent loop.")
|
139 |
return None
|
140 |
|
141 |
+
current_screenshot_b64 = self.pil_to_base64(
|
142 |
+
Image.open(BytesIO(screenshot_bytes))
|
143 |
+
)
|
144 |
+
available_actions = self.controller.get_available_actions()
|
145 |
+
print(f"Available actions: {available_actions}")
|
146 |
+
|
147 |
+
history_text = ""
|
148 |
+
image_b64_for_prompt = []
|
149 |
+
if not history:
|
150 |
+
history_text = "No history yet. This is the first step."
|
151 |
+
else:
|
152 |
+
for i, h in enumerate(history):
|
153 |
+
history_text += f"--- History Step {i + 1} ---\n"
|
154 |
+
history_text += f"Reasoning: {h.get('reasoning', 'N/A')}\n"
|
155 |
+
history_text += f"Action: {h.get('action_details', {}).get('action', 'N/A')}\n\n"
|
156 |
+
image_b64_for_prompt.append(h["screenshot_b64"])
|
157 |
+
|
158 |
+
image_b64_for_prompt.append(current_screenshot_b64)
|
159 |
+
|
160 |
+
prompt = AGENT_PROMPT_TEMPLATE.format(
|
161 |
+
remaining_steps=step,
|
162 |
+
history_text=history_text,
|
163 |
+
available_actions=json.dumps(available_actions),
|
164 |
+
)
|
165 |
|
166 |
+
message = self._create_message_with_history(prompt, image_b64_for_prompt)
|
167 |
+
response = self.model.invoke(message)
|
|
|
168 |
|
169 |
+
decision = self._parse_agent_response(response)
|
170 |
|
171 |
+
if not decision:
|
172 |
+
print(
|
173 |
+
"Response parsing failed. Using default recovery action: PAN_RIGHT."
|
174 |
+
)
|
175 |
+
decision = {
|
176 |
+
"reasoning": "Recovery due to parsing failure.",
|
177 |
+
"action_details": {"action": "PAN_RIGHT"},
|
178 |
+
}
|
179 |
|
180 |
+
decision["screenshot_b64"] = current_screenshot_b64
|
181 |
+
history.append(decision)
|
182 |
+
|
183 |
+
action_details = decision.get("action_details", {})
|
184 |
+
action = action_details.get("action")
|
185 |
+
print(f"AI Reasoning: {decision.get('reasoning', 'N/A')}")
|
186 |
+
print(f"AI Action: {action}")
|
187 |
+
|
188 |
+
if action == "GUESS":
|
189 |
+
lat, lon = action_details.get("lat"), action_details.get("lon")
|
190 |
+
if lat is not None and lon is not None:
|
191 |
+
return lat, lon
|
192 |
+
elif action == "MOVE_FORWARD":
|
193 |
+
self.controller.move("forward")
|
194 |
+
elif action == "MOVE_BACKWARD":
|
195 |
+
self.controller.move("backward")
|
196 |
+
elif action == "PAN_LEFT":
|
197 |
+
self.controller.pan_view("left")
|
198 |
+
elif action == "PAN_RIGHT":
|
199 |
+
self.controller.pan_view("right")
|
200 |
+
|
201 |
+
print("Max steps reached. Agent did not make a final guess.")
|
202 |
return None
|
203 |
|
204 |
def analyze_image(self, image: Image) -> Optional[Tuple[float, float]]:
|
205 |
+
image_b64 = self.pil_to_base64(image)
|
206 |
+
message = self._create_llm_message(BENCHMARK_PROMPT, image_b64)
|
207 |
+
response = self.model.invoke(message)
|
208 |
+
print(f"\nLLM Response:\n{response.content}")
|
209 |
+
|
210 |
+
content = response.content.strip()
|
211 |
+
last_line = ""
|
212 |
+
for line in reversed(content.split("\n")):
|
213 |
+
if "lat" in line.lower() and "lon" in line.lower():
|
214 |
+
last_line = line
|
215 |
+
break
|
216 |
+
if not last_line:
|
217 |
+
return None
|
218 |
|
219 |
+
numbers = re.findall(r"[-+]?\d*\.\d+|\d+", last_line)
|
220 |
+
if len(numbers) < 2:
|
221 |
+
return None
|
222 |
|
223 |
+
lat, lon = float(numbers[0]), float(numbers[1])
|
224 |
+
return lat, lon
|
225 |
|
226 |
+
def take_screenshot(self) -> Optional[Image.Image]:
|
227 |
+
screenshot_bytes = self.controller.take_street_view_screenshot()
|
228 |
+
if screenshot_bytes:
|
229 |
+
return Image.open(BytesIO(screenshot_bytes))
|
230 |
+
return None
|
231 |
|
232 |
def close(self):
|
|
|
233 |
if self.controller:
|
234 |
self.controller.close()
|
235 |
|
main.py
CHANGED
@@ -1,254 +1,173 @@
|
|
1 |
-
#!/usr/bin/env python3
|
2 |
-
"""
|
3 |
-
Main entry point for MapCrunch geo-location testing
|
4 |
-
|
5 |
-
Usage:
|
6 |
-
python main.py --mode data --samples 50 --urban --no-indoor # Collect filtered data
|
7 |
-
python main.py --mode benchmark --models gpt-4o claude-3.5-sonnet # Run benchmark
|
8 |
-
python main.py --mode interactive --model gpt-4o # Interactive testing
|
9 |
-
"""
|
10 |
-
|
11 |
import argparse
|
12 |
-
import
|
13 |
-
|
14 |
-
from typing import Dict
|
15 |
|
16 |
from langchain_openai import ChatOpenAI
|
17 |
from langchain_anthropic import ChatAnthropic
|
18 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
19 |
|
20 |
from geo_bot import GeoBot
|
21 |
-
from data_collector import DataCollector
|
22 |
from benchmark import MapGuesserBenchmark
|
23 |
-
from config import MODELS_CONFIG, SUCCESS_THRESHOLD_KM
|
24 |
|
25 |
|
26 |
-
def
|
27 |
-
"""
|
28 |
-
|
|
|
|
|
|
|
|
|
|
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
34 |
return
|
35 |
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
# Create bot with Selenium integration
|
41 |
-
with GeoBot(model=model_class, model_name=model_instance, use_selenium=True) as bot:
|
42 |
-
# Setup clean environment
|
43 |
-
if bot.controller:
|
44 |
-
bot.controller.setup_clean_environment()
|
45 |
-
|
46 |
-
for turn in range(turns):
|
47 |
-
print(f"\n{'=' * 50}")
|
48 |
-
print(f"🎯 Turn {turn + 1}/{turns}")
|
49 |
-
print(f"{'=' * 50}")
|
50 |
-
|
51 |
-
try:
|
52 |
-
# Get new location (click Go button)
|
53 |
-
if bot.controller:
|
54 |
-
if not bot.controller.click_go_button():
|
55 |
-
print("❌ Failed to get new location")
|
56 |
-
continue
|
57 |
-
else:
|
58 |
-
print("⚠️ Manual mode: Please click Go button and press Enter")
|
59 |
-
input()
|
60 |
-
|
61 |
-
# Take screenshot and analyze
|
62 |
-
screenshot = bot.take_screenshot()
|
63 |
-
location = bot.analyze_image(screenshot)
|
64 |
-
|
65 |
-
if location is not None:
|
66 |
-
bot.select_map_location(*location, plot=plot)
|
67 |
-
print("✅ Location selected successfully")
|
68 |
-
else:
|
69 |
-
print("❌ Could not determine location")
|
70 |
-
# Select a default location
|
71 |
-
bot.select_map_location(
|
72 |
-
x=bot.map_x + bot.map_w // 2,
|
73 |
-
y=bot.map_y + bot.map_h // 2,
|
74 |
-
plot=plot,
|
75 |
-
)
|
76 |
-
|
77 |
-
# Brief pause between turns
|
78 |
-
sleep(2)
|
79 |
-
|
80 |
-
except KeyboardInterrupt:
|
81 |
-
print(f"\n⏹️ Game stopped by user after {turn + 1} turns")
|
82 |
-
break
|
83 |
-
except Exception as e:
|
84 |
-
print(f"❌ Error in turn {turn + 1}: {e}")
|
85 |
-
continue
|
86 |
|
|
|
|
|
|
|
87 |
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
|
94 |
-
|
95 |
-
|
|
|
|
|
|
|
|
|
96 |
|
97 |
-
|
98 |
-
data = collector.collect_samples(samples)
|
99 |
-
print(f"✅ Collected {len(data)} samples successfully")
|
100 |
|
|
|
101 |
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
"""Benchmark mode"""
|
106 |
-
if models is None:
|
107 |
-
models = ["gpt-4o"] # Default model
|
108 |
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
|
|
|
|
113 |
|
114 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
115 |
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
)
|
|
|
|
|
|
|
120 |
|
121 |
-
|
122 |
|
123 |
-
if summary:
|
124 |
-
print(f"\n📊 Results Summary:")
|
125 |
-
for model, stats in summary.items():
|
126 |
-
print(f"\n🤖 {model}:")
|
127 |
-
print(
|
128 |
-
f" Success Rate (under {SUCCESS_THRESHOLD_KM}km): {stats.get('success_rate', 0) * 100:.1f}%"
|
129 |
-
)
|
130 |
-
print(f" 📏 Average Distance: {stats['average_distance_km']:.1f} km")
|
131 |
-
print(f" 📊 Median Distance: {stats['median_distance_km']:.1f} km")
|
132 |
-
print(f" 🎯 Best: {stats['min_distance_km']:.1f} km")
|
133 |
-
print(f" 📈 Worst: {stats['max_distance_km']:.1f} km")
|
134 |
|
135 |
-
|
136 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
|
138 |
|
139 |
def main():
|
140 |
-
parser = argparse.ArgumentParser(
|
141 |
-
description="MapCrunch Geo-Location AI Benchmark",
|
142 |
-
formatter_class=argparse.RawDescriptionHelpFormatter,
|
143 |
-
epilog="""
|
144 |
-
Examples:
|
145 |
-
# Collect training data with filters
|
146 |
-
python main.py --mode data --samples 100 --urban --no-indoor
|
147 |
-
|
148 |
-
# Collect from specific countries
|
149 |
-
python main.py --mode data --samples 50 --countries us gb jp --urban
|
150 |
-
|
151 |
-
# Run benchmark on saved data
|
152 |
-
python main.py --mode benchmark --models gpt-4o claude-3.5-sonnet --samples 20
|
153 |
-
|
154 |
-
# Interactive testing
|
155 |
-
python main.py --mode interactive --model gpt-4o --turns 5 --plot
|
156 |
-
|
157 |
-
# Live benchmark (uses MapCrunch website directly)
|
158 |
-
python main.py --mode benchmark --live --models gpt-4o
|
159 |
-
""",
|
160 |
-
)
|
161 |
-
|
162 |
parser.add_argument(
|
163 |
"--mode",
|
164 |
-
choices=["
|
165 |
-
default="
|
166 |
-
help="Operation mode",
|
167 |
)
|
168 |
-
|
169 |
-
# Interactive mode options
|
170 |
parser.add_argument(
|
171 |
"--model",
|
172 |
choices=list(MODELS_CONFIG.keys()),
|
173 |
default="gpt-4o",
|
174 |
-
help="Model
|
175 |
-
)
|
176 |
-
parser.add_argument(
|
177 |
-
"--turns", type=int, default=5, help="Number of turns in interactive mode"
|
178 |
-
)
|
179 |
-
parser.add_argument(
|
180 |
-
"--plot", action="store_true", help="Generate plots of predictions"
|
181 |
)
|
182 |
-
|
183 |
-
# Data collection options
|
184 |
parser.add_argument(
|
185 |
-
"--
|
186 |
)
|
187 |
parser.add_argument(
|
188 |
-
"--
|
|
|
|
|
|
|
189 |
)
|
190 |
-
parser.add_argument("--no-indoor", action="store_true", help="Exclude indoor views")
|
191 |
parser.add_argument(
|
192 |
-
"--
|
193 |
-
nargs="+",
|
194 |
-
help="Specific countries to collect from (e.g., us gb jp)",
|
195 |
)
|
196 |
-
|
197 |
-
# Benchmark options
|
198 |
parser.add_argument(
|
199 |
"--models",
|
200 |
nargs="+",
|
201 |
choices=list(MODELS_CONFIG.keys()),
|
202 |
-
help="Models to benchmark",
|
203 |
-
)
|
204 |
-
parser.add_argument(
|
205 |
-
"--live", action="store_true", help="Use live MapCrunch website for benchmark"
|
206 |
-
)
|
207 |
-
|
208 |
-
# General options
|
209 |
-
parser.add_argument(
|
210 |
-
"--headless", action="store_true", help="Run browser in headless mode"
|
211 |
)
|
212 |
|
213 |
args = parser.parse_args()
|
214 |
|
215 |
-
|
216 |
-
|
217 |
-
|
218 |
-
|
219 |
-
|
220 |
-
|
221 |
-
|
222 |
-
|
223 |
-
|
224 |
-
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
-
if args.urban:
|
229 |
-
options["urban_only"] = True
|
230 |
-
if args.no_indoor:
|
231 |
-
options["exclude_indoor"] = True
|
232 |
-
if args.countries:
|
233 |
-
options["selected_countries"] = args.countries
|
234 |
-
|
235 |
-
data_collection_mode(
|
236 |
-
samples=args.samples, headless=args.headless, options=options
|
237 |
-
)
|
238 |
-
|
239 |
-
elif args.mode == "benchmark":
|
240 |
-
benchmark_mode(
|
241 |
-
models=args.models,
|
242 |
-
samples=args.samples,
|
243 |
-
live=args.live,
|
244 |
-
headless=args.headless,
|
245 |
-
)
|
246 |
-
|
247 |
-
except KeyboardInterrupt:
|
248 |
-
print(f"\n⏹️ Operation interrupted by user")
|
249 |
-
except Exception as e:
|
250 |
-
print(f"❌ Error: {e}")
|
251 |
-
raise
|
252 |
|
253 |
|
254 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import argparse
|
2 |
+
import json
|
3 |
+
import random
|
4 |
+
from typing import Dict, Optional, List
|
5 |
|
6 |
from langchain_openai import ChatOpenAI
|
7 |
from langchain_anthropic import ChatAnthropic
|
8 |
from langchain_google_genai import ChatGoogleGenerativeAI
|
9 |
|
10 |
from geo_bot import GeoBot
|
|
|
11 |
from benchmark import MapGuesserBenchmark
|
12 |
+
from config import MODELS_CONFIG, DATA_PATHS, SUCCESS_THRESHOLD_KM
|
13 |
|
14 |
|
15 |
+
def agent_mode(model_name: str, steps: int, headless: bool, samples: int):
|
16 |
+
"""
|
17 |
+
Runs the AI Agent in a benchmark loop over multiple samples,
|
18 |
+
using multi-step exploration for each.
|
19 |
+
"""
|
20 |
+
print(
|
21 |
+
f"Starting Agent Mode (as a benchmark): model={model_name}, steps={steps}, samples={samples}"
|
22 |
+
)
|
23 |
|
24 |
+
try:
|
25 |
+
with open(DATA_PATHS["golden_labels"], "r", encoding="utf-8") as f:
|
26 |
+
golden_labels = json.load(f).get("samples", [])
|
27 |
+
except FileNotFoundError:
|
28 |
+
print(f"Error: Golden labels file not found at {DATA_PATHS['golden_labels']}.")
|
29 |
return
|
30 |
|
31 |
+
if not golden_labels:
|
32 |
+
print("Error: No samples found in golden_labels.json.")
|
33 |
+
return
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
+
num_to_test = min(samples, len(golden_labels))
|
36 |
+
test_samples = golden_labels[:num_to_test]
|
37 |
+
print(f"Will run on {len(test_samples)} samples.")
|
38 |
|
39 |
+
config = MODELS_CONFIG.get(model_name)
|
40 |
+
model_class = globals()[config["class"]]
|
41 |
+
model_instance_name = config["model_name"]
|
42 |
+
|
43 |
+
benchmark_helper = MapGuesserBenchmark(headless=True)
|
44 |
+
all_results = []
|
45 |
+
|
46 |
+
with GeoBot(
|
47 |
+
model=model_class, model_name=model_instance_name, headless=headless
|
48 |
+
) as bot:
|
49 |
+
for i, sample in enumerate(test_samples):
|
50 |
+
print(
|
51 |
+
f"\n--- Running Sample {i + 1}/{len(test_samples)} (ID: {sample.get('id')}) ---"
|
52 |
+
)
|
53 |
|
54 |
+
# **FIXED**: Correct sequence: Load Data -> Clean Environment -> Run Loop
|
55 |
+
if not bot.controller.load_location_from_data(sample):
|
56 |
+
print(
|
57 |
+
f" ❌ Failed to load location for sample {sample.get('id')}. Skipping."
|
58 |
+
)
|
59 |
+
continue
|
60 |
|
61 |
+
bot.controller.setup_clean_environment()
|
|
|
|
|
62 |
|
63 |
+
final_guess = bot.run_agent_loop(max_steps=steps)
|
64 |
|
65 |
+
true_coords = {"lat": sample.get("lat"), "lng": sample.get("lng")}
|
66 |
+
distance_km = None
|
67 |
+
is_success = False
|
|
|
|
|
|
|
68 |
|
69 |
+
if final_guess:
|
70 |
+
distance_km = benchmark_helper.calculate_distance(
|
71 |
+
true_coords, final_guess
|
72 |
+
)
|
73 |
+
if distance_km is not None:
|
74 |
+
is_success = distance_km <= SUCCESS_THRESHOLD_KM
|
75 |
|
76 |
+
print(f"\nResult for Sample ID: {sample.get('id')}")
|
77 |
+
print(
|
78 |
+
f" Ground Truth: Lat={true_coords['lat']:.4f}, Lon={true_coords['lng']:.4f}"
|
79 |
+
)
|
80 |
+
print(
|
81 |
+
f" Final Guess: Lat={final_guess[0]:.4f}, Lon={final_guess[1]:.4f}"
|
82 |
+
)
|
83 |
+
dist_str = f"{distance_km:.1f} km" if distance_km is not None else "N/A"
|
84 |
+
print(f" Distance: {dist_str}, Success: {is_success}")
|
85 |
+
else:
|
86 |
+
print("Agent did not make a final guess for this sample.")
|
87 |
+
|
88 |
+
all_results.append(
|
89 |
+
{
|
90 |
+
"sample_id": sample.get("id"),
|
91 |
+
"model": bot.model_name,
|
92 |
+
"true_coordinates": true_coords,
|
93 |
+
"predicted_coordinates": final_guess,
|
94 |
+
"distance_km": distance_km,
|
95 |
+
"success": is_success,
|
96 |
+
}
|
97 |
+
)
|
98 |
|
99 |
+
summary = benchmark_helper.generate_summary(all_results)
|
100 |
+
if summary:
|
101 |
+
print("\n\n--- Agent Benchmark Complete! Summary ---")
|
102 |
+
for model, stats in summary.items():
|
103 |
+
print(f"Model: {model}")
|
104 |
+
print(f" Success Rate: {stats['success_rate'] * 100:.1f}%")
|
105 |
+
print(f" Avg Distance: {stats['average_distance_km']:.1f} km")
|
106 |
|
107 |
+
print("\nAgent Mode finished.")
|
108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
109 |
|
110 |
+
def benchmark_mode(models: list, samples: int, headless: bool):
|
111 |
+
"""Runs the benchmark on pre-collected data."""
|
112 |
+
print(f"Starting Benchmark Mode: models={models}, samples={samples}")
|
113 |
+
benchmark = MapGuesserBenchmark(headless=headless)
|
114 |
+
summary = benchmark.run_benchmark(models=models, max_samples=samples)
|
115 |
+
if summary:
|
116 |
+
print("\n--- Benchmark Complete! Summary ---")
|
117 |
+
for model, stats in summary.items():
|
118 |
+
print(f"Model: {model}")
|
119 |
+
print(f" Success Rate: {stats['success_rate'] * 100:.1f}%")
|
120 |
+
print(f" Avg Distance: {stats['average_distance_km']:.1f} km")
|
121 |
|
122 |
|
123 |
def main():
|
124 |
+
parser = argparse.ArgumentParser(description="MapCrunch AI Agent & Benchmark")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
125 |
parser.add_argument(
|
126 |
"--mode",
|
127 |
+
choices=["agent", "benchmark"],
|
128 |
+
default="agent",
|
129 |
+
help="Operation mode.",
|
130 |
)
|
|
|
|
|
131 |
parser.add_argument(
|
132 |
"--model",
|
133 |
choices=list(MODELS_CONFIG.keys()),
|
134 |
default="gpt-4o",
|
135 |
+
help="Model to use.",
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
)
|
|
|
|
|
137 |
parser.add_argument(
|
138 |
+
"--steps", type=int, default=10, help="[Agent] Number of exploration steps."
|
139 |
)
|
140 |
parser.add_argument(
|
141 |
+
"--samples",
|
142 |
+
type=int,
|
143 |
+
default=50,
|
144 |
+
help="Number of samples to process for the selected mode.",
|
145 |
)
|
|
|
146 |
parser.add_argument(
|
147 |
+
"--headless", action="store_true", help="Run browser in headless mode."
|
|
|
|
|
148 |
)
|
|
|
|
|
149 |
parser.add_argument(
|
150 |
"--models",
|
151 |
nargs="+",
|
152 |
choices=list(MODELS_CONFIG.keys()),
|
153 |
+
help="[Benchmark] Models to benchmark.",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
)
|
155 |
|
156 |
args = parser.parse_args()
|
157 |
|
158 |
+
if args.mode == "agent":
|
159 |
+
agent_mode(
|
160 |
+
model_name=args.model,
|
161 |
+
steps=args.steps,
|
162 |
+
headless=args.headless,
|
163 |
+
samples=args.samples,
|
164 |
+
)
|
165 |
+
elif args.mode == "benchmark":
|
166 |
+
benchmark_mode(
|
167 |
+
models=args.models or [args.model],
|
168 |
+
samples=args.samples,
|
169 |
+
headless=args.headless,
|
170 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
171 |
|
172 |
|
173 |
if __name__ == "__main__":
|
mapcrunch_controller.py
CHANGED
@@ -1,14 +1,12 @@
|
|
1 |
-
|
|
|
2 |
|
3 |
from selenium import webdriver
|
4 |
-
from selenium.webdriver.common.by import By
|
5 |
from selenium.webdriver.support.ui import WebDriverWait
|
6 |
from selenium.webdriver.support import expected_conditions as EC
|
7 |
-
from selenium.common.
|
8 |
-
from typing import Dict, Optional
|
9 |
-
import time
|
10 |
|
11 |
-
from config import MAPCRUNCH_URL, SELECTORS, DATA_COLLECTION_CONFIG
|
12 |
|
13 |
|
14 |
class MapCrunchController:
|
@@ -17,155 +15,126 @@ class MapCrunchController:
|
|
17 |
if headless:
|
18 |
options.add_argument("--headless")
|
19 |
options.add_argument("--window-size=1920,1080")
|
20 |
-
options.add_experimental_option("excludeSwitches", ["enable-automation"])
|
21 |
self.driver = webdriver.Chrome(options=options)
|
22 |
self.wait = WebDriverWait(self.driver, 10)
|
23 |
self.driver.get(MAPCRUNCH_URL)
|
24 |
time.sleep(3)
|
25 |
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
)
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
).
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
)
|
47 |
-
|
48 |
-
|
49 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
50 |
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
)
|
55 |
-
if options.get("exclude_indoor", True) == indoor_checkbox.is_selected():
|
56 |
-
indoor_checkbox.click()
|
57 |
-
print(
|
58 |
-
f"✅ Indoor views excluded: {options.get('exclude_indoor', True)}"
|
59 |
-
)
|
60 |
-
|
61 |
-
# 关闭面板
|
62 |
-
options_button.click()
|
63 |
-
time.sleep(0.5)
|
64 |
-
print("✅ Collection options configured.")
|
65 |
-
return True
|
66 |
-
except Exception as e:
|
67 |
-
print(f"❌ Error configuring options: {e}")
|
68 |
-
return False
|
69 |
|
70 |
-
# ... 其他所有函数 (click_go_button, get_live_location_identifiers, 等) 保持我们上一版的最终形态,无需改动 ...
|
71 |
def click_go_button(self) -> bool:
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
time.sleep(DATA_COLLECTION_CONFIG.get("wait_after_go", 5))
|
78 |
-
return True
|
79 |
-
except Exception as e:
|
80 |
-
print(f"❌ Error clicking Go button: {e}")
|
81 |
-
return False
|
82 |
-
|
83 |
-
def get_live_location_identifiers(self) -> Dict:
|
84 |
-
try:
|
85 |
-
return self.driver.execute_script("""
|
86 |
-
try {
|
87 |
-
const pov = window.panorama.getPov();
|
88 |
-
return {
|
89 |
-
panoId: window.panorama ? window.panorama.getPano() : null,
|
90 |
-
pov: { heading: pov.heading, pitch: pov.pitch, zoom: pov.zoom }
|
91 |
-
};
|
92 |
-
} catch (e) { return { error: e.toString() }; }
|
93 |
-
""")
|
94 |
-
except Exception as e:
|
95 |
-
print(f"❌ Error getting live identifiers via JS: {e}")
|
96 |
-
return {}
|
97 |
-
|
98 |
-
def get_current_address(self) -> Optional[str]:
|
99 |
-
try:
|
100 |
-
address_element = self.wait.until(
|
101 |
-
EC.visibility_of_element_located(
|
102 |
-
(By.CSS_SELECTOR, SELECTORS["address_element"])
|
103 |
-
)
|
104 |
-
)
|
105 |
-
return address_element.get_attribute("title") or address_element.text
|
106 |
-
except TimeoutException:
|
107 |
-
return "Address not found"
|
108 |
|
109 |
-
def
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
)
|
114 |
-
|
115 |
-
|
116 |
-
elementsToHide.forEach(sel => { const el = document.querySelector(sel); if (el) el.style.display = 'none'; });
|
117 |
-
const panoBox = document.querySelector('#pano-box'); if (panoBox) panoBox.style.height = '100vh';
|
118 |
-
""")
|
119 |
-
except Exception as e:
|
120 |
-
print(f"⚠️ Warning: Could not fully configure clean environment: {e}")
|
121 |
|
122 |
def load_location_from_data(self, location_data: Dict) -> bool:
|
123 |
-
"""
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
pov = location_data.get("pov")
|
130 |
-
|
131 |
-
# 策略B:优先尝试通过JS直接设置场景,速度最快
|
132 |
-
if pano_id and pov:
|
133 |
-
# print(f"✅ Loading location via JS Call: PanoID {pano_id[:10]}...")
|
134 |
-
self.driver.execute_script(
|
135 |
-
"window.panorama.setPano(arguments[0]);"
|
136 |
-
"window.panorama.setPov(arguments[1]);",
|
137 |
-
pano_id,
|
138 |
-
pov,
|
139 |
-
)
|
140 |
-
time.sleep(2) # 等待新瓦片图加载
|
141 |
-
return True
|
142 |
-
|
143 |
-
# 策略A:如果数据不完整,回退到URL加载的方式
|
144 |
-
url_slug = location_data.get("url_slug")
|
145 |
-
if url_slug:
|
146 |
-
url_to_load = f"{MAPCRUNCH_URL}/p/{url_slug}"
|
147 |
-
print(f"⚠️ JS load failed, falling back to URL Slug: {url_to_load}")
|
148 |
-
self.driver.get(url_to_load)
|
149 |
-
time.sleep(4)
|
150 |
-
return True
|
151 |
-
|
152 |
-
print("❌ Cannot load location: No valid pano_id/pov or url_slug in data.")
|
153 |
-
return False
|
154 |
-
|
155 |
-
except Exception as e:
|
156 |
-
print(f"❌ Error loading location: {e}")
|
157 |
-
return False
|
158 |
-
|
159 |
-
def take_street_view_screenshot(self) -> Optional[bytes]:
|
160 |
-
try:
|
161 |
-
pano_element = self.wait.until(
|
162 |
-
EC.presence_of_element_located(
|
163 |
-
(By.CSS_SELECTOR, SELECTORS["pano_container"])
|
164 |
-
)
|
165 |
)
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
|
170 |
def close(self):
|
171 |
if self.driver:
|
|
|
1 |
+
import time
|
2 |
+
from typing import Dict, Optional, List
|
3 |
|
4 |
from selenium import webdriver
|
|
|
5 |
from selenium.webdriver.support.ui import WebDriverWait
|
6 |
from selenium.webdriver.support import expected_conditions as EC
|
7 |
+
from selenium.webdriver.common.by import By
|
|
|
|
|
8 |
|
9 |
+
from config import MAPCRUNCH_URL, SELECTORS, DATA_COLLECTION_CONFIG
|
10 |
|
11 |
|
12 |
class MapCrunchController:
|
|
|
15 |
if headless:
|
16 |
options.add_argument("--headless")
|
17 |
options.add_argument("--window-size=1920,1080")
|
|
|
18 |
self.driver = webdriver.Chrome(options=options)
|
19 |
self.wait = WebDriverWait(self.driver, 10)
|
20 |
self.driver.get(MAPCRUNCH_URL)
|
21 |
time.sleep(3)
|
22 |
|
23 |
+
def setup_clean_environment(self):
|
24 |
+
"""
|
25 |
+
Minimal environment setup using hideLoc() and hiding major UI.
|
26 |
+
"""
|
27 |
+
self.driver.execute_script("if(typeof hideLoc === 'function') hideLoc();")
|
28 |
+
self.driver.execute_script("""
|
29 |
+
const topBar = document.querySelector('#topbar');
|
30 |
+
if (topBar) topBar.style.display = 'none';
|
31 |
+
|
32 |
+
const bottomBox = document.querySelector('#bottom-box');
|
33 |
+
if (bottomBox) bottomBox.style.display = 'none';
|
34 |
+
|
35 |
+
const infoFirstView = document.querySelector('#info-firstview');
|
36 |
+
if (infoFirstView) infoFirstView.style.display = 'none';
|
37 |
+
""")
|
38 |
+
|
39 |
+
def get_available_actions(self) -> List[str]:
|
40 |
+
"""
|
41 |
+
Checks for movement links via JavaScript.
|
42 |
+
FIXED: Removed PAN_UP and PAN_DOWN as they are not very useful.
|
43 |
+
"""
|
44 |
+
base_actions = ["PAN_LEFT", "PAN_RIGHT", "GUESS"]
|
45 |
+
links = self.driver.execute_script("return window.panorama.getLinks();")
|
46 |
+
if links and len(links) > 0:
|
47 |
+
base_actions.extend(["MOVE_FORWARD", "MOVE_BACKWARD"])
|
48 |
+
return base_actions
|
49 |
+
|
50 |
+
def pan_view(self, direction: str, degrees: int = 45):
|
51 |
+
"""Pans the view using a direct JS call."""
|
52 |
+
pov = self.driver.execute_script("return window.panorama.getPov();")
|
53 |
+
if direction == "left":
|
54 |
+
pov["heading"] -= degrees
|
55 |
+
elif direction == "right":
|
56 |
+
pov["heading"] += degrees
|
57 |
+
# UP/DOWN panning logic removed as actions are no longer available.
|
58 |
+
self.driver.execute_script("window.panorama.setPov(arguments[0]);", pov)
|
59 |
+
time.sleep(0.5)
|
60 |
+
|
61 |
+
def move(self, direction: str):
|
62 |
+
"""Moves by finding the best panorama link and setting it via JS."""
|
63 |
+
pov = self.driver.execute_script("return window.panorama.getPov();")
|
64 |
+
links = self.driver.execute_script("return window.panorama.getLinks();")
|
65 |
+
if not links:
|
66 |
+
return
|
67 |
+
|
68 |
+
current_heading = pov["heading"]
|
69 |
+
best_link = None
|
70 |
+
|
71 |
+
if direction == "forward":
|
72 |
+
min_diff = 360
|
73 |
+
for link in links:
|
74 |
+
diff = 180 - abs(abs(link["heading"] - current_heading) - 180)
|
75 |
+
if diff < min_diff:
|
76 |
+
min_diff = diff
|
77 |
+
best_link = link
|
78 |
+
elif direction == "backward":
|
79 |
+
target_heading = (current_heading + 180) % 360
|
80 |
+
min_diff = 360
|
81 |
+
for link in links:
|
82 |
+
diff = 180 - abs(abs(link["heading"] - target_heading) - 180)
|
83 |
+
if diff < min_diff:
|
84 |
+
min_diff = diff
|
85 |
+
best_link = link
|
86 |
+
|
87 |
+
if best_link:
|
88 |
+
self.driver.execute_script(
|
89 |
+
"window.panorama.setPano(arguments[0]);", best_link["pano"]
|
90 |
)
|
91 |
+
time.sleep(2.5)
|
92 |
+
|
93 |
+
# ... the remaining methods are unchanged ...
|
94 |
+
def select_map_location_and_guess(self, lat: float, lon: float):
    """Confirm a guess by re-showing the bottom bar and clicking Go/confirm.

    NOTE(review): *lat* and *lon* are currently unused — this method does not
    place a pin on the guess map; it only clicks through the confirmation
    buttons. Verify against the caller whether the pin is set elsewhere.
    """
    # Re-show the bottom UI bar that setup_clean_environment() hides.
    self.driver.execute_script(
        "document.querySelector('#bottom-box').style.display = 'block';"
    )
    self.wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, SELECTORS["go_button"]))
    ).click()
    time.sleep(0.5)
    # FIX: "confirm_button" was removed from SELECTORS in config.py, so a
    # direct SELECTORS["confirm_button"] lookup raises KeyError. Fall back to
    # the literal selector the old config used.
    confirm_selector = SELECTORS.get("confirm_button", "#confirm-button")
    self.wait.until(
        EC.element_to_be_clickable((By.CSS_SELECTOR, confirm_selector))
    ).click()
    time.sleep(3)
|
107 |
|
108 |
+
def get_ground_truth_location(self) -> Optional[Dict[str, float]]:
|
109 |
+
"""Directly gets location from JS object."""
|
110 |
+
return self.driver.execute_script("return window.loc;")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
|
|
|
112 |
def click_go_button(self) -> bool:
    """Click the Go button to load a new random location.

    Returns True after the post-click wait; any Selenium error propagates
    to the caller.
    """
    locator = (By.CSS_SELECTOR, SELECTORS["go_button"])
    button = self.wait.until(EC.element_to_be_clickable(locator))
    button.click()
    # Give the new panorama time to load before the caller interacts with it.
    time.sleep(DATA_COLLECTION_CONFIG.get("wait_after_go", 3))
    return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
|
119 |
+
def take_street_view_screenshot(self) -> Optional[bytes]:
    """Capture the street-view panorama element as PNG bytes."""
    locator = (By.CSS_SELECTOR, SELECTORS["pano_container"])
    pano = self.wait.until(EC.presence_of_element_located(locator))
    return pano.screenshot_as_png
|
|
|
|
|
|
|
|
|
|
|
126 |
|
127 |
def load_location_from_data(self, location_data: Dict) -> bool:
|
128 |
+
pano_id, pov = location_data.get("pano_id"), location_data.get("pov")
|
129 |
+
if pano_id and pov:
|
130 |
+
self.driver.execute_script(
|
131 |
+
"window.panorama.setPano(arguments[0]); window.panorama.setPov(arguments[1]);",
|
132 |
+
pano_id,
|
133 |
+
pov,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
134 |
)
|
135 |
+
time.sleep(2)
|
136 |
+
return True
|
137 |
+
return False
|
138 |
|
139 |
def close(self):
|
140 |
if self.driver:
|