feat: add test mode and debug message in hf UI

#4
by LRU1 - opened
Files changed (9) hide show
  1. .gitignore +1 -1
  2. analyzer.ipynb +0 -0
  3. app.py +344 -205
  4. benchmark.py +6 -1
  5. config.py +2 -2
  6. datasets/test/golden_labels.json +759 -0
  7. geo_bot.py +182 -0
  8. main.py +171 -2
  9. mapcrunch_controller.py +73 -36
.gitignore CHANGED
@@ -14,4 +14,4 @@ datasets/*/thumbnails/
14
 
15
  # Legacy data directory (can be removed if no longer used)
16
  data/
17
- !data/golden_labels.json
 
14
 
15
  # Legacy data directory (can be removed if no longer used)
16
  data/
17
+ !data/golden_labels.json
analyzer.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -2,6 +2,8 @@ import streamlit as st
2
  import json
3
  import os
4
  import time
 
 
5
  import re
6
  from pathlib import Path
7
 
@@ -67,7 +69,7 @@ with st.sidebar:
67
  st.header("Configuration")
68
 
69
  # Mode selection
70
- mode = st.radio("Mode", ["Dataset Mode", "Online Mode"], index=0)
71
 
72
  if mode == "Dataset Mode":
73
  # Get available datasets and ensure we have a valid default
@@ -114,6 +116,43 @@ with st.sidebar:
114
  num_samples = st.slider(
115
  "Samples to Test", 1, len(golden_labels), min(3, len(golden_labels))
116
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  else: # Online Mode
118
  st.info("Enter a URL to analyze a specific location")
119
 
@@ -211,219 +250,319 @@ with st.sidebar:
211
  help="Controls randomness in AI responses. 0.0 = deterministic, higher = more creative",
212
  )
213
 
 
214
  start_button = st.button("🚀 Start", type="primary")
215
 
216
  # Main Logic
217
  if start_button:
218
- test_samples = golden_labels[:num_samples]
219
- config = MODELS_CONFIG[model_choice]
220
- model_class = get_model_class(config["class"])
221
-
222
- benchmark_helper = MapGuesserBenchmark(
223
- dataset_name=dataset_choice if mode == "Dataset Mode" else "online"
224
- )
225
- all_results = []
226
-
227
- progress_bar = st.progress(0)
228
-
229
- with GeoBot(
230
- model=model_class,
231
- model_name=config["model_name"],
232
- headless=True,
233
- temperature=temperature,
234
- ) as bot:
235
- for i, sample in enumerate(test_samples):
236
- st.divider()
237
- st.header(f"Sample {i + 1}/{num_samples}")
238
-
239
- if mode == "Online Mode":
240
- # Load the MapCrunch URL directly
241
- bot.controller.load_url(sample["url"])
242
- else:
243
- # Load from dataset as before
244
- bot.controller.load_location_from_data(sample)
245
-
246
- bot.controller.setup_clean_environment()
247
-
248
- # Create containers for UI updates
249
- sample_container = st.container()
250
-
251
- # Initialize UI state for this sample
252
- step_containers = {}
253
- sample_steps_data = []
254
-
255
- def ui_step_callback(step_info):
256
- """Callback function to update UI after each step"""
257
- step_num = step_info["step_num"]
258
-
259
- # Store step data
260
- sample_steps_data.append(step_info)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
261
 
262
- with sample_container:
263
- # Create step container if it doesn't exist
264
- if step_num not in step_containers:
265
- step_containers[step_num] = st.container()
266
-
267
- with step_containers[step_num]:
268
- st.subheader(f"Step {step_num}/{step_info['max_steps']}")
269
-
270
- col1, col2 = st.columns([1, 2])
271
-
272
- with col1:
273
- # Display screenshot
274
- st.image(
275
- step_info["screenshot_bytes"],
276
- caption=f"What AI sees - Step {step_num}",
277
- use_column_width=True,
278
- )
279
-
280
- with col2:
281
- # Show available actions
282
- st.write("**Available Actions:**")
283
- st.code(
284
- json.dumps(step_info["available_actions"], indent=2)
285
- )
286
-
287
- # Show history context - use the history from step_info
288
- current_history = step_info.get("history", [])
289
- history_text = bot.generate_history_text(current_history)
290
- st.write("**AI Context:**")
291
- st.text_area(
292
- "History",
293
- history_text,
294
- height=100,
295
- disabled=True,
296
- key=f"history_{i}_{step_num}",
297
- )
298
-
299
- # Show AI reasoning and action
300
- action = step_info.get("action_details", {}).get(
301
- "action", "N/A"
302
- )
303
-
304
- if step_info.get("is_final_step") and action != "GUESS":
305
- st.warning("Max steps reached. Forcing GUESS.")
306
-
307
- st.write("**AI Reasoning:**")
308
- st.info(step_info.get("reasoning", "N/A"))
309
-
310
- st.write("**AI Action:**")
311
- if action == "GUESS":
312
- lat = step_info.get("action_details", {}).get("lat")
313
- lon = step_info.get("action_details", {}).get("lon")
314
- st.success(f"`{action}` - {lat:.4f}, {lon:.4f}")
315
- else:
316
- st.success(f"`{action}`")
317
-
318
- # Show decision details for debugging
319
- with st.expander("Decision Details"):
320
- decision_data = {
321
- "reasoning": step_info.get("reasoning"),
322
- "action_details": step_info.get("action_details"),
323
- "remaining_steps": step_info.get("remaining_steps"),
324
- }
325
- st.json(decision_data)
326
-
327
- # Force UI refresh
328
- time.sleep(0.5) # Small delay to ensure UI updates are visible
329
-
330
- # Run the agent loop with UI callback
331
- try:
332
- final_guess = bot.run_agent_loop(
333
- max_steps=steps_per_sample, step_callback=ui_step_callback
334
- )
335
- except Exception as e:
336
- st.error(f"Error during agent execution: {e}")
337
- final_guess = None
338
-
339
- # Sample Results
340
- with sample_container:
341
- st.subheader("Sample Result")
342
- true_coords = {"lat": sample.get("lat"), "lng": sample.get("lng")}
343
- distance_km = None
344
- is_success = False
345
-
346
- if final_guess:
347
- distance_km = benchmark_helper.calculate_distance(
348
- true_coords, final_guess
349
- )
350
- if distance_km is not None:
351
- is_success = distance_km <= SUCCESS_THRESHOLD_KM
352
 
353
- col1, col2, col3 = st.columns(3)
354
- col1.metric(
355
- "Final Guess", f"{final_guess[0]:.3f}, {final_guess[1]:.3f}"
356
- )
357
- col2.metric(
358
- "Ground Truth",
359
- f"{true_coords['lat']:.3f}, {true_coords['lng']:.3f}",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
360
  )
361
- col3.metric(
362
- "Distance",
363
- f"{distance_km:.1f} km",
364
- delta="Success" if is_success else "Failed",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
365
  )
366
- else:
367
- st.error("No final guess made")
368
-
369
- all_results.append(
370
- {
371
- "sample_id": sample.get("id"),
372
- "model": model_choice,
373
- "steps_taken": len(sample_steps_data),
374
- "max_steps": steps_per_sample,
375
- "temperature": temperature,
376
- "true_coordinates": true_coords,
377
- "predicted_coordinates": final_guess,
378
- "distance_km": distance_km,
379
- "success": is_success,
380
- }
381
- )
382
 
383
- progress_bar.progress((i + 1) / num_samples)
384
-
385
- # Final Summary
386
- st.divider()
387
- st.header("🏁 Final Results")
388
-
389
- # Calculate summary stats
390
- successes = [r for r in all_results if r["success"]]
391
- success_rate = len(successes) / len(all_results) if all_results else 0
392
-
393
- valid_distances = [
394
- r["distance_km"] for r in all_results if r["distance_km"] is not None
395
- ]
396
- avg_distance = sum(valid_distances) / len(valid_distances) if valid_distances else 0
397
-
398
- # Overall metrics
399
- col1, col2, col3 = st.columns(3)
400
- col1.metric("Success Rate", f"{success_rate * 100:.1f}%")
401
- col2.metric("Average Distance", f"{avg_distance:.1f} km")
402
- col3.metric("Total Samples", len(all_results))
403
-
404
- # Detailed results table
405
- st.subheader("Detailed Results")
406
- st.dataframe(all_results, use_container_width=True)
407
-
408
- # Success/failure breakdown
409
- if successes:
410
- st.subheader("✅ Successful Samples")
411
- st.dataframe(successes, use_container_width=True)
412
-
413
- failures = [r for r in all_results if not r["success"]]
414
- if failures:
415
- st.subheader("❌ Failed Samples")
416
- st.dataframe(failures, use_container_width=True)
417
-
418
- # Export functionality
419
- if st.button("💾 Export Results"):
420
- results_json = json.dumps(all_results, indent=2)
421
- st.download_button(
422
- label="Download results.json",
423
- data=results_json,
424
- file_name=f"geo_results_{dataset_choice}_{model_choice}_{num_samples}samples.json",
425
- mime="application/json",
426
- )
427
 
428
 
429
  def handle_tab_completion():
 
2
  import json
3
  import os
4
  import time
5
+ import pandas as pd
6
+ import altair as alt
7
  import re
8
  from pathlib import Path
9
 
 
69
  st.header("Configuration")
70
 
71
  # Mode selection
72
+ mode = st.radio("Mode", ["Dataset Mode", "Online Mode", "Test Mode"], index=0)
73
 
74
  if mode == "Dataset Mode":
75
  # Get available datasets and ensure we have a valid default
 
116
  num_samples = st.slider(
117
  "Samples to Test", 1, len(golden_labels), min(3, len(golden_labels))
118
  )
119
+
120
+ elif mode == "Test Mode":
121
+ st.info("🔬 Multi-Model Benchmark Testing")
122
+ available_datasets = get_available_datasets()
123
+ dataset_choice = st.selectbox("Dataset", available_datasets, index=0)
124
+
125
+ selected_models = st.multiselect(
126
+ "Select Models to Compare",
127
+ list(MODELS_CONFIG.keys()),
128
+ default=[DEFAULT_MODEL],
129
+ )
130
+ if not selected_models:
131
+ st.warning("Please select at least one model to run the test.")
132
+ st.stop()
133
+
134
+ steps_per_sample = st.slider("Max Steps", 1, 50, 10)
135
+ temperature = st.slider(
136
+ "Temperature",
137
+ 0.0,
138
+ 2.0,
139
+ DEFAULT_TEMPERATURE,
140
+ 0.1,
141
+ help="Controls randomness in AI responses. 0.0 = deterministic, higher = more creative",
142
+ )
143
+
144
+ # load dataset
145
+ data_paths = get_data_paths(dataset_choice)
146
+ try:
147
+ with open(data_paths["golden_labels"], "r") as f:
148
+ golden_labels = json.load(f).get("samples", [])
149
+ st.success(f"Dataset '{dataset_choice}' loaded with {len(golden_labels)} samples")
150
+ except Exception as e:
151
+ st.error(f"Error loading dataset '{dataset_choice}': {str(e)}")
152
+ st.stop()
153
+ num_samples = st.slider("Samples per Run", 1, len(golden_labels), min(10, len(golden_labels)))
154
+ runs_per_model = st.slider("Runs per Model", 1, 10, 5)
155
+
156
  else: # Online Mode
157
  st.info("Enter a URL to analyze a specific location")
158
 
 
250
  help="Controls randomness in AI responses. 0.0 = deterministic, higher = more creative",
251
  )
252
 
253
+ # common start button
254
  start_button = st.button("🚀 Start", type="primary")
255
 
256
  # Main Logic
257
  if start_button:
258
+ if mode == "Test Mode":
259
+ benchmark_helper = MapGuesserBenchmark(dataset_name=dataset_choice)
260
+ summary_by_step = {}
261
+ avg_distance_by_step = {}
262
+ progress_bar = st.progress(0)
263
+ for mi, model_name in enumerate(selected_models):
264
+ st.header(f"Model: {model_name}")
265
+ config = MODELS_CONFIG[model_name]
266
+ model_class = get_model_class(config["class"])
267
+
268
+ successes_per_step = [0]*steps_per_sample
269
+
270
+ dist_sum_per_step = [0.0]*steps_per_sample
271
+ dist_cnt_per_step = [0]*steps_per_sample
272
+
273
+ total_iterations = runs_per_model * num_samples
274
+ model_bar = st.progress(0, text=f"Running {model_name}")
275
+ iteration_counter = 0
276
+ for run_idx in range(runs_per_model):
277
+ with GeoBot(model=model_class, model_name=config["model_name"], headless=True, temperature=temperature) as bot:
278
+ for si, sample in enumerate(golden_labels[:num_samples]):
279
+ if not bot.controller.load_location_from_data(sample):
280
+ iteration_counter += 1
281
+ model_bar.progress(iteration_counter/total_iterations)
282
+ continue
283
+ predictions = bot.test_run_agent_loop(max_steps=steps_per_sample)
284
+ true_coords = {"lat": sample["lat"], "lng": sample["lng"]}
285
+ for step_idx, pred in enumerate(predictions):
286
+ if isinstance(pred, dict) and "lat" in pred:
287
+ dist = benchmark_helper.calculate_distance(true_coords, (pred["lat"], pred["lon"]))
288
+ if dist is not None:
289
+ # New: accumulate the distance sum and the sample count for this step
290
+ dist_sum_per_step[step_idx] += dist
291
+ dist_cnt_per_step[step_idx] += 1
292
+ # Existing: count of successful guesses for this step
293
+ if dist <= SUCCESS_THRESHOLD_KM:
294
+ successes_per_step[step_idx] += 1
295
+ iteration_counter += 1
296
+ model_bar.progress(iteration_counter/total_iterations)
297
+
298
+ acc_per_step = [s/(num_samples*runs_per_model) for s in successes_per_step]
299
+ summary_by_step[model_name] = acc_per_step
300
+
301
+ avg_per_step = [
302
+ (dist_sum_per_step[i]/dist_cnt_per_step[i]) if dist_cnt_per_step[i] else None
303
+ for i in range(steps_per_sample)
304
+ ]
305
+ avg_distance_by_step[model_name] = avg_per_step
306
+
307
+ progress_bar.progress((mi+1)/len(selected_models))
308
+ # plot
309
+ st.subheader("Accuracy vs Steps")
310
+
311
+ # summary_by_step {model: [acc_step1, acc_step2, ...]}
312
+ df_wide = pd.DataFrame(summary_by_step)
313
+ df_long = (
314
+ df_wide
315
+ .reset_index(names="Step")
316
+ .melt(id_vars="Step", var_name="Model", value_name="Accuracy")
317
+ )
318
 
319
+ chart = (
320
+ alt.Chart(df_long)
321
+ .mark_line(point=True)
322
+ .encode(
323
+ x=alt.X("Step:O", title="Step #"),
324
+ y=alt.Y("Accuracy:Q", title="Accuracy", scale=alt.Scale(domain=[0, 1])),
325
+ color=alt.Color("Model:N", title="Model"),
326
+ tooltip=["Model:N", "Step:O", alt.Tooltip("Accuracy:Q", format=".2%")],
327
+ )
328
+ .properties(width=700, height=400)
329
+ )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
330
 
331
+ st.altair_chart(chart, use_container_width=True)
332
+
333
+ st.subheader("Average Distance vs Steps (km)")
334
+ df_wide_dist = pd.DataFrame(avg_distance_by_step)
335
+ df_long_dist = (
336
+ df_wide_dist
337
+ .reset_index(names="Step")
338
+ .melt(id_vars="Step", var_name="Model", value_name="AvgDistanceKm")
339
+ )
340
+ dist_chart = (
341
+ alt.Chart(df_long_dist)
342
+ .mark_line(point=True)
343
+ .encode(
344
+ x=alt.X("Step:O", title="Step #"),
345
+ y=alt.Y("AvgDistanceKm:Q", title="Avg Distance (km)", scale=alt.Scale(zero=True)),
346
+ color=alt.Color("Model:N", title="Model"),
347
+ tooltip=["Model:N", "Step:O", alt.Tooltip("AvgDistanceKm:Q", format=".1f")],
348
+ )
349
+ .properties(width=700, height=400)
350
+ )
351
+ st.altair_chart(dist_chart, use_container_width=True)
352
+ st.stop()
353
+
354
+ else:
355
+ test_samples = golden_labels[:num_samples]
356
+ config = MODELS_CONFIG[model_choice]
357
+ model_class = get_model_class(config["class"])
358
+
359
+ benchmark_helper = MapGuesserBenchmark(
360
+ dataset_name=dataset_choice if mode == "Dataset Mode" else "online"
361
+ )
362
+ all_results = []
363
+
364
+ progress_bar = st.progress(0)
365
+
366
+ with GeoBot(
367
+ model=model_class,
368
+ model_name=config["model_name"],
369
+ headless=True,
370
+ temperature=temperature,
371
+ ) as bot:
372
+ for i, sample in enumerate(test_samples):
373
+ st.divider()
374
+ st.header(f"Sample {i + 1}/{num_samples}")
375
+
376
+ if mode == "Online Mode":
377
+ # Load the MapCrunch URL directly
378
+ bot.controller.load_url(sample["url"])
379
+ else:
380
+ # Load from dataset as before
381
+ bot.controller.load_location_from_data(sample)
382
+
383
+ bot.controller.setup_clean_environment()
384
+
385
+ # Create containers for UI updates
386
+ sample_container = st.container()
387
+
388
+ # Initialize UI state for this sample
389
+ step_containers = {}
390
+ sample_steps_data = []
391
+
392
+ def ui_step_callback(step_info):
393
+ """Callback function to update UI after each step"""
394
+ step_num = step_info["step_num"]
395
+
396
+ # Store step data
397
+ sample_steps_data.append(step_info)
398
+
399
+ with sample_container:
400
+ # Create step container if it doesn't exist
401
+ if step_num not in step_containers:
402
+ step_containers[step_num] = st.container()
403
+
404
+ with step_containers[step_num]:
405
+ st.subheader(f"Step {step_num}/{step_info['max_steps']}")
406
+
407
+ col1, col2 = st.columns([1, 2])
408
+
409
+ with col1:
410
+ # Display screenshot
411
+ st.image(
412
+ step_info["screenshot_bytes"],
413
+ caption=f"What AI sees - Step {step_num}",
414
+ use_column_width=True,
415
+ )
416
+
417
+ with col2:
418
+ # Show available actions
419
+ st.write("**Available Actions:**")
420
+ st.code(
421
+ json.dumps(step_info["available_actions"], indent=2)
422
+ )
423
+
424
+ # Show history context - use the history from step_info
425
+ current_history = step_info.get("history", [])
426
+ history_text = bot.generate_history_text(current_history)
427
+ st.write("**AI Context:**")
428
+ st.text_area(
429
+ "History",
430
+ history_text,
431
+ height=100,
432
+ disabled=True,
433
+ key=f"history_{i}_{step_num}",
434
+ )
435
+
436
+ # Show AI reasoning and action
437
+ action = step_info.get("action_details", {}).get(
438
+ "action", "N/A"
439
+ )
440
+
441
+ if step_info.get("is_final_step") and action != "GUESS":
442
+ st.warning("Max steps reached. Forcing GUESS.")
443
+
444
+ st.write("**AI Reasoning:**")
445
+ st.info(step_info.get("reasoning", "N/A"))
446
+ if step_info.get("debug_message") != "N/A":
447
+ st.write("**AI Debug Message:**")
448
+ st.code(step_info.get("debug_message"), language="json")
449
+ st.write("**AI Action:**")
450
+ if action == "GUESS":
451
+ lat = step_info.get("action_details", {}).get("lat")
452
+ lon = step_info.get("action_details", {}).get("lon")
453
+ st.success(f"`{action}` - {lat:.4f}, {lon:.4f}")
454
+ else:
455
+ st.success(f"`{action}`")
456
+
457
+ # Show decision details for debugging
458
+ with st.expander("Decision Details"):
459
+ decision_data = {
460
+ "reasoning": step_info.get("reasoning"),
461
+ "action_details": step_info.get("action_details"),
462
+ "remaining_steps": step_info.get("remaining_steps"),
463
+ }
464
+ st.json(decision_data)
465
+
466
+ # Force UI refresh
467
+ time.sleep(0.5) # Small delay to ensure UI updates are visible
468
+
469
+ # Run the agent loop with UI callback
470
+ try:
471
+ final_guess = bot.run_agent_loop(
472
+ max_steps=steps_per_sample, step_callback=ui_step_callback
473
  )
474
+ except Exception as e:
475
+ st.error(f"Error during agent execution: {e}")
476
+ final_guess = None
477
+
478
+ # Sample Results
479
+ with sample_container:
480
+ st.subheader("Sample Result")
481
+ true_coords = {"lat": sample.get("lat"), "lng": sample.get("lng")}
482
+ distance_km = None
483
+ is_success = False
484
+
485
+ if final_guess:
486
+ distance_km = benchmark_helper.calculate_distance(
487
+ true_coords, final_guess
488
+ )
489
+ if distance_km is not None:
490
+ is_success = distance_km <= SUCCESS_THRESHOLD_KM
491
+
492
+ col1, col2, col3 = st.columns(3)
493
+ col1.metric(
494
+ "Final Guess", f"{final_guess[0]:.3f}, {final_guess[1]:.3f}"
495
+ )
496
+ col2.metric(
497
+ "Ground Truth",
498
+ f"{true_coords['lat']:.3f}, {true_coords['lng']:.3f}",
499
+ )
500
+ col3.metric(
501
+ "Distance",
502
+ f"{distance_km:.1f} km",
503
+ delta="Success" if is_success else "Failed",
504
+ )
505
+ else:
506
+ st.error("No final guess made")
507
+
508
+ all_results.append(
509
+ {
510
+ "sample_id": sample.get("id"),
511
+ "model": model_choice,
512
+ "steps_taken": len(sample_steps_data),
513
+ "max_steps": steps_per_sample,
514
+ "temperature": temperature,
515
+ "true_coordinates": true_coords,
516
+ "predicted_coordinates": final_guess,
517
+ "distance_km": distance_km,
518
+ "success": is_success,
519
+ }
520
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
521
 
522
+ progress_bar.progress((i + 1) / num_samples)
523
+
524
+ # Final Summary
525
+ st.divider()
526
+ st.header("🏁 Final Results")
527
+
528
+ # Calculate summary stats
529
+ successes = [r for r in all_results if r["success"]]
530
+ success_rate = len(successes) / len(all_results) if all_results else 0
531
+
532
+ valid_distances = [
533
+ r["distance_km"] for r in all_results if r["distance_km"] is not None
534
+ ]
535
+ avg_distance = sum(valid_distances) / len(valid_distances) if valid_distances else 0
536
+
537
+ # Overall metrics
538
+ col1, col2, col3 = st.columns(3)
539
+ col1.metric("Success Rate", f"{success_rate * 100:.1f}%")
540
+ col2.metric("Average Distance", f"{avg_distance:.1f} km")
541
+ col3.metric("Total Samples", len(all_results))
542
+
543
+ # Detailed results table
544
+ st.subheader("Detailed Results")
545
+ st.dataframe(all_results, use_container_width=True)
546
+
547
+ # Success/failure breakdown
548
+ if successes:
549
+ st.subheader("✅ Successful Samples")
550
+ st.dataframe(successes, use_container_width=True)
551
+
552
+ failures = [r for r in all_results if not r["success"]]
553
+ if failures:
554
+ st.subheader("❌ Failed Samples")
555
+ st.dataframe(failures, use_container_width=True)
556
+
557
+ # Export functionality
558
+ if st.button("💾 Export Results"):
559
+ results_json = json.dumps(all_results, indent=2)
560
+ st.download_button(
561
+ label="Download results.json",
562
+ data=results_json,
563
+ file_name=f"geo_results_{dataset_choice}_{model_choice}_{num_samples}samples.json",
564
+ mime="application/json",
565
+ )
566
 
567
 
568
  def handle_tab_completion():
benchmark.py CHANGED
@@ -99,6 +99,9 @@ class MapGuesserBenchmark:
99
  print(f"📍 Sample {i + 1}/{len(test_samples)}")
100
  try:
101
  result = self.run_single_test_with_bot(bot, sample)
 
 
 
102
  all_results.append(result)
103
 
104
  status = (
@@ -154,6 +157,8 @@ class MapGuesserBenchmark:
154
  }
155
 
156
  predicted_lat_lon = bot.analyze_image(screenshot)
 
 
157
  inference_time = time.time() - start_time
158
 
159
  true_coords = {"lat": location_data.get("lat"), "lng": location_data.get("lng")}
@@ -163,7 +168,7 @@ class MapGuesserBenchmark:
163
  print(f"🔍 True coords: {true_coords}")
164
  print(f"🔍 Predicted coords: {predicted_lat_lon}")
165
  distance_km = self.calculate_distance(true_coords, predicted_lat_lon)
166
-
167
  is_success = distance_km is not None and distance_km <= SUCCESS_THRESHOLD_KM
168
 
169
  return {
 
99
  print(f"📍 Sample {i + 1}/{len(test_samples)}")
100
  try:
101
  result = self.run_single_test_with_bot(bot, sample)
102
+ if result is None:
103
+ print(f"❌ Sample_{i+1} test failed: No predicted coords")
104
+ continue
105
  all_results.append(result)
106
 
107
  status = (
 
157
  }
158
 
159
  predicted_lat_lon = bot.analyze_image(screenshot)
160
+ if predicted_lat_lon is None:
161
+ return None
162
  inference_time = time.time() - start_time
163
 
164
  true_coords = {"lat": location_data.get("lat"), "lng": location_data.get("lng")}
 
168
  print(f"🔍 True coords: {true_coords}")
169
  print(f"🔍 Predicted coords: {predicted_lat_lon}")
170
  distance_km = self.calculate_distance(true_coords, predicted_lat_lon)
171
+
172
  is_success = distance_km is not None and distance_km <= SUCCESS_THRESHOLD_KM
173
 
174
  return {
config.py CHANGED
@@ -38,12 +38,12 @@ DEFAULT_TEMPERATURE = 1.0
38
  # Model configurations
39
  MODELS_CONFIG = {
40
  "gpt-4o": {
41
- "class": "ChatOpenAI",
42
  "model_name": "gpt-4o",
43
  "description": "OpenAI GPT-4o",
44
  },
45
  "gpt-4o-mini": {
46
- "class": "ChatOpenAI",
47
  "model_name": "gpt-4o-mini",
48
  "description": "OpenAI GPT-4o Mini",
49
  },
 
38
  # Model configurations
39
  MODELS_CONFIG = {
40
  "gpt-4o": {
41
+ "class": "OpenRouter",
42
  "model_name": "gpt-4o",
43
  "description": "OpenAI GPT-4o",
44
  },
45
  "gpt-4o-mini": {
46
+ "class": "OpenRouter",
47
  "model_name": "gpt-4o-mini",
48
  "description": "OpenAI GPT-4o Mini",
49
  },
datasets/test/golden_labels.json ADDED
@@ -0,0 +1,759 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "dataset_name": "test",
4
+ "collection_date": "2025-08-07T12:43:38.026706",
5
+ "collection_options": {}
6
+ },
7
+ "samples": [
8
+ {
9
+ "id": "d6250b7f-4da5-42c1-8c8d-0423e67e77be",
10
+ "timestamp": "2025-08-07T12:40:37.875459",
11
+ "lat": 47.66613320444537,
12
+ "lng": 26.011012145674016,
13
+ "address": "3 DJ178A, Suceava County",
14
+ "pano_id": "PdysAkpL3AJpCWhOBC5GwQ",
15
+ "pov": {
16
+ "heading": -238.14999999999998,
17
+ "pitch": 5,
18
+ "zoom": 0.9999999999999997
19
+ },
20
+ "url_slug": "47.666133_26.011012_-238.15_-5.00_0",
21
+ "thumbnail_path": "d6250b7f-4da5-42c1-8c8d-0423e67e77be.jpg"
22
+ },
23
+ {
24
+ "id": "3bb51463-0a02-4ce4-9e61-6e0f28491897",
25
+ "timestamp": "2025-08-07T12:40:41.467083",
26
+ "lat": -5.239479425072147,
27
+ "lng": 71.82213288625383,
28
+ "address": "British Indian Ocean Territory",
29
+ "pano_id": "NYwG1Ym4ZqDLb5APiRn2Eg",
30
+ "pov": {
31
+ "heading": -48.14999999999998,
32
+ "pitch": 5,
33
+ "zoom": 0.9999999999999997
34
+ },
35
+ "url_slug": "-5.239479_71.822133_-48.15_-5.00_0",
36
+ "thumbnail_path": "3bb51463-0a02-4ce4-9e61-6e0f28491897.jpg"
37
+ },
38
+ {
39
+ "id": "4acf7d7e-8309-4e57-88b2-1ea1019c1719",
40
+ "timestamp": "2025-08-07T12:40:45.049516",
41
+ "lat": 44.577090525370025,
42
+ "lng": 132.8105749539221,
43
+ "address": "Spassk-Dalny, Primorsky Krai",
44
+ "pano_id": "l1mVXN5S-foFa6foYGvMaQ",
45
+ "pov": {
46
+ "heading": 100.85000000000002,
47
+ "pitch": 5,
48
+ "zoom": 0.9999999999999997
49
+ },
50
+ "url_slug": "44.577091_132.810575_100.85_-5.00_0",
51
+ "thumbnail_path": "4acf7d7e-8309-4e57-88b2-1ea1019c1719.jpg"
52
+ },
53
+ {
54
+ "id": "c4d4352f-6285-42c1-bbae-231ca95da48a",
55
+ "timestamp": "2025-08-07T12:40:48.577565",
56
+ "lat": -14.173449381539905,
57
+ "lng": -169.67773654813135,
58
+ "address": "Rte 20, Ofu, Manu'a District",
59
+ "pano_id": "GTAdq2n3eUJ33lWb-gJ5BA",
60
+ "pov": {
61
+ "heading": 321.85,
62
+ "pitch": 5,
63
+ "zoom": 0.9999999999999997
64
+ },
65
+ "url_slug": "-14.173449_-169.677737_321.85_-5.00_0",
66
+ "thumbnail_path": "c4d4352f-6285-42c1-bbae-231ca95da48a.jpg"
67
+ },
68
+ {
69
+ "id": "b4b889cf-008f-4b71-b901-bca7b3de3951",
70
+ "timestamp": "2025-08-07T12:40:52.470733",
71
+ "lat": 44.83634227352461,
72
+ "lng": -91.46694086852327,
73
+ "address": "3048 Winsor Dr, Eau Claire, Wisconsin",
74
+ "pano_id": "teO7CeeojzPT4y6Dz5V4qg",
75
+ "pov": {
76
+ "heading": -244.14999999999998,
77
+ "pitch": 5,
78
+ "zoom": 0.9999999999999997
79
+ },
80
+ "url_slug": "44.836342_-91.466941_-244.15_-5.00_0",
81
+ "thumbnail_path": "b4b889cf-008f-4b71-b901-bca7b3de3951.jpg"
82
+ },
83
+ {
84
+ "id": "7a606d59-46f3-4522-b2be-2e5a5576e155",
85
+ "timestamp": "2025-08-07T12:40:56.350929",
86
+ "lat": 32.28575621196474,
87
+ "lng": -64.77437787828177,
88
+ "address": "Bermuda Tourism",
89
+ "pano_id": "CAoSFkNJSE0wb2dLRUlDQWdJQ3VnTGZLUmc.",
90
+ "pov": {
91
+ "heading": -26.149999999999977,
92
+ "pitch": 5,
93
+ "zoom": 0.9999999999999997
94
+ },
95
+ "url_slug": "32.285756_-64.774378_-26.15_-5.00_0",
96
+ "thumbnail_path": "7a606d59-46f3-4522-b2be-2e5a5576e155.jpg"
97
+ },
98
+ {
99
+ "id": "262d348a-a60a-42d8-bd4f-68aafe98d1fb",
100
+ "timestamp": "2025-08-07T12:40:59.894064",
101
+ "lat": 11.275626069517537,
102
+ "lng": 104.8745358584606,
103
+ "address": "Tak\u00e9o Province",
104
+ "pano_id": "5Y1dyapSF2NxjeB4-ucZkA",
105
+ "pov": {
106
+ "heading": 134.85000000000002,
107
+ "pitch": 5,
108
+ "zoom": 0.9999999999999997
109
+ },
110
+ "url_slug": "11.275626_104.874536_134.85_-5.00_0",
111
+ "thumbnail_path": "262d348a-a60a-42d8-bd4f-68aafe98d1fb.jpg"
112
+ },
113
+ {
114
+ "id": "09ce31a1-a719-4ed9-a344-7987214902c1",
115
+ "timestamp": "2025-08-07T12:41:03.536576",
116
+ "lat": -17.362187562805154,
117
+ "lng": -63.14684639831983,
118
+ "address": "Camino Montero, Santa Cruz Department",
119
+ "pano_id": "51rbDOTj6SCtSi9vyN0-Pg",
120
+ "pov": {
121
+ "heading": -336.15,
122
+ "pitch": 5,
123
+ "zoom": 0.9999999999999997
124
+ },
125
+ "url_slug": "-17.362188_-63.146846_-336.15_-5.00_0",
126
+ "thumbnail_path": "09ce31a1-a719-4ed9-a344-7987214902c1.jpg"
127
+ },
128
+ {
129
+ "id": "4d7925b6-c1b2-4968-b5cf-61a67b2c68fb",
130
+ "timestamp": "2025-08-07T12:41:06.979405",
131
+ "lat": -21.458641127651422,
132
+ "lng": -47.59839773953906,
133
+ "address": "12160 Rodovia Conde Francisco Matarazzo J\u00fanior, State of S\u00e3o Paulo",
134
+ "pano_id": "PTMrd1Xosg9QO25i58gjAg",
135
+ "pov": {
136
+ "heading": -204.14999999999998,
137
+ "pitch": 5,
138
+ "zoom": 0.9999999999999997
139
+ },
140
+ "url_slug": "-21.458641_-47.598398_-204.15_-5.00_0",
141
+ "thumbnail_path": "4d7925b6-c1b2-4968-b5cf-61a67b2c68fb.jpg"
142
+ },
143
+ {
144
+ "id": "1929ea7c-af27-42d0-9931-66d5ad451d21",
145
+ "timestamp": "2025-08-07T12:41:10.587109",
146
+ "lat": -54.00998792650971,
147
+ "lng": -67.6803410996465,
148
+ "address": "17 RP F, Tierra del Fuego Province",
149
+ "pano_id": "HsUaaUVcACNjAhgLP2_YOg",
150
+ "pov": {
151
+ "heading": -73.14999999999998,
152
+ "pitch": 5,
153
+ "zoom": 0.9999999999999997
154
+ },
155
+ "url_slug": "-54.009988_-67.680341_-73.15_-5.00_0",
156
+ "thumbnail_path": "1929ea7c-af27-42d0-9931-66d5ad451d21.jpg"
157
+ },
158
+ {
159
+ "id": "7bc2a39e-ac61-4704-a950-203117b4aca2",
160
+ "timestamp": "2025-08-07T12:41:14.089628",
161
+ "lat": 42.44808280064812,
162
+ "lng": 1.4936480624654318,
163
+ "address": "Cam\u00ed els Hortells, Sant Juli\u00e0 de L\u00f2ria",
164
+ "pano_id": "kqVCVi1mPVjLpeHMyN_BPQ",
165
+ "pov": {
166
+ "heading": 111.85000000000002,
167
+ "pitch": 5,
168
+ "zoom": 0.9999999999999997
169
+ },
170
+ "url_slug": "42.448083_1.493648_111.85_-5.00_0",
171
+ "thumbnail_path": "7bc2a39e-ac61-4704-a950-203117b4aca2.jpg"
172
+ },
173
+ {
174
+ "id": "a77120f7-f65b-4ea4-8419-4c2f599c2ed8",
175
+ "timestamp": "2025-08-07T12:41:17.644882",
176
+ "lat": 23.57625889505424,
177
+ "lng": 120.55489844246863,
178
+ "address": "Chiayi County",
179
+ "pano_id": "25efsk04WLxb8UuEELh0fQ",
180
+ "pov": {
181
+ "heading": 258.85,
182
+ "pitch": 5,
183
+ "zoom": 0.9999999999999997
184
+ },
185
+ "url_slug": "23.576259_120.554898_258.85_-5.00_0",
186
+ "thumbnail_path": "a77120f7-f65b-4ea4-8419-4c2f599c2ed8.jpg"
187
+ },
188
+ {
189
+ "id": "bf12b96e-5ee7-4815-bc1e-2ef6ccaf3b5c",
190
+ "timestamp": "2025-08-07T12:41:21.174601",
191
+ "lat": 49.76842154708744,
192
+ "lng": 6.236529746429928,
193
+ "address": "12 Regioun, Heffingen, Mersch",
194
+ "pano_id": "VLotZqwpyqKwg2D1uRhZLA",
195
+ "pov": {
196
+ "heading": -243.14999999999998,
197
+ "pitch": 5,
198
+ "zoom": 0.9999999999999997
199
+ },
200
+ "url_slug": "49.768422_6.236530_-243.15_-5.00_0",
201
+ "thumbnail_path": "bf12b96e-5ee7-4815-bc1e-2ef6ccaf3b5c.jpg"
202
+ },
203
+ {
204
+ "id": "6a5589de-e1fb-46c4-96c3-85cfb161444e",
205
+ "timestamp": "2025-08-07T12:41:24.747595",
206
+ "lat": 49.78642391720587,
207
+ "lng": 6.199481729741201,
208
+ "address": "CR118, Larochette, Mersch",
209
+ "pano_id": "JqZpePEOedyFAYtwUC786g",
210
+ "pov": {
211
+ "heading": -104.14999999999998,
212
+ "pitch": 5,
213
+ "zoom": 0.9999999999999997
214
+ },
215
+ "url_slug": "49.786424_6.199482_-104.15_-5.00_0",
216
+ "thumbnail_path": "6a5589de-e1fb-46c4-96c3-85cfb161444e.jpg"
217
+ },
218
+ {
219
+ "id": "9c9465d9-5bfa-48c0-8fa4-f4a1c7dd0c25",
220
+ "timestamp": "2025-08-07T12:41:28.269300",
221
+ "lat": 34.0994193037527,
222
+ "lng": 131.95163614377708,
223
+ "address": "\u770c\u9053140\u53f7, Shunan, Yamaguchi",
224
+ "pano_id": "6HXD7J5jRsnBD5_KjCTSPw",
225
+ "pov": {
226
+ "heading": 129.85000000000002,
227
+ "pitch": 5,
228
+ "zoom": 0.9999999999999997
229
+ },
230
+ "url_slug": "34.099419_131.951636_129.85_-5.00_0",
231
+ "thumbnail_path": "9c9465d9-5bfa-48c0-8fa4-f4a1c7dd0c25.jpg"
232
+ },
233
+ {
234
+ "id": "3f0e8c12-109e-4db7-a228-52a156ca880d",
235
+ "timestamp": "2025-08-07T12:41:32.781435",
236
+ "lat": 14.7694212956585,
237
+ "lng": -16.695508234038147,
238
+ "address": "Khombole, Thi\u00e8s Region",
239
+ "pano_id": "BjJ0cU8LxMFDFJD3vj5YYQ",
240
+ "pov": {
241
+ "heading": 241.85000000000002,
242
+ "pitch": 5,
243
+ "zoom": 0.9999999999999997
244
+ },
245
+ "url_slug": "14.769421_-16.695508_241.85_-5.00_0",
246
+ "thumbnail_path": "3f0e8c12-109e-4db7-a228-52a156ca880d.jpg"
247
+ },
248
+ {
249
+ "id": "a76f6ed2-5bb0-4750-bfd0-5a01fa052772",
250
+ "timestamp": "2025-08-07T12:41:36.390462",
251
+ "lat": 49.075733818467846,
252
+ "lng": 19.306522463680235,
253
+ "address": "32 J\u00e1na Jan\u010deka, Ru\u017eomberok, \u017dilina Region",
254
+ "pano_id": "4EOqYYxwF--FflZHgPGu2Q",
255
+ "pov": {
256
+ "heading": -233.14999999999998,
257
+ "pitch": 5,
258
+ "zoom": 0.9999999999999997
259
+ },
260
+ "url_slug": "49.075734_19.306522_-233.15_-5.00_0",
261
+ "thumbnail_path": "a76f6ed2-5bb0-4750-bfd0-5a01fa052772.jpg"
262
+ },
263
+ {
264
+ "id": "a6f20438-972f-48b0-8dc6-e95baec1c8c2",
265
+ "timestamp": "2025-08-07T12:41:39.931459",
266
+ "lat": 32.28733167935287,
267
+ "lng": -64.77638248243588,
268
+ "address": "23 Lovers Ln, Paget Parish",
269
+ "pano_id": "ZHTVVVlJPR35oUPiShnqHw",
270
+ "pov": {
271
+ "heading": -113.14999999999998,
272
+ "pitch": 5,
273
+ "zoom": 0.9999999999999997
274
+ },
275
+ "url_slug": "32.287332_-64.776382_-113.15_-5.00_0",
276
+ "thumbnail_path": "a6f20438-972f-48b0-8dc6-e95baec1c8c2.jpg"
277
+ },
278
+ {
279
+ "id": "4ee65f3b-aeaa-49d1-abda-28e270cca142",
280
+ "timestamp": "2025-08-07T12:41:43.438726",
281
+ "lat": 41.49319498028777,
282
+ "lng": 21.92920765772765,
283
+ "address": "Municipality of Rosoman",
284
+ "pano_id": "NcxnpDckFi3vt4-ntoF44A",
285
+ "pov": {
286
+ "heading": 99.85000000000002,
287
+ "pitch": 5,
288
+ "zoom": 0.9999999999999997
289
+ },
290
+ "url_slug": "41.493195_21.929208_99.85_-5.00_0",
291
+ "thumbnail_path": "4ee65f3b-aeaa-49d1-abda-28e270cca142.jpg"
292
+ },
293
+ {
294
+ "id": "3933f509-49f4-413f-b32d-95398910b3b6",
295
+ "timestamp": "2025-08-07T12:41:47.006438",
296
+ "lat": 31.875513355699223,
297
+ "lng": 35.492798274434385,
298
+ "address": "Green jericho",
299
+ "pano_id": "i9EnnjI_H0LQZ80DD8caeQ",
300
+ "pov": {
301
+ "heading": 328.85,
302
+ "pitch": 5,
303
+ "zoom": 0.9999999999999997
304
+ },
305
+ "url_slug": "31.875513_35.492798_328.85_-5.00_0",
306
+ "thumbnail_path": "3933f509-49f4-413f-b32d-95398910b3b6.jpg"
307
+ },
308
+ {
309
+ "id": "e32c0681-97bc-440e-9d8e-c1cb9511d47d",
310
+ "timestamp": "2025-08-07T12:41:50.873515",
311
+ "lat": 55.115320287969766,
312
+ "lng": 26.163976401890817,
313
+ "address": "128 Str\u016bnai\u010dio g., \u0160ven\u010dionys, Vilnius County",
314
+ "pano_id": "kN6UgL1Chn6ffNKK7wQmxA",
315
+ "pov": {
316
+ "heading": -192.14999999999998,
317
+ "pitch": 5,
318
+ "zoom": 0.9999999999999997
319
+ },
320
+ "url_slug": "55.115320_26.163976_-192.15_-5.00_0",
321
+ "thumbnail_path": "e32c0681-97bc-440e-9d8e-c1cb9511d47d.jpg"
322
+ },
323
+ {
324
+ "id": "15861215-f932-426b-a6fa-08ae0cd5ae54",
325
+ "timestamp": "2025-08-07T12:41:54.439626",
326
+ "lat": 55.115320287969766,
327
+ "lng": 26.163976401890817,
328
+ "address": "128 Str\u016bnai\u010dio g., \u0160ven\u010dionys, Vilnius County",
329
+ "pano_id": "kN6UgL1Chn6ffNKK7wQmxA",
330
+ "pov": {
331
+ "heading": -192.14999999999998,
332
+ "pitch": 5,
333
+ "zoom": 0.9999999999999997
334
+ },
335
+ "url_slug": "55.115320_26.163976_-192.15_-5.00_0",
336
+ "thumbnail_path": "15861215-f932-426b-a6fa-08ae0cd5ae54.jpg"
337
+ },
338
+ {
339
+ "id": "9a6c5a97-8501-489d-bade-f07bbcbebeea",
340
+ "timestamp": "2025-08-07T12:42:01.229172",
341
+ "lat": 40.13741279140719,
342
+ "lng": 19.645404417111592,
343
+ "address": "Dh\u00ebrmiu Beach",
344
+ "pano_id": "CAoSFkNJSE0wb2dLRUlDQWdJRHFqYW42YWc.",
345
+ "pov": {
346
+ "heading": 137.85000000000002,
347
+ "pitch": 5,
348
+ "zoom": 0.9999999999999997
349
+ },
350
+ "url_slug": "40.137413_19.645404_137.85_-5.00_0",
351
+ "thumbnail_path": "9a6c5a97-8501-489d-bade-f07bbcbebeea.jpg"
352
+ },
353
+ {
354
+ "id": "cbbab275-9be4-4d3a-b077-45ae1f8d14ff",
355
+ "timestamp": "2025-08-07T12:42:04.457716",
356
+ "lat": 29.18167200058433,
357
+ "lng": -95.43500220590631,
358
+ "address": "Angleton, Texas",
359
+ "pano_id": "IayHlQ-Wr58p-_kVKSK1ug",
360
+ "pov": {
361
+ "heading": 270.85,
362
+ "pitch": 5,
363
+ "zoom": 0.9999999999999997
364
+ },
365
+ "url_slug": "29.181672_-95.435002_270.85_-5.00_0",
366
+ "thumbnail_path": "cbbab275-9be4-4d3a-b077-45ae1f8d14ff.jpg"
367
+ },
368
+ {
369
+ "id": "011c76d0-d1cf-40f0-b243-3593448bce84",
370
+ "timestamp": "2025-08-07T12:42:07.779631",
371
+ "lat": 12.226344673460268,
372
+ "lng": 122.02353179975576,
373
+ "address": "Junction Guinhayaan - Malbog Port Rd, Looc, MIMAROPA",
374
+ "pano_id": "tAnV4HzcEaJ5IAm2Jgegiw",
375
+ "pov": {
376
+ "heading": -253.14999999999998,
377
+ "pitch": 5,
378
+ "zoom": 0.9999999999999997
379
+ },
380
+ "url_slug": "12.226345_122.023532_-253.15_-5.00_0",
381
+ "thumbnail_path": "011c76d0-d1cf-40f0-b243-3593448bce84.jpg"
382
+ },
383
+ {
384
+ "id": "a16553c1-8b4a-44f0-9d6d-9c23b1b93c86",
385
+ "timestamp": "2025-08-07T12:42:12.220880",
386
+ "lat": 34.062066594180294,
387
+ "lng": 133.86624813436472,
388
+ "address": "Tokushima Prefectural Rd No. 4, Higashimiyoshi, Tokushima",
389
+ "pano_id": "5Tp9jW_NWLnaKB_3NTeQSw",
390
+ "pov": {
391
+ "heading": -106.14999999999998,
392
+ "pitch": 5,
393
+ "zoom": 0.9999999999999997
394
+ },
395
+ "url_slug": "34.062067_133.866248_-106.15_-5.00_0",
396
+ "thumbnail_path": "a16553c1-8b4a-44f0-9d6d-9c23b1b93c86.jpg"
397
+ },
398
+ {
399
+ "id": "0246f9d3-be8d-40f0-805e-d0446ef2d183",
400
+ "timestamp": "2025-08-07T12:42:15.744386",
401
+ "lat": -41.21734957722994,
402
+ "lng": 172.11284555729617,
403
+ "address": "302 Rte 67, Karamea, West Coast Region",
404
+ "pano_id": "dcA7I3Arr0VPwKwgMxX_mQ",
405
+ "pov": {
406
+ "heading": 23.850000000000023,
407
+ "pitch": 5,
408
+ "zoom": 0.9999999999999997
409
+ },
410
+ "url_slug": "-41.217350_172.112846_23.85_-5.00_0",
411
+ "thumbnail_path": "0246f9d3-be8d-40f0-805e-d0446ef2d183.jpg"
412
+ },
413
+ {
414
+ "id": "54375156-8b78-4e60-afc9-f1172deba69d",
415
+ "timestamp": "2025-08-07T12:42:19.157383",
416
+ "lat": 46.10532360891025,
417
+ "lng": 15.119329939077309,
418
+ "address": "Podkraj, Podkraj, Municipality of Hrastnik",
419
+ "pano_id": "4bdhb8F41Au_r8UJIG8nCQ",
420
+ "pov": {
421
+ "heading": 204.85000000000002,
422
+ "pitch": 5,
423
+ "zoom": 0.9999999999999997
424
+ },
425
+ "url_slug": "46.105324_15.119330_204.85_-5.00_0",
426
+ "thumbnail_path": "54375156-8b78-4e60-afc9-f1172deba69d.jpg"
427
+ },
428
+ {
429
+ "id": "4fa45765-4ce7-4adc-a4fb-7f54149d6f27",
430
+ "timestamp": "2025-08-07T12:42:22.677283",
431
+ "lat": 44.370875416206346,
432
+ "lng": 5.1514140758707585,
433
+ "address": "1450 Les Fonts, Nyons, Auvergne-Rh\u00f4ne-Alpes",
434
+ "pano_id": "30HH_X24i7QOn6dILzYoKw",
435
+ "pov": {
436
+ "heading": -320.15,
437
+ "pitch": 5,
438
+ "zoom": 0.9999999999999997
439
+ },
440
+ "url_slug": "44.370875_5.151414_-320.15_-5.00_0",
441
+ "thumbnail_path": "4fa45765-4ce7-4adc-a4fb-7f54149d6f27.jpg"
442
+ },
443
+ {
444
+ "id": "08ef293d-2894-489f-b77f-377115c75921",
445
+ "timestamp": "2025-08-07T12:42:26.245168",
446
+ "lat": -19.541637267698466,
447
+ "lng": -63.55863586071773,
448
+ "address": "9, Santa Cruz Department",
449
+ "pano_id": "FmZr6VYcfqf_qwztM0cJ0g",
450
+ "pov": {
451
+ "heading": -125.14999999999998,
452
+ "pitch": 5,
453
+ "zoom": 0.9999999999999997
454
+ },
455
+ "url_slug": "-19.541637_-63.558636_-125.15_-5.00_0",
456
+ "thumbnail_path": "08ef293d-2894-489f-b77f-377115c75921.jpg"
457
+ },
458
+ {
459
+ "id": "8ff247f4-efdf-47e8-8aab-7752f7a7a033",
460
+ "timestamp": "2025-08-07T12:42:30.212571",
461
+ "lat": 25.60987433301616,
462
+ "lng": 55.754304628080014,
463
+ "address": "Al Alyaah St, Al Raafah, Emirate of Umm Al Quwain",
464
+ "pano_id": "3lt-n3rOsbk3GkZ3CiuMKQ",
465
+ "pov": {
466
+ "heading": 63.85000000000002,
467
+ "pitch": 5,
468
+ "zoom": 0.9999999999999997
469
+ },
470
+ "url_slug": "25.609874_55.754305_63.85_-5.00_0",
471
+ "thumbnail_path": "8ff247f4-efdf-47e8-8aab-7752f7a7a033.jpg"
472
+ },
473
+ {
474
+ "id": "41aa250b-f476-4c47-a8b3-1b170f892039",
475
+ "timestamp": "2025-08-07T12:42:33.795863",
476
+ "lat": 32.28525162497046,
477
+ "lng": -64.78725425926685,
478
+ "address": "Hodson's Ferry",
479
+ "pano_id": "CAoSF0NJSE0wb2dLRUlDQWdJQ0UzYkhPalFF",
480
+ "pov": {
481
+ "heading": 314.85,
482
+ "pitch": 5,
483
+ "zoom": 0.9999999999999997
484
+ },
485
+ "url_slug": "32.285252_-64.787254_314.85_-5.00_0",
486
+ "thumbnail_path": "41aa250b-f476-4c47-a8b3-1b170f892039.jpg"
487
+ },
488
+ {
489
+ "id": "1fc918f8-1b83-4aeb-a785-22a3cd15a407",
490
+ "timestamp": "2025-08-07T12:42:37.657812",
491
+ "lat": 45.888594934068315,
492
+ "lng": 16.65941553063258,
493
+ "address": "Bol\u010d, Zagreb County",
494
+ "pano_id": "EwgAJqZjebsU51bDLswlGg",
495
+ "pov": {
496
+ "heading": -201.14999999999998,
497
+ "pitch": 5,
498
+ "zoom": 0.9999999999999997
499
+ },
500
+ "url_slug": "45.888595_16.659416_-201.15_-5.00_0",
501
+ "thumbnail_path": "1fc918f8-1b83-4aeb-a785-22a3cd15a407.jpg"
502
+ },
503
+ {
504
+ "id": "6186abe6-6343-41bd-b7c6-ef65e5fb5a83",
505
+ "timestamp": "2025-08-07T12:42:41.528253",
506
+ "lat": 68.72859088427079,
507
+ "lng": 16.900531665561935,
508
+ "address": "Fv848, Troms",
509
+ "pano_id": "HUtqcc4YFuJA6EGOvLaOSg",
510
+ "pov": {
511
+ "heading": -33.14999999999998,
512
+ "pitch": 5,
513
+ "zoom": 0.9999999999999997
514
+ },
515
+ "url_slug": "68.728591_16.900532_-33.15_-5.00_0",
516
+ "thumbnail_path": "6186abe6-6343-41bd-b7c6-ef65e5fb5a83.jpg"
517
+ },
518
+ {
519
+ "id": "f9d01601-da06-4286-b83f-aad48292ef56",
520
+ "timestamp": "2025-08-07T12:42:45.046606",
521
+ "lat": 56.718393213855904,
522
+ "lng": 25.74434588961816,
523
+ "address": "Aizkraukle Municipality",
524
+ "pano_id": "lFBbYokbq5Azj-WuXKkAww",
525
+ "pov": {
526
+ "heading": 112.85000000000002,
527
+ "pitch": 5,
528
+ "zoom": 0.9999999999999997
529
+ },
530
+ "url_slug": "56.718393_25.744346_112.85_-5.00_0",
531
+ "thumbnail_path": "f9d01601-da06-4286-b83f-aad48292ef56.jpg"
532
+ },
533
+ {
534
+ "id": "29521be4-0c47-40b4-9fe5-14dd37686eed",
535
+ "timestamp": "2025-08-07T12:42:48.525979",
536
+ "lat": 38.22079108487478,
537
+ "lng": -1.0621034114314583,
538
+ "address": "MU-412, Abanilla, Region of Murcia",
539
+ "pano_id": "YoaYr1t8aZ65kAKY_xoa4Q",
540
+ "pov": {
541
+ "heading": 338.85,
542
+ "pitch": 5,
543
+ "zoom": 0.9999999999999997
544
+ },
545
+ "url_slug": "38.220791_-1.062103_338.85_-5.00_0",
546
+ "thumbnail_path": "29521be4-0c47-40b4-9fe5-14dd37686eed.jpg"
547
+ },
548
+ {
549
+ "id": "cf0e39c0-67b7-4d72-a51b-fa006fa8e036",
550
+ "timestamp": "2025-08-07T12:42:52.036523",
551
+ "lat": 14.586380510782684,
552
+ "lng": -91.12495671396474,
553
+ "address": "RN-11, Patulul, Solol\u00e1 Department",
554
+ "pano_id": "e83Ymkc4WsPjYZSXQSkhlQ",
555
+ "pov": {
556
+ "heading": -249.14999999999998,
557
+ "pitch": 5,
558
+ "zoom": 0.9999999999999997
559
+ },
560
+ "url_slug": "14.586381_-91.124957_-249.15_-5.00_0",
561
+ "thumbnail_path": "cf0e39c0-67b7-4d72-a51b-fa006fa8e036.jpg"
562
+ },
563
+ {
564
+ "id": "0049770c-0e79-4f6e-a230-85815c5afca4",
565
+ "timestamp": "2025-08-07T12:42:55.545371",
566
+ "lat": 41.24761837711202,
567
+ "lng": 19.900912328789897,
568
+ "address": "SH3, B\u00ebrzhit\u00eb, Tirana County",
569
+ "pano_id": "H4OtJUEIjqNM4h3b3zJiog",
570
+ "pov": {
571
+ "heading": -38.14999999999998,
572
+ "pitch": 5,
573
+ "zoom": 0.9999999999999997
574
+ },
575
+ "url_slug": "41.247618_19.900912_-38.15_-5.00_0",
576
+ "thumbnail_path": "0049770c-0e79-4f6e-a230-85815c5afca4.jpg"
577
+ },
578
+ {
579
+ "id": "108d3530-8cd1-4554-9e27-f4161c25b64f",
580
+ "timestamp": "2025-08-07T12:42:59.039576",
581
+ "lat": 23.106680960105503,
582
+ "lng": 120.31480234033475,
583
+ "address": "Tainan City",
584
+ "pano_id": "wPKvYXSO2t3Cjb9d_92vbQ",
585
+ "pov": {
586
+ "heading": 177.85000000000002,
587
+ "pitch": 5,
588
+ "zoom": 0.9999999999999997
589
+ },
590
+ "url_slug": "23.106681_120.314802_177.85_-5.00_0",
591
+ "thumbnail_path": "108d3530-8cd1-4554-9e27-f4161c25b64f.jpg"
592
+ },
593
+ {
594
+ "id": "684589c2-db98-4fa0-a909-26677d622781",
595
+ "timestamp": "2025-08-07T12:43:02.607203",
596
+ "lat": 24.280060413908377,
597
+ "lng": 91.40645644538027,
598
+ "address": "Sylhet Division",
599
+ "pano_id": "vnxugWDu7BvOIQKU2pGreQ",
600
+ "pov": {
601
+ "heading": 289.85,
602
+ "pitch": 5,
603
+ "zoom": 0.9999999999999997
604
+ },
605
+ "url_slug": "24.280060_91.406456_289.85_-5.00_0",
606
+ "thumbnail_path": "684589c2-db98-4fa0-a909-26677d622781.jpg"
607
+ },
608
+ {
609
+ "id": "9e52e1ef-b7c8-4290-a50c-dea42684329c",
610
+ "timestamp": "2025-08-07T12:43:06.097012",
611
+ "lat": 23.075670254787028,
612
+ "lng": 120.16583641147342,
613
+ "address": "Tainan City",
614
+ "pano_id": "KT8dvKAlDqRIWqXVig9tRA",
615
+ "pov": {
616
+ "heading": -217.14999999999998,
617
+ "pitch": 5,
618
+ "zoom": 0.9999999999999997
619
+ },
620
+ "url_slug": "23.075670_120.165836_-217.15_-5.00_0",
621
+ "thumbnail_path": "9e52e1ef-b7c8-4290-a50c-dea42684329c.jpg"
622
+ },
623
+ {
624
+ "id": "54ccc34f-ae30-449b-83cf-3f6485186e38",
625
+ "timestamp": "2025-08-07T12:43:09.571839",
626
+ "lat": 16.069303835253045,
627
+ "lng": -13.917845261546633,
628
+ "address": "N2, Saint-Louis Region",
629
+ "pano_id": "AOhMIvzxsCcRhsHw2BVUzA",
630
+ "pov": {
631
+ "heading": -106.14999999999998,
632
+ "pitch": 5,
633
+ "zoom": 0.9999999999999997
634
+ },
635
+ "url_slug": "16.069304_-13.917845_-106.15_-5.00_0",
636
+ "thumbnail_path": "54ccc34f-ae30-449b-83cf-3f6485186e38.jpg"
637
+ },
638
+ {
639
+ "id": "9903bb23-294e-44a2-9ecf-180808b82d67",
640
+ "timestamp": "2025-08-07T12:43:12.991252",
641
+ "lat": -32.83743900844668,
642
+ "lng": -70.95213519080639,
643
+ "address": "218 Capit\u00e1n Avalos, Llay-Llay, Valpara\u00edso",
644
+ "pano_id": "xNJYW4PSgzGV2TEqMpEBpA",
645
+ "pov": {
646
+ "heading": 68.85000000000002,
647
+ "pitch": 5,
648
+ "zoom": 0.9999999999999997
649
+ },
650
+ "url_slug": "-32.837439_-70.952135_68.85_-5.00_0",
651
+ "thumbnail_path": "9903bb23-294e-44a2-9ecf-180808b82d67.jpg"
652
+ },
653
+ {
654
+ "id": "4381807b-d04c-4c04-8b93-78a588016cb7",
655
+ "timestamp": "2025-08-07T12:43:16.523957",
656
+ "lat": 4.5400338406517715,
657
+ "lng": -76.1944593680759,
658
+ "address": "El Dovio-Versalles, Valle del Cauca",
659
+ "pano_id": "wgWdWsvikF8kFmi_FZVstg",
660
+ "pov": {
661
+ "heading": 333.85,
662
+ "pitch": 5,
663
+ "zoom": 0.9999999999999997
664
+ },
665
+ "url_slug": "4.540034_-76.194459_333.85_-5.00_0",
666
+ "thumbnail_path": "4381807b-d04c-4c04-8b93-78a588016cb7.jpg"
667
+ },
668
+ {
669
+ "id": "c9d4d2c0-be12-4104-9fdf-3ffd7b9b539a",
670
+ "timestamp": "2025-08-07T12:43:20.060957",
671
+ "lat": 31.65645279027197,
672
+ "lng": 34.9414288862752,
673
+ "address": "Nir Louk",
674
+ "pano_id": "CAoSFkNJSE0wb2dLRUlDQWdJQzZqcWJXQ2c.",
675
+ "pov": {
676
+ "heading": -248.14999999999998,
677
+ "pitch": 5,
678
+ "zoom": 0.9999999999999997
679
+ },
680
+ "url_slug": "31.656453_34.941429_-248.15_-5.00_0",
681
+ "thumbnail_path": "c9d4d2c0-be12-4104-9fdf-3ffd7b9b539a.jpg"
682
+ },
683
+ {
684
+ "id": "574ac51d-1de1-46b2-9f90-5b1da1d79339",
685
+ "timestamp": "2025-08-07T12:43:23.601528",
686
+ "lat": 5.90176654207688,
687
+ "lng": 0.9886556847260388,
688
+ "address": "Keta, Volta Region",
689
+ "pano_id": "ipwTobbIbpx2SEjFzq6kww",
690
+ "pov": {
691
+ "heading": -146.14999999999998,
692
+ "pitch": 5,
693
+ "zoom": 0.9999999999999997
694
+ },
695
+ "url_slug": "5.901767_0.988656_-146.15_-5.00_0",
696
+ "thumbnail_path": "574ac51d-1de1-46b2-9f90-5b1da1d79339.jpg"
697
+ },
698
+ {
699
+ "id": "87e095f0-467b-4539-978b-46eecfdf1efc",
700
+ "timestamp": "2025-08-07T12:43:27.067655",
701
+ "lat": 46.29179908449921,
702
+ "lng": 16.580906762551983,
703
+ "address": "Komarnica Ludbre\u0161ka, Vara\u017edin County",
704
+ "pano_id": "ha0KsxP_lG1phxES1aSmGQ",
705
+ "pov": {
706
+ "heading": 111.85000000000002,
707
+ "pitch": 5,
708
+ "zoom": 0.9999999999999997
709
+ },
710
+ "url_slug": "46.291799_16.580907_111.85_-5.00_0",
711
+ "thumbnail_path": "87e095f0-467b-4539-978b-46eecfdf1efc.jpg"
712
+ },
713
+ {
714
+ "id": "3badb1cb-5ffb-4c07-812e-ee85646a4279",
715
+ "timestamp": "2025-08-07T12:43:30.517183",
716
+ "lat": 43.891541352607554,
717
+ "lng": 5.774287870706945,
718
+ "address": "Dauphin, Provence-Alpes-C\u00f4te d'Azur",
719
+ "pano_id": "b9cJ5iGIYH2JHWrRmDDSFg",
720
+ "pov": {
721
+ "heading": -351.15,
722
+ "pitch": 5,
723
+ "zoom": 0.9999999999999997
724
+ },
725
+ "url_slug": "43.891541_5.774288_-351.15_-5.00_0",
726
+ "thumbnail_path": "3badb1cb-5ffb-4c07-812e-ee85646a4279.jpg"
727
+ },
728
+ {
729
+ "id": "1acb3834-1f22-4c0c-8cd3-b992e4546f88",
730
+ "timestamp": "2025-08-07T12:43:34.052597",
731
+ "lat": 20.805812868893106,
732
+ "lng": -89.6933791766117,
733
+ "address": "Hotzuc, Yucatan",
734
+ "pano_id": "ShCiTFG-KoqkokeXeCyG2w",
735
+ "pov": {
736
+ "heading": -236.14999999999998,
737
+ "pitch": 5,
738
+ "zoom": 0.9999999999999997
739
+ },
740
+ "url_slug": "20.805813_-89.693379_-236.15_-5.00_0",
741
+ "thumbnail_path": "1acb3834-1f22-4c0c-8cd3-b992e4546f88.jpg"
742
+ },
743
+ {
744
+ "id": "8dacb066-8fa4-4f03-87e3-34d86f5863fb",
745
+ "timestamp": "2025-08-07T12:43:37.674750",
746
+ "lat": 47.974591513902844,
747
+ "lng": 108.47688185828954,
748
+ "address": "Baganuur-Mengenmorit, Mungunmorit, T\u00f6v, Mongolia",
749
+ "pano_id": "V0i3_HH4f4IM9hTEg0QRqg",
750
+ "pov": {
751
+ "heading": 13.850000000000023,
752
+ "pitch": 5,
753
+ "zoom": 0.9999999999999997
754
+ },
755
+ "url_slug": "47.974592_108.476882_13.85_-5.00_0",
756
+ "thumbnail_path": "8dacb066-8fa4-4f03-87e3-34d86f5863fb.jpg"
757
+ }
758
+ ]
759
+ }
geo_bot.py CHANGED
@@ -69,6 +69,72 @@ Your response MUST be a valid JSON object wrapped in ```json ... ```.
69
  ```
70
  """
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  BENCHMARK_PROMPT = """
73
  Analyze the image and determine its geographic coordinates.
74
  1. Describe visual clues.
@@ -250,10 +316,54 @@ class GeoBot:
250
  decision = {
251
  "reasoning": "Recovery due to parsing failure or model error.",
252
  "action_details": {"action": "PAN_RIGHT"},
 
253
  }
254
 
255
  return decision
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  def execute_action(self, action: str) -> bool:
258
  """
259
  Execute the given action using the controller.
@@ -271,6 +381,77 @@ class GeoBot:
271
  self.controller.pan_view("right")
272
  return True
273
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
274
  def run_agent_loop(
275
  self, max_steps: int = 10, step_callback=None
276
  ) -> Optional[Tuple[float, float]]:
@@ -347,6 +528,7 @@ class GeoBot:
347
  "reasoning": decision.get("reasoning", "N/A"),
348
  "action_details": decision.get("action_details", {"action": "N/A"}),
349
  "history": history.copy(), # History up to current step (excluding current)
 
350
  }
351
 
352
  action_details = decision.get("action_details", {})
 
69
  ```
70
  """
71
 
72
+ TEST_AGENT_PROMPT_TEMPLATE = """
73
+ **Mission:** You are an expert geo-location agent. Your goal is to pinpoint our position based on the surroundings and your observation history.
74
+
75
+ **Current Status**
76
+ • Actions You Can Take *this* turn: {available_actions}
77
+
78
+ ────────────────────────────────
79
+ **Core Principles**
80
+
81
+ 1. **Observe → Orient → Act**
82
+ Start each turn with a structured three-part reasoning block:
83
+ **(1) Visual Clues —** plainly describe what you see (signs, text language, road lines, vegetation, building styles, vehicles, terrain, weather, etc.).
84
+ **(2) Potential Regions —** list the most plausible regions/countries those clues suggest.
85
+ **(3) Most Probable + Plan —** pick the single likeliest region and explain the next action (move/pan or guess).
86
+
87
+ 2. **Navigate with Labels:**
88
+ - `MOVE_FORWARD` follows the green **UP** arrow.
89
+ - `MOVE_BACKWARD` follows the red **DOWN** arrow.
90
+ - No arrow ⇒ you cannot move that way.
91
+
92
+ 3. **Efficient Exploration:**
93
+ - **Pan Before You Move:** At fresh spots/intersections, use `PAN_LEFT` / `PAN_RIGHT` first.
94
+ - After ~2 or 3 fruitless moves in repetitive scenery, turn around.
95
+
96
+ 4. **Be Decisive:** A unique, definitive clue (full address, rare town name, etc.) ⇒ `GUESS` immediately.
97
+
98
+ 5. **Final-Step Rule:** If **Remaining Steps = 1**, you **MUST** `GUESS` and you should carefully check the image and the surroundings.
99
+
100
+ 6. **Always Predict:** On EVERY step, provide your current best estimate of the location, even if you're not ready to make a final guess.
101
+
102
+ ────────────────────────────────
103
+ **Context & Task:**
104
+ Analyze your full journey history and current view, apply the Core Principles, and decide your next action in the required JSON format.
105
+
106
+ **Action History**
107
+ {history_text}
108
+
109
+ ────────────────────────────────
110
+ **JSON Output Format:**
111
+ Your response MUST be a valid JSON object wrapped in ```json ... ```.
112
+ {{
113
+ "reasoning": "…",
114
+ "current_prediction": {{
115
+ "lat": <float>,
116
+ "lon": <float>,
117
+ "location_description": "Brief description of predicted location"
118
+ }},
119
+ "action_details": {{"action": action chosen from the available actions}}
120
+ }}
121
+ **Example **
122
+ ```json
123
+ {{
124
+ "reasoning": "(1) Visual Clues — I see left-side driving, eucalyptus trees, and a yellow speed-warning sign; the road markings are solid white. (2) Potential Regions — Southeastern Australia, Tasmania, or the North Island of New Zealand. (3) Most Probable + Plan — The scene most likely sits in a suburb of Hobart, Tasmania. I will PAN_LEFT to look for additional road signs that confirm this.",
125
+ "current_prediction": {{
126
+ "lat": -42.8806,
127
+ "lon": 147.3250,
128
+ "location_description": "Hobart suburb, Tasmania, Australia"
129
+ }},
130
+ "action_details": {{
131
+ "action": "PAN_LEFT"
132
+ }}
133
+ }}
134
+ ```
135
+
136
+ """
137
+
138
  BENCHMARK_PROMPT = """
139
  Analyze the image and determine its geographic coordinates.
140
  1. Describe visual clues.
 
316
  decision = {
317
  "reasoning": "Recovery due to parsing failure or model error.",
318
  "action_details": {"action": "PAN_RIGHT"},
319
+ "debug_message": f"{response.content.strip()}",
320
  }
321
 
322
  return decision
323
 
324
def execute_test_agent_step(
    self,
    history: List[Dict[str, Any]],
    current_screenshot_b64: str,
    available_actions: List[str],
) -> Optional[Dict[str, Any]]:
    """
    Execute a single test-mode agent step and return the model's decision.

    Formats TEST_AGENT_PROMPT_TEMPLATE with the textual history and the
    available actions, sends it to the model together with the current
    screenshot, and parses the reply.

    Args:
        history: Steps taken so far, as maintained by the agent loop.
        current_screenshot_b64: Base64-encoded screenshot of the current view.
        available_actions: Actions the agent may choose this turn.

    Returns:
        The parsed decision dict. When the model call raises or the reply
        cannot be parsed, a recovery decision is returned instead: action
        PAN_RIGHT, "current_prediction" set to "N/A", and "debug_message"
        holding the raw model reply (or "N/A" when no reply was received).
    """
    history_text = self.generate_history_text(history)
    image_b64_for_prompt = self.get_history_images(history) + [
        current_screenshot_b64
    ]

    prompt = TEST_AGENT_PROMPT_TEMPLATE.format(
        history_text=history_text,
        available_actions=available_actions,
    )

    # Bind both names before the try block: the recovery branch below reads
    # `response`, and it would raise UnboundLocalError if the failure
    # happened before the model returned anything.
    response = None
    decision = None
    try:
        # Only the last image (the current screenshot) is sent to the model.
        message = self._create_message_with_history(
            prompt, image_b64_for_prompt[-1:]
        )
        response = self.model.invoke(message)
        decision = self._parse_agent_response(response)
    except Exception as e:
        print(f"Error during model invocation: {e}")
        decision = None

    if not decision:
        print(
            "Response parsing failed or model error. Using default recovery action: PAN_RIGHT."
        )
        # The reply may be missing entirely or carry non-string content;
        # never let building the debug text break the recovery path.
        raw_content = getattr(response, "content", None)
        if raw_content is None:
            debug_message = "N/A"
        elif isinstance(raw_content, str):
            debug_message = raw_content.strip()
        else:
            debug_message = str(raw_content)

        decision = {
            "reasoning": "Recovery due to parsing failure or model error.",
            "action_details": {"action": "PAN_RIGHT"},
            "current_prediction": "N/A",
            "debug_message": debug_message,
        }

    return decision
366
+
367
  def execute_action(self, action: str) -> bool:
368
  """
369
  Execute the given action using the controller.
 
381
  self.controller.pan_view("right")
382
  return True
383
 
384
def test_run_agent_loop(
    self, max_steps: int = 10, step_callback=None
) -> Optional[List[Dict[str, Any]]]:
    """
    Run the test-mode agent loop and collect one prediction per step.

    Each iteration prepares the view, takes a screenshot, asks the model for
    a decision via execute_test_agent_step, reports the step to
    `step_callback` (if given), records the step in the history, stores the
    step's prediction, and finally executes the chosen action.

    Args:
        max_steps: Number of steps to run; steps count down from this value
            to 1.
        step_callback: Optional callable invoked once per step with the
            step_info dict, before the step is added to the history and
            before the action is executed.

    Returns:
        A list with one prediction dict per executed step (each carrying the
        step's "reasoning"), or None if a screenshot could not be taken.
        When the model supplied no usable prediction for a step, a
        placeholder prediction at (0.0, 0.0) is recorded for it.
    """
    history = self.init_history()
    predictions = []
    for step in range(max_steps, 0, -1):
        # Setup and screenshot
        self.controller.setup_clean_environment()
        self.controller.label_arrows_on_screen()

        screenshot_bytes = self.controller.take_street_view_screenshot()
        if not screenshot_bytes:
            print("Failed to take screenshot. Ending agent loop.")
            return None

        current_screenshot_b64 = self.pil_to_base64(
            image=Image.open(BytesIO(screenshot_bytes))
        )
        available_actions = self.controller.get_test_available_actions()

        decision = self.execute_test_agent_step(
            history, current_screenshot_b64, available_actions
        )
        reasoning = decision.get("reasoning", "N/A")

        # Snapshot of this step; `history` is copied so it reflects the state
        # up to (but not including) the current step.
        step_info = {
            "max_steps": max_steps,
            "remaining_steps": step,
            "screenshot_bytes": screenshot_bytes,
            "screenshot_b64": current_screenshot_b64,
            "available_actions": available_actions,
            "is_final_step": step == 1,
            "reasoning": reasoning,
            "action_details": decision.get("action_details", {"action": "N/A"}),
            "history": history.copy(),
            "debug_message": decision.get("debug_message", "N/A"),
            "current_prediction": decision.get("current_prediction", "N/A"),
        }

        # Report the step to the caller. A failing callback must not abort
        # the run, so the error is reported and the loop continues.
        if step_callback:
            try:
                step_callback(step_info)
            except Exception as e:
                print(f"Warning: step callback failed: {e}")

        # Add step to history AFTER the callback, so the next iteration sees
        # this step in its history.
        self.add_step_to_history(history, current_screenshot_b64, decision)

        current_prediction = decision.get("current_prediction")
        if current_prediction and isinstance(current_prediction, dict):
            # Store a copy: the original dict is still referenced by
            # `decision` and `step_info` and should not be altered here.
            predictions.append({**current_prediction, "reasoning": reasoning})
        else:
            # Fallback: record a basic placeholder prediction for this step.
            print(f"Invalid current prediction: {current_prediction}")
            predictions.append(
                {
                    "lat": 0.0,
                    "lon": 0.0,
                    "confidence": 0.0,
                    "location_description": "N/A",
                    "reasoning": reasoning,
                }
            )

        action = decision.get("action_details", {}).get("action")
        self.execute_action(action)

    return predictions
454
+
455
  def run_agent_loop(
456
  self, max_steps: int = 10, step_callback=None
457
  ) -> Optional[Tuple[float, float]]:
 
528
  "reasoning": decision.get("reasoning", "N/A"),
529
  "action_details": decision.get("action_details", {"action": "N/A"}),
530
  "history": history.copy(), # History up to current step (excluding current)
531
+ "debug_message": decision.get("debug_message", "N/A"),
532
  }
533
 
534
  action_details = decision.get("action_details", {})
main.py CHANGED
@@ -1,11 +1,15 @@
1
  import argparse
2
  import json
 
 
3
 
4
  from geo_bot import GeoBot
5
  from benchmark import MapGuesserBenchmark
6
  from data_collector import DataCollector
7
  from config import MODELS_CONFIG, get_data_paths, SUCCESS_THRESHOLD_KM, get_model_class
8
-
 
 
9
 
10
  def agent_mode(
11
  model_name: str,
@@ -147,11 +151,165 @@ def collect_mode(dataset_name: str, samples: int, headless: bool):
147
  print(f"Data collection complete for dataset '{dataset_name}'.")
148
 
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  def main():
151
  parser = argparse.ArgumentParser(description="MapCrunch AI Agent & Benchmark")
152
  parser.add_argument(
153
  "--mode",
154
- choices=["agent", "benchmark", "collect"],
155
  default="agent",
156
  help="Operation mode.",
157
  )
@@ -190,6 +348,7 @@ def main():
190
  default=0.0,
191
  help="Temperature parameter for LLM sampling (0.0 = deterministic, higher = more random). Default: 0.0",
192
  )
 
193
 
194
  args = parser.parse_args()
195
 
@@ -216,6 +375,16 @@ def main():
216
  dataset_name=args.dataset,
217
  temperature=args.temperature,
218
  )
 
 
 
 
 
 
 
 
 
 
219
 
220
 
221
  if __name__ == "__main__":
 
1
  import argparse
2
  import json
3
+ import os
4
+ from datetime import datetime
5
 
6
  from geo_bot import GeoBot
7
  from benchmark import MapGuesserBenchmark
8
  from data_collector import DataCollector
9
  from config import MODELS_CONFIG, get_data_paths, SUCCESS_THRESHOLD_KM, get_model_class
10
+ from collections import OrderedDict
11
+ from tqdm import tqdm
12
+ import matplotlib.pyplot as plt
13
 
14
  def agent_mode(
15
  model_name: str,
 
151
  print(f"Data collection complete for dataset '{dataset_name}'.")
152
 
153
 
154
def test_mode(
    models: list,
    samples: int,
    runs: int,
    steps: int,
    dataset_name: str = "default",
    temperature: float = 0.0,
    headless: bool = True,
):
    """
    CLI multi-model / multi-run benchmark.

    For each model:
      • run the agent `runs` times
      • each run evaluates the first `samples` entries of the dataset
      • record hit-rate per step and average distance per step

    Per-model logs and summaries are written as JSON below
    ./results/test/<timestamp>/<model>/, two tables are printed, and two
    PNG plots are saved next to the model folders.

    Args:
        models: Keys of MODELS_CONFIG to evaluate.
        samples: Number of dataset entries (taken from the start) to use.
        runs: How many times each model is run over those entries.
        steps: Maximum agent steps per entry; also the number of table rows.
        dataset_name: Dataset whose golden labels are loaded.
        temperature: Sampling temperature forwarded to GeoBot.
        headless: Whether the browser is started headless.

    Returns:
        None. Problems with the inputs are reported on stdout and the
        function returns early.
    """

    # ---------- load dataset ----------
    data_paths = get_data_paths(dataset_name)
    try:
        with open(data_paths["golden_labels"], "r", encoding="utf-8") as f:
            all_samples = json.load(f)["samples"]
    except FileNotFoundError:
        print(f"❌ dataset '{dataset_name}' not found.")
        return

    if not all_samples:
        print("❌ dataset is empty.")
        return

    test_samples = all_samples[:samples]
    # Without this guard the per-step averages below divide by zero
    # (runs == 0 or empty slice) or index into empty lists (steps == 0).
    if not test_samples or runs <= 0 or steps <= 0:
        print("❌ nothing to evaluate: samples, runs and steps must all be positive.")
        return

    # Fail fast on a typo instead of crashing after earlier models finished.
    unknown_models = [name for name in models if name not in MODELS_CONFIG]
    if unknown_models:
        print(f"❌ unknown model(s): {', '.join(map(str, unknown_models))}")
        return

    print(f"📊 loaded {len(test_samples)} samples from '{dataset_name}'")

    benchmark_helper = MapGuesserBenchmark(dataset_name=dataset_name, headless=headless)
    summary_by_step: dict[str, list] = OrderedDict()
    # Per model: one average per step, or None when no distance was measured.
    avg_distances: dict[str, list] = {}

    time_tag = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_dir = os.path.join("./results", "test", time_tag)
    os.makedirs(base_dir, exist_ok=True)

    # ---------- iterate over models ----------
    for model_name in models:
        log_json = {}
        print(f"\n===== {model_name} =====")
        cfg = MODELS_CONFIG[model_name]
        model_cls = get_model_class(cfg["class"])

        hits_per_step = [0] * steps
        distance_per_step = [0.0] * steps
        # Number of distances actually measured at each step; used as the
        # divisor for the average so failed predictions do not count as 0 km.
        measured_per_step = [0] * steps
        total_iterations = runs * len(test_samples)

        with tqdm(total=total_iterations, desc=model_name) as pbar:
            for run_idx in range(runs):
                with GeoBot(
                    model=model_cls,
                    model_name=cfg["model_name"],
                    headless=headless,
                    temperature=temperature,
                ) as bot:
                    for sample in test_samples:
                        if not bot.controller.load_location_from_data(sample):
                            pbar.update(1)
                            continue

                        preds = bot.test_run_agent_loop(max_steps=steps)
                        gt = {"lat": sample["lat"], "lng": sample["lng"]}

                        for idx, pred in enumerate(preds):
                            if idx >= steps:
                                # Never index past the per-step accumulators.
                                break
                            if not (
                                isinstance(pred, dict)
                                and "lat" in pred
                                and "lon" in pred
                            ):
                                continue
                            dist = benchmark_helper.calculate_distance(
                                gt, (pred["lat"], pred["lon"])
                            )
                            if dist is None:
                                continue
                            distance_per_step[idx] += dist
                            measured_per_step[idx] += 1
                            pred["distance"] = dist
                            pred["success"] = dist <= SUCCESS_THRESHOLD_KM
                            if pred["success"]:
                                hits_per_step[idx] += 1

                        log_json.setdefault(sample["id"], []).append({
                            "run_id": run_idx,
                            "predictions": preds,
                        })
                        pbar.update(1)

        model_dir = os.path.join(base_dir, model_name)
        os.makedirs(model_dir, exist_ok=True)
        with open(
            os.path.join(model_dir, f"{model_name}_log.json"), "w", encoding="utf-8"
        ) as f:
            json.dump(log_json, f, indent=2)

        # Accuracy is measured over every attempt: a sample that failed to
        # load or produced no usable prediction counts as a miss.
        denom = runs * len(test_samples)
        summary_by_step[model_name] = [h / denom for h in hits_per_step]
        avg_distances[model_name] = [
            (total / count) if count else None
            for total, count in zip(distance_per_step, measured_per_step)
        ]
        payload = {
            "avg_distance_km": avg_distances[model_name],
            "accuracy_per_step": summary_by_step[model_name],
        }
        with open(
            os.path.join(model_dir, f"{model_name}.json"), "w", encoding="utf-8"
        ) as f:
            json.dump(payload, f, indent=2)
        print(f"💾 results saved to {base_dir}")

    # ---------- pretty table ----------
    header = ["Step"] + list(summary_by_step.keys())
    row_width = max(len(h) for h in header) + 2
    print("\n=== ACCURACY PER STEP ===")
    print(" | ".join(h.center(row_width) for h in header))
    print("-" * (row_width + 3) * len(header))
    for i in range(steps):
        cells = [str(i + 1).center(row_width)]
        for m in summary_by_step:
            cells.append(f"{summary_by_step[m][i]*100:5.1f}%".center(row_width))
        print(" | ".join(cells))

    print("\n=== AVG DISTANCE PER STEP (km) ===")
    header = ["Step"] + list(avg_distances.keys())
    row_w = max(len(h) for h in header) + 2
    print(" | ".join(h.center(row_w) for h in header))
    print("-" * (row_w + 3) * len(header))

    for i in range(steps):
        cells = [str(i + 1).center(row_w)]
        for m in avg_distances:
            v = avg_distances[m][i]
            text = f"{v:6.1f}" if v is not None else " N/A "
            cells.append(text.center(row_w))
        print(" | ".join(cells))

    # ---------- plots (best effort: a plotting failure must not lose results) ----------
    try:
        plt.figure()
        for model, acc in summary_by_step.items():
            plt.plot(range(1, steps + 1), acc, marker="o", label=model)
        plt.xlabel("step")
        plt.ylabel("accuracy")
        plt.ylim(0, 1)
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.title("Accuracy vs Step")
        plt.savefig(f"{base_dir}/accuracy_step.png", dpi=120)
        plt.close()
        print("\n📈 saved plot to accuracy_step.png")

        # Plot average distance per model
        plt.figure()
        for model, dists in avg_distances.items():
            # Steps without any measured distance are drawn as gaps.
            plottable = [float("nan") if d is None else d for d in dists]
            plt.plot(range(1, steps + 1), plottable, marker="o", label=model)
        plt.xlabel("step")
        plt.ylabel("Avg Distance (km)")
        plt.title("Average Distance per Model")
        plt.xticks(rotation=45, ha="right")
        plt.legend()
        plt.tight_layout()
        plt.savefig(f"{base_dir}/avg_distance.png", dpi=120)
        plt.close()
        print("📈 saved plot to avg_distance.png")
    except Exception as e:
        print(f"⚠️ plot skipped: {e}")
306
+
307
+
308
  def main():
309
  parser = argparse.ArgumentParser(description="MapCrunch AI Agent & Benchmark")
310
  parser.add_argument(
311
  "--mode",
312
+ choices=["agent", "benchmark", "collect", "test"],
313
  default="agent",
314
  help="Operation mode.",
315
  )
 
348
  default=0.0,
349
  help="Temperature parameter for LLM sampling (0.0 = deterministic, higher = more random). Default: 0.0",
350
  )
351
+ parser.add_argument("--runs", type=int, default=3, help="[Test] Runs per model")
352
 
353
  args = parser.parse_args()
354
 
 
375
  dataset_name=args.dataset,
376
  temperature=args.temperature,
377
  )
378
+ elif args.mode == "test":
379
+ test_mode(
380
+ models=args.models or [args.model],
381
+ samples=args.samples,
382
+ runs=args.runs,
383
+ steps=args.steps,
384
+ dataset_name=args.dataset,
385
+ temperature=args.temperature,
386
+ headless=args.headless,
387
+ )
388
 
389
 
390
  if __name__ == "__main__":
mapcrunch_controller.py CHANGED
@@ -11,53 +11,80 @@ from config import MAPCRUNCH_URL, SELECTORS, DATA_COLLECTION_CONFIG
11
 
12
  class MapCrunchController:
13
  def __init__(self, headless: bool = False):
14
- options = uc.ChromeOptions()
15
- options.add_argument(
16
- "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
17
- )
18
- options.add_argument("--window-size=1920,1080")
19
- options.set_capability("goog:loggingPrefs", {"browser": "ALL"})
20
-
21
- if headless:
22
- options.add_argument("--headless=new")
 
 
 
 
 
23
 
24
- self.driver = uc.Chrome(options=options, use_subprocess=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  self.wait = WebDriverWait(self.driver, 10)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
 
27
- # Here we are injecting a script to the page to disable the browser detection.
28
- # Basically, we are setting the badBrowser property to 0, which is a property that is used to detect if the browser is being controlled by a script.
29
- # In the main.min.js, we can see some js code like this:
30
- # if (badBrowser) {
31
- # alert("Unsupported browser!");
32
- # } else {
33
- # window.panorama = { ... }
34
- # }
35
- self.driver.execute_cdp_cmd(
36
- "Page.addScriptToEvaluateOnNewDocument",
37
- {
38
- "source": """
39
- Object.defineProperty(window, 'badBrowser', {
40
- value: 0,
41
- writable: false,
42
- configurable: false
43
- });
44
- window.alert = function() {};
45
- Object.defineProperty(navigator, 'webdriver', {
46
- get: () => undefined
47
- });
48
- """
49
- },
50
- )
51
-
52
  for retry in range(3):
53
  try:
54
  self.driver.get(MAPCRUNCH_URL)
55
  time.sleep(3)
 
56
  break
57
  except Exception as e:
58
  if retry == 2:
59
  raise e
60
- print(f"Failed to load MapCrunch, retry {retry + 1}/3")
61
  time.sleep(2)
62
 
63
  def setup_clean_environment(self):
@@ -187,6 +214,16 @@ class MapCrunchController:
187
  base_actions.extend(["MOVE_FORWARD", "MOVE_BACKWARD"])
188
  return base_actions
189
 
 
 
 
 
 
 
 
 
 
 
190
  def get_current_address(self) -> Optional[str]:
191
  try:
192
  address_element = self.wait.until(
 
11
 
12
  class MapCrunchController:
13
  def __init__(self, headless: bool = False):
14
+ # Try to initialize ChromeDriver with version 137 (your current Chrome version)
15
+ try:
16
+ # Create fresh ChromeOptions for first attempt
17
+ options = uc.ChromeOptions()
18
+ options.add_argument("--no-sandbox")
19
+ options.add_argument("--disable-dev-shm-usage")
20
+ options.add_argument("--disable-gpu")
21
+ options.add_argument("--window-size=1920,1080")
22
+ options.add_argument("--disable-web-security")
23
+ options.add_argument("--disable-features=VizDisplayCompositor")
24
+ options.add_argument("--disable-blink-features=AutomationControlled")
25
+
26
+ if headless:
27
+ options.add_argument("--headless=new")
28
 
29
+ self.driver = uc.Chrome(options=options, use_subprocess=True, version_main=137)
30
+ print("✅ ChromeDriver initialized successfully with version 137")
31
+ except Exception as e:
32
+ print(f"Failed with version 137: {e}")
33
+ try:
34
+ # Create fresh ChromeOptions for fallback attempt
35
+ options = uc.ChromeOptions()
36
+ options.add_argument("--no-sandbox")
37
+ options.add_argument("--disable-dev-shm-usage")
38
+ options.add_argument("--disable-gpu")
39
+ options.add_argument("--window-size=1920,1080")
40
+ options.add_argument("--disable-web-security")
41
+ options.add_argument("--disable-features=VizDisplayCompositor")
42
+ options.add_argument("--disable-blink-features=AutomationControlled")
43
+
44
+ if headless:
45
+ options.add_argument("--headless=new")
46
+
47
+ # Fallback to auto-detection
48
+ self.driver = uc.Chrome(options=options, use_subprocess=True)
49
+ print("✅ ChromeDriver initialized successfully with auto-detection")
50
+ except Exception as e2:
51
+ print(f"Failed with auto-detection: {e2}")
52
+ raise Exception(f"Could not initialize ChromeDriver. Please update Chrome or check compatibility. Errors: {e}, {e2}")
53
+
54
  self.wait = WebDriverWait(self.driver, 10)
55
+
56
+ # Inject browser detection bypass script
57
+ try:
58
+ self.driver.execute_cdp_cmd(
59
+ "Page.addScriptToEvaluateOnNewDocument",
60
+ {
61
+ "source": """
62
+ Object.defineProperty(window, 'badBrowser', {
63
+ value: 0,
64
+ writable: false,
65
+ configurable: false
66
+ });
67
+ window.alert = function() {};
68
+ Object.defineProperty(navigator, 'webdriver', {
69
+ get: () => undefined
70
+ });
71
+ """
72
+ },
73
+ )
74
+ except Exception as e:
75
+ print(f"Warning: Could not inject browser detection script: {e}")
76
 
77
+ # Load MapCrunch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  for retry in range(3):
79
  try:
80
  self.driver.get(MAPCRUNCH_URL)
81
  time.sleep(3)
82
+ print("✅ MapCrunch loaded successfully")
83
  break
84
  except Exception as e:
85
  if retry == 2:
86
  raise e
87
+ print(f"Failed to load MapCrunch, retry {retry + 1}/3: {e}")
88
  time.sleep(2)
89
 
90
  def setup_clean_environment(self):
 
214
  base_actions.extend(["MOVE_FORWARD", "MOVE_BACKWARD"])
215
  return base_actions
216
 
217
def get_test_available_actions(self) -> List[str]:
    """
    Report which actions can be taken from the current panorama.

    Panning left and right is always offered. The two movement actions are
    appended only when `window.panorama.getLinks()`, evaluated in the page
    via the driver, returns a non-empty result.
    """
    panorama_links = self.driver.execute_script("return window.panorama.getLinks();")
    actions = ["PAN_LEFT", "PAN_RIGHT"]
    can_move = bool(panorama_links) and len(panorama_links) > 0
    if not can_move:
        return actions
    return actions + ["MOVE_FORWARD", "MOVE_BACKWARD"]
226
+
227
  def get_current_address(self) -> Optional[str]:
228
  try:
229
  address_element = self.wait.until(