Spaces:

Omniscient001
/

Omniscient

Sleeping

@@ -2,6 +2,8 @@ import streamlit as st
 import json
 import os
 import time
 import re
 from pathlib import Path
@@ -67,7 +69,7 @@ with st.sidebar:
     st.header("Configuration")
     # Mode selection
-    mode = st.radio("Mode", ["Dataset Mode", "Online Mode"], index=0)
     if mode == "Dataset Mode":
         # Get available datasets and ensure we have a valid default
@@ -114,6 +116,43 @@ with st.sidebar:
         num_samples = st.slider(
             "Samples to Test", 1, len(golden_labels), min(3, len(golden_labels))
         )
     else:  # Online Mode
         st.info("Enter a URL to analyze a specific location")
@@ -211,219 +250,319 @@ with st.sidebar:
             help="Controls randomness in AI responses. 0.0 = deterministic, higher = more creative",
         )
     start_button = st.button("🚀 Start", type="primary")
 # Main Logic
 if start_button:
-    test_samples = golden_labels[:num_samples]
-    config = MODELS_CONFIG[model_choice]
-    model_class = get_model_class(config["class"])
-    benchmark_helper = MapGuesserBenchmark(
-        dataset_name=dataset_choice if mode == "Dataset Mode" else "online"
-    )
-    all_results = []
-    progress_bar = st.progress(0)
-    with GeoBot(
-        model=model_class,
-        model_name=config["model_name"],
-        headless=True,
-        temperature=temperature,
-    ) as bot:
-        for i, sample in enumerate(test_samples):
-            st.divider()
-            st.header(f"Sample {i + 1}/{num_samples}")
-            if mode == "Online Mode":
-                # Load the MapCrunch URL directly
-                bot.controller.load_url(sample["url"])
-            else:
-                # Load from dataset as before
-                bot.controller.load_location_from_data(sample)
-            bot.controller.setup_clean_environment()
-            # Create containers for UI updates
-            sample_container = st.container()
-            # Initialize UI state for this sample
-            step_containers = {}
-            sample_steps_data = []
-            def ui_step_callback(step_info):
-                """Callback function to update UI after each step"""
-                step_num = step_info["step_num"]
-                # Store step data
-                sample_steps_data.append(step_info)
-                with sample_container:
-                    # Create step container if it doesn't exist
-                    if step_num not in step_containers:
-                        step_containers[step_num] = st.container()
-                    with step_containers[step_num]:
-                        st.subheader(f"Step {step_num}/{step_info['max_steps']}")
-                        col1, col2 = st.columns([1, 2])
-                        with col1:
-                            # Display screenshot
-                            st.image(
-                                step_info["screenshot_bytes"],
-                                caption=f"What AI sees - Step {step_num}",
-                                use_column_width=True,
-                            )
-                        with col2:
-                            # Show available actions
-                            st.write("**Available Actions:**")
-                            st.code(
-                                json.dumps(step_info["available_actions"], indent=2)
-                            )
-                            # Show history context - use the history from step_info
-                            current_history = step_info.get("history", [])
-                            history_text = bot.generate_history_text(current_history)
-                            st.write("**AI Context:**")
-                            st.text_area(
-                                "History",
-                                history_text,
-                                height=100,
-                                disabled=True,
-                                key=f"history_{i}_{step_num}",
-                            )
-                            # Show AI reasoning and action
-                            action = step_info.get("action_details", {}).get(
-                                "action", "N/A"
-                            )
-                            if step_info.get("is_final_step") and action != "GUESS":
-                                st.warning("Max steps reached. Forcing GUESS.")
-                            st.write("**AI Reasoning:**")
-                            st.info(step_info.get("reasoning", "N/A"))
-                            st.write("**AI Action:**")
-                            if action == "GUESS":
-                                lat = step_info.get("action_details", {}).get("lat")
-                                lon = step_info.get("action_details", {}).get("lon")
-                                st.success(f"`{action}` - {lat:.4f}, {lon:.4f}")
-                            else:
-                                st.success(f"`{action}`")
-                            # Show decision details for debugging
-                            with st.expander("Decision Details"):
-                                decision_data = {
-                                    "reasoning": step_info.get("reasoning"),
-                                    "action_details": step_info.get("action_details"),
-                                    "remaining_steps": step_info.get("remaining_steps"),
-                                }
-                                st.json(decision_data)
-                # Force UI refresh
-                time.sleep(0.5)  # Small delay to ensure UI updates are visible
-            # Run the agent loop with UI callback
-            try:
-                final_guess = bot.run_agent_loop(
-                    max_steps=steps_per_sample, step_callback=ui_step_callback
-                )
-            except Exception as e:
-                st.error(f"Error during agent execution: {e}")
-                final_guess = None
-            # Sample Results
-            with sample_container:
-                st.subheader("Sample Result")
-                true_coords = {"lat": sample.get("lat"), "lng": sample.get("lng")}
-                distance_km = None
-                is_success = False
-                if final_guess:
-                    distance_km = benchmark_helper.calculate_distance(
-                        true_coords, final_guess
-                    )
-                    if distance_km is not None:
-                        is_success = distance_km <= SUCCESS_THRESHOLD_KM
-                    col1, col2, col3 = st.columns(3)
-                    col1.metric(
-                        "Final Guess", f"{final_guess[0]:.3f}, {final_guess[1]:.3f}"
-                    )
-                    col2.metric(
-                        "Ground Truth",
-                        f"{true_coords['lat']:.3f}, {true_coords['lng']:.3f}",
                     )
-                    col3.metric(
-                        "Distance",
-                        f"{distance_km:.1f} km",
-                        delta="Success" if is_success else "Failed",
                     )
-                else:
-                    st.error("No final guess made")
-                all_results.append(
-                    {
-                        "sample_id": sample.get("id"),
-                        "model": model_choice,
-                        "steps_taken": len(sample_steps_data),
-                        "max_steps": steps_per_sample,
-                        "temperature": temperature,
-                        "true_coordinates": true_coords,
-                        "predicted_coordinates": final_guess,
-                        "distance_km": distance_km,
-                        "success": is_success,
-                    }
-                )
-            progress_bar.progress((i + 1) / num_samples)
-    # Final Summary
-    st.divider()
-    st.header("🏁 Final Results")
-    # Calculate summary stats
-    successes = [r for r in all_results if r["success"]]
-    success_rate = len(successes) / len(all_results) if all_results else 0
-    valid_distances = [
-        r["distance_km"] for r in all_results if r["distance_km"] is not None
-    ]
-    avg_distance = sum(valid_distances) / len(valid_distances) if valid_distances else 0
-    # Overall metrics
-    col1, col2, col3 = st.columns(3)
-    col1.metric("Success Rate", f"{success_rate * 100:.1f}%")
-    col2.metric("Average Distance", f"{avg_distance:.1f} km")
-    col3.metric("Total Samples", len(all_results))
-    # Detailed results table
-    st.subheader("Detailed Results")
-    st.dataframe(all_results, use_container_width=True)
-    # Success/failure breakdown
-    if successes:
-        st.subheader("✅ Successful Samples")
-        st.dataframe(successes, use_container_width=True)
-    failures = [r for r in all_results if not r["success"]]
-    if failures:
-        st.subheader("❌ Failed Samples")
-        st.dataframe(failures, use_container_width=True)
-    # Export functionality
-    if st.button("💾 Export Results"):
-        results_json = json.dumps(all_results, indent=2)
-        st.download_button(
-            label="Download results.json",
-            data=results_json,
-            file_name=f"geo_results_{dataset_choice}_{model_choice}_{num_samples}samples.json",
-            mime="application/json",
-        )
 def handle_tab_completion():

 import json
 import os
 import time
+import pandas as pd
+import altair as alt
 import re
 from pathlib import Path
     st.header("Configuration")
     # Mode selection
+    mode = st.radio("Mode", ["Dataset Mode", "Online Mode", "Test Mode"], index=0)
     if mode == "Dataset Mode":
         # Get available datasets and ensure we have a valid default
         num_samples = st.slider(
             "Samples to Test", 1, len(golden_labels), min(3, len(golden_labels))
         )
+    elif mode == "Test Mode":
+        st.info("🔬 Multi-Model Benchmark Testing")
+        available_datasets = get_available_datasets()
+        dataset_choice = st.selectbox("Dataset", available_datasets, index=0)
+        selected_models = st.multiselect(
+            "Select Models to Compare",
+            list(MODELS_CONFIG.keys()),
+            default=[DEFAULT_MODEL],
+        )
+        if not selected_models:
+            st.warning("Please select at least one model to run the test.")
+            st.stop()
+        steps_per_sample = st.slider("Max Steps", 1, 50, 10)
+        temperature = st.slider(
+            "Temperature",
+            0.0,
+            2.0,
+            DEFAULT_TEMPERATURE,
+            0.1,
+            help="Controls randomness in AI responses. 0.0 = deterministic, higher = more creative",
+        )
+        # load dataset
+        data_paths = get_data_paths(dataset_choice)
+        try:
+            with open(data_paths["golden_labels"], "r") as f:
+                golden_labels = json.load(f).get("samples", [])
+            st.success(f"Dataset '{dataset_choice}' loaded with {len(golden_labels)} samples")
+        except Exception as e:
+            st.error(f"Error loading dataset '{dataset_choice}': {str(e)}")
+            st.stop()
+        num_samples = st.slider("Samples per Run", 1, len(golden_labels), min(10, len(golden_labels)))
+        runs_per_model = st.slider("Runs per Model", 1, 10, 5)
     else:  # Online Mode
         st.info("Enter a URL to analyze a specific location")
             help="Controls randomness in AI responses. 0.0 = deterministic, higher = more creative",
         )
+    # common start button
     start_button = st.button("🚀 Start", type="primary")
 # Main Logic
 if start_button:
+    if mode == "Test Mode":
+        benchmark_helper = MapGuesserBenchmark(dataset_name=dataset_choice)
+        summary_by_step = {}
+        avg_distance_by_step = {}
+        progress_bar = st.progress(0)
+        for mi, model_name in enumerate(selected_models):
+            st.header(f"Model: {model_name}")
+            config = MODELS_CONFIG[model_name]
+            model_class = get_model_class(config["class"])
+            successes_per_step = [0]*steps_per_sample
+            dist_sum_per_step = [0.0]*steps_per_sample
+            dist_cnt_per_step = [0]*steps_per_sample
+            total_iterations = runs_per_model * num_samples
+            model_bar = st.progress(0, text=f"Running {model_name}")
+            iteration_counter = 0
+            for run_idx in range(runs_per_model):
+                with GeoBot(model=model_class, model_name=config["model_name"], headless=True, temperature=temperature) as bot:
+                    for si, sample in enumerate(golden_labels[:num_samples]):
+                        if not bot.controller.load_location_from_data(sample):
+                            iteration_counter += 1
+                            model_bar.progress(iteration_counter/total_iterations)
+                            continue
+                        predictions = bot.test_run_agent_loop(max_steps=steps_per_sample)
+                        true_coords = {"lat": sample["lat"], "lng": sample["lng"]}
+                        for step_idx, pred in enumerate(predictions):
+                            if isinstance(pred, dict) and "lat" in pred:
+                                dist = benchmark_helper.calculate_distance(true_coords, (pred["lat"], pred["lon"]))
+                                if dist is not None:
+                                    # New: accumulate the distance sum and the sample count for this step
+                                    dist_sum_per_step[step_idx] += dist
+                                    dist_cnt_per_step[step_idx] += 1
+                                    # Existing: count guesses that fall within the success threshold
+                                    if dist <= SUCCESS_THRESHOLD_KM:
+                                        successes_per_step[step_idx] += 1
+                        iteration_counter += 1
+                        model_bar.progress(iteration_counter/total_iterations)
+            acc_per_step = [s/(num_samples*runs_per_model) for s in successes_per_step]
+            summary_by_step[model_name] = acc_per_step
+            avg_per_step = [
+                (dist_sum_per_step[i]/dist_cnt_per_step[i]) if dist_cnt_per_step[i] else None
+                for i in range(steps_per_sample)
+            ]
+            avg_distance_by_step[model_name] = avg_per_step
+            progress_bar.progress((mi+1)/len(selected_models))
+        # plot
+        st.subheader("Accuracy vs Steps")
+        # summary_by_step maps each model name to its per-step accuracy list: {model: [acc_step1, acc_step2, ...]}
+        df_wide = pd.DataFrame(summary_by_step)
+        df_long = (
+            df_wide
+            .reset_index(names="Step")
+            .melt(id_vars="Step", var_name="Model", value_name="Accuracy")
+        )
+        chart = (
+            alt.Chart(df_long)
+            .mark_line(point=True)
+            .encode(
+                x=alt.X("Step:O", title="Step #"),
+                y=alt.Y("Accuracy:Q", title="Accuracy", scale=alt.Scale(domain=[0, 1])),
+                color=alt.Color("Model:N", title="Model"),
+                tooltip=["Model:N", "Step:O", alt.Tooltip("Accuracy:Q", format=".2%")],
+            )
+            .properties(width=700, height=400)
+        )
+        st.altair_chart(chart, use_container_width=True)
+        st.subheader("Average Distance vs Steps (km)")
+        df_wide_dist = pd.DataFrame(avg_distance_by_step)
+        df_long_dist = (
+            df_wide_dist
+            .reset_index(names="Step")
+            .melt(id_vars="Step", var_name="Model", value_name="AvgDistanceKm")
+        )
+        dist_chart = (
+            alt.Chart(df_long_dist)
+            .mark_line(point=True)
+            .encode(
+                x=alt.X("Step:O", title="Step #"),
+                y=alt.Y("AvgDistanceKm:Q", title="Avg Distance (km)", scale=alt.Scale(zero=True)),
+                color=alt.Color("Model:N", title="Model"),
+                tooltip=["Model:N", "Step:O", alt.Tooltip("AvgDistanceKm:Q", format=".1f")],
+            )
+            .properties(width=700, height=400)
+        )
+        st.altair_chart(dist_chart, use_container_width=True)
+        st.stop()
+    else:
+        test_samples = golden_labels[:num_samples]
+        config = MODELS_CONFIG[model_choice]
+        model_class = get_model_class(config["class"])
+        benchmark_helper = MapGuesserBenchmark(
+            dataset_name=dataset_choice if mode == "Dataset Mode" else "online"
+        )
+        all_results = []
+        progress_bar = st.progress(0)
+        with GeoBot(
+            model=model_class,
+            model_name=config["model_name"],
+            headless=True,
+            temperature=temperature,
+        ) as bot:
+            for i, sample in enumerate(test_samples):
+                st.divider()
+                st.header(f"Sample {i + 1}/{num_samples}")
+                if mode == "Online Mode":
+                    # Load the MapCrunch URL directly
+                    bot.controller.load_url(sample["url"])
+                else:
+                    # Load from dataset as before
+                    bot.controller.load_location_from_data(sample)
+                bot.controller.setup_clean_environment()
+                # Create containers for UI updates
+                sample_container = st.container()
+                # Initialize UI state for this sample
+                step_containers = {}
+                sample_steps_data = []
+                def ui_step_callback(step_info):
+                    """Callback function to update UI after each step"""
+                    step_num = step_info["step_num"]
+                    # Store step data
+                    sample_steps_data.append(step_info)
+                    with sample_container:
+                        # Create step container if it doesn't exist
+                        if step_num not in step_containers:
+                            step_containers[step_num] = st.container()
+                        with step_containers[step_num]:
+                            st.subheader(f"Step {step_num}/{step_info['max_steps']}")
+                            col1, col2 = st.columns([1, 2])
+                            with col1:
+                                # Display screenshot
+                                st.image(
+                                    step_info["screenshot_bytes"],
+                                    caption=f"What AI sees - Step {step_num}",
+                                    use_column_width=True,
+                                )
+                            with col2:
+                                # Show available actions
+                                st.write("**Available Actions:**")
+                                st.code(
+                                    json.dumps(step_info["available_actions"], indent=2)
+                                )
+                                # Show history context - use the history from step_info
+                                current_history = step_info.get("history", [])
+                                history_text = bot.generate_history_text(current_history)
+                                st.write("**AI Context:**")
+                                st.text_area(
+                                    "History",
+                                    history_text,
+                                    height=100,
+                                    disabled=True,
+                                    key=f"history_{i}_{step_num}",
+                                )
+                                # Show AI reasoning and action
+                                action = step_info.get("action_details", {}).get(
+                                    "action", "N/A"
+                                )
+                                if step_info.get("is_final_step") and action != "GUESS":
+                                    st.warning("Max steps reached. Forcing GUESS.")
+                                st.write("**AI Reasoning:**")
+                                st.info(step_info.get("reasoning", "N/A"))
+                                if step_info.get("debug_message") != "N/A":
+                                    st.write("**AI Debug Message:**")
+                                    st.code(step_info.get("debug_message"), language="json")
+                                st.write("**AI Action:**")
+                                if action == "GUESS":
+                                    lat = step_info.get("action_details", {}).get("lat")
+                                    lon = step_info.get("action_details", {}).get("lon")
+                                    st.success(f"`{action}` - {lat:.4f}, {lon:.4f}")
+                                else:
+                                    st.success(f"`{action}`")
+                                # Show decision details for debugging
+                                with st.expander("Decision Details"):
+                                    decision_data = {
+                                        "reasoning": step_info.get("reasoning"),
+                                        "action_details": step_info.get("action_details"),
+                                        "remaining_steps": step_info.get("remaining_steps"),
+                                    }
+                                    st.json(decision_data)
+                    # Force UI refresh
+                    time.sleep(0.5)  # Small delay to ensure UI updates are visible
+                # Run the agent loop with UI callback
+                try:
+                    final_guess = bot.run_agent_loop(
+                        max_steps=steps_per_sample, step_callback=ui_step_callback
                     )
+                except Exception as e:
+                    st.error(f"Error during agent execution: {e}")
+                    final_guess = None
+                # Sample Results
+                with sample_container:
+                    st.subheader("Sample Result")
+                    true_coords = {"lat": sample.get("lat"), "lng": sample.get("lng")}
+                    distance_km = None
+                    is_success = False
+                    if final_guess:
+                        distance_km = benchmark_helper.calculate_distance(
+                            true_coords, final_guess
+                        )
+                        if distance_km is not None:
+                            is_success = distance_km <= SUCCESS_THRESHOLD_KM
+                        col1, col2, col3 = st.columns(3)
+                        col1.metric(
+                            "Final Guess", f"{final_guess[0]:.3f}, {final_guess[1]:.3f}"
+                        )
+                        col2.metric(
+                            "Ground Truth",
+                            f"{true_coords['lat']:.3f}, {true_coords['lng']:.3f}",
+                        )
+                        col3.metric(
+                            "Distance",
+                            f"{distance_km:.1f} km",
+                            delta="Success" if is_success else "Failed",
+                        )
+                    else:
+                        st.error("No final guess made")
+                    all_results.append(
+                        {
+                            "sample_id": sample.get("id"),
+                            "model": model_choice,
+                            "steps_taken": len(sample_steps_data),
+                            "max_steps": steps_per_sample,
+                            "temperature": temperature,
+                            "true_coordinates": true_coords,
+                            "predicted_coordinates": final_guess,
+                            "distance_km": distance_km,
+                            "success": is_success,
+                        }
                     )
+                progress_bar.progress((i + 1) / num_samples)
+        # Final Summary
+        st.divider()
+        st.header("🏁 Final Results")
+        # Calculate summary stats
+        successes = [r for r in all_results if r["success"]]
+        success_rate = len(successes) / len(all_results) if all_results else 0
+        valid_distances = [
+            r["distance_km"] for r in all_results if r["distance_km"] is not None
+        ]
+        avg_distance = sum(valid_distances) / len(valid_distances) if valid_distances else 0
+        # Overall metrics
+        col1, col2, col3 = st.columns(3)
+        col1.metric("Success Rate", f"{success_rate * 100:.1f}%")
+        col2.metric("Average Distance", f"{avg_distance:.1f} km")
+        col3.metric("Total Samples", len(all_results))
+        # Detailed results table
+        st.subheader("Detailed Results")
+        st.dataframe(all_results, use_container_width=True)
+        # Success/failure breakdown
+        if successes:
+            st.subheader("✅ Successful Samples")
+            st.dataframe(successes, use_container_width=True)
+        failures = [r for r in all_results if not r["success"]]
+        if failures:
+            st.subheader("❌ Failed Samples")
+            st.dataframe(failures, use_container_width=True)
+        # Export functionality
+        if st.button("💾 Export Results"):
+            results_json = json.dumps(all_results, indent=2)
+            st.download_button(
+                label="Download results.json",
+                data=results_json,
+                file_name=f"geo_results_{dataset_choice}_{model_choice}_{num_samples}samples.json",
+                mime="application/json",
+            )
 def handle_tab_completion():

benchmark.py CHANGED Viewed

@@ -99,6 +99,9 @@ class MapGuesserBenchmark:
                         print(f"📍 Sample {i + 1}/{len(test_samples)}")
                         try:
                             result = self.run_single_test_with_bot(bot, sample)
                             all_results.append(result)
                             status = (
@@ -154,6 +157,8 @@ class MapGuesserBenchmark:
             }
         predicted_lat_lon = bot.analyze_image(screenshot)
         inference_time = time.time() - start_time
         true_coords = {"lat": location_data.get("lat"), "lng": location_data.get("lng")}
@@ -163,7 +168,7 @@ class MapGuesserBenchmark:
         print(f"🔍 True coords: {true_coords}")
         print(f"🔍 Predicted coords: {predicted_lat_lon}")
         distance_km = self.calculate_distance(true_coords, predicted_lat_lon)
         is_success = distance_km is not None and distance_km <= SUCCESS_THRESHOLD_KM
         return {

                         print(f"📍 Sample {i + 1}/{len(test_samples)}")
                         try:
                             result = self.run_single_test_with_bot(bot, sample)
+                            if result is None:
+                                print(f"❌ Sample_{i+1} test failed: No predicted coords")
+                                continue
                             all_results.append(result)
                             status = (
             }
         predicted_lat_lon = bot.analyze_image(screenshot)
+        if predicted_lat_lon is None:
+            return None
         inference_time = time.time() - start_time
         true_coords = {"lat": location_data.get("lat"), "lng": location_data.get("lng")}
         print(f"🔍 True coords: {true_coords}")
         print(f"🔍 Predicted coords: {predicted_lat_lon}")
         distance_km = self.calculate_distance(true_coords, predicted_lat_lon)
         is_success = distance_km is not None and distance_km <= SUCCESS_THRESHOLD_KM
         return {

config.py CHANGED Viewed

@@ -38,12 +38,12 @@ DEFAULT_TEMPERATURE = 1.0
 # Model configurations
 MODELS_CONFIG = {
     "gpt-4o": {
-        "class": "ChatOpenAI",
         "model_name": "gpt-4o",
         "description": "OpenAI GPT-4o",
     },
     "gpt-4o-mini": {
-        "class": "ChatOpenAI",
         "model_name": "gpt-4o-mini",
         "description": "OpenAI GPT-4o Mini",
     },

 # Model configurations
 MODELS_CONFIG = {
     "gpt-4o": {
+        "class": "OpenRouter",
         "model_name": "gpt-4o",
         "description": "OpenAI GPT-4o",
     },
     "gpt-4o-mini": {
+        "class": "OpenRouter",
         "model_name": "gpt-4o-mini",
         "description": "OpenAI GPT-4o Mini",
     },

datasets/test/golden_labels.json ADDED Viewed

	@@ -0,0 +1,759 @@

+{
+  "metadata": {
+    "dataset_name": "test",
+    "collection_date": "2025-08-07T12:43:38.026706",
+    "collection_options": {}
+  },
+  "samples": [
+    {
+      "id": "d6250b7f-4da5-42c1-8c8d-0423e67e77be",
+      "timestamp": "2025-08-07T12:40:37.875459",
+      "lat": 47.66613320444537,
+      "lng": 26.011012145674016,
+      "address": "3 DJ178A, Suceava County",
+      "pano_id": "PdysAkpL3AJpCWhOBC5GwQ",
+      "pov": {
+        "heading": -238.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "47.666133_26.011012_-238.15_-5.00_0",
+      "thumbnail_path": "d6250b7f-4da5-42c1-8c8d-0423e67e77be.jpg"
+    },
+    {
+      "id": "3bb51463-0a02-4ce4-9e61-6e0f28491897",
+      "timestamp": "2025-08-07T12:40:41.467083",
+      "lat": -5.239479425072147,
+      "lng": 71.82213288625383,
+      "address": "British Indian Ocean Territory",
+      "pano_id": "NYwG1Ym4ZqDLb5APiRn2Eg",
+      "pov": {
+        "heading": -48.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "-5.239479_71.822133_-48.15_-5.00_0",
+      "thumbnail_path": "3bb51463-0a02-4ce4-9e61-6e0f28491897.jpg"
+    },
+    {
+      "id": "4acf7d7e-8309-4e57-88b2-1ea1019c1719",
+      "timestamp": "2025-08-07T12:40:45.049516",
+      "lat": 44.577090525370025,
+      "lng": 132.8105749539221,
+      "address": "Spassk-Dalny, Primorsky Krai",
+      "pano_id": "l1mVXN5S-foFa6foYGvMaQ",
+      "pov": {
+        "heading": 100.85000000000002,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "44.577091_132.810575_100.85_-5.00_0",
+      "thumbnail_path": "4acf7d7e-8309-4e57-88b2-1ea1019c1719.jpg"
+    },
+    {
+      "id": "c4d4352f-6285-42c1-bbae-231ca95da48a",
+      "timestamp": "2025-08-07T12:40:48.577565",
+      "lat": -14.173449381539905,
+      "lng": -169.67773654813135,
+      "address": "Rte 20, Ofu, Manu'a District",
+      "pano_id": "GTAdq2n3eUJ33lWb-gJ5BA",
+      "pov": {
+        "heading": 321.85,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "-14.173449_-169.677737_321.85_-5.00_0",
+      "thumbnail_path": "c4d4352f-6285-42c1-bbae-231ca95da48a.jpg"
+    },
+    {
+      "id": "b4b889cf-008f-4b71-b901-bca7b3de3951",
+      "timestamp": "2025-08-07T12:40:52.470733",
+      "lat": 44.83634227352461,
+      "lng": -91.46694086852327,
+      "address": "3048 Winsor Dr, Eau Claire, Wisconsin",
+      "pano_id": "teO7CeeojzPT4y6Dz5V4qg",
+      "pov": {
+        "heading": -244.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "44.836342_-91.466941_-244.15_-5.00_0",
+      "thumbnail_path": "b4b889cf-008f-4b71-b901-bca7b3de3951.jpg"
+    },
+    {
+      "id": "7a606d59-46f3-4522-b2be-2e5a5576e155",
+      "timestamp": "2025-08-07T12:40:56.350929",
+      "lat": 32.28575621196474,
+      "lng": -64.77437787828177,
+      "address": "Bermuda Tourism",
+      "pano_id": "CAoSFkNJSE0wb2dLRUlDQWdJQ3VnTGZLUmc.",
+      "pov": {
+        "heading": -26.149999999999977,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "32.285756_-64.774378_-26.15_-5.00_0",
+      "thumbnail_path": "7a606d59-46f3-4522-b2be-2e5a5576e155.jpg"
+    },
+    {
+      "id": "262d348a-a60a-42d8-bd4f-68aafe98d1fb",
+      "timestamp": "2025-08-07T12:40:59.894064",
+      "lat": 11.275626069517537,
+      "lng": 104.8745358584606,
+      "address": "Tak\u00e9o Province",
+      "pano_id": "5Y1dyapSF2NxjeB4-ucZkA",
+      "pov": {
+        "heading": 134.85000000000002,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "11.275626_104.874536_134.85_-5.00_0",
+      "thumbnail_path": "262d348a-a60a-42d8-bd4f-68aafe98d1fb.jpg"
+    },
+    {
+      "id": "09ce31a1-a719-4ed9-a344-7987214902c1",
+      "timestamp": "2025-08-07T12:41:03.536576",
+      "lat": -17.362187562805154,
+      "lng": -63.14684639831983,
+      "address": "Camino Montero, Santa Cruz Department",
+      "pano_id": "51rbDOTj6SCtSi9vyN0-Pg",
+      "pov": {
+        "heading": -336.15,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "-17.362188_-63.146846_-336.15_-5.00_0",
+      "thumbnail_path": "09ce31a1-a719-4ed9-a344-7987214902c1.jpg"
+    },
+    {
+      "id": "4d7925b6-c1b2-4968-b5cf-61a67b2c68fb",
+      "timestamp": "2025-08-07T12:41:06.979405",
+      "lat": -21.458641127651422,
+      "lng": -47.59839773953906,
+      "address": "12160 Rodovia Conde Francisco Matarazzo J\u00fanior, State of S\u00e3o Paulo",
+      "pano_id": "PTMrd1Xosg9QO25i58gjAg",
+      "pov": {
+        "heading": -204.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "-21.458641_-47.598398_-204.15_-5.00_0",
+      "thumbnail_path": "4d7925b6-c1b2-4968-b5cf-61a67b2c68fb.jpg"
+    },
+    {
+      "id": "1929ea7c-af27-42d0-9931-66d5ad451d21",
+      "timestamp": "2025-08-07T12:41:10.587109",
+      "lat": -54.00998792650971,
+      "lng": -67.6803410996465,
+      "address": "17 RP F, Tierra del Fuego Province",
+      "pano_id": "HsUaaUVcACNjAhgLP2_YOg",
+      "pov": {
+        "heading": -73.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "-54.009988_-67.680341_-73.15_-5.00_0",
+      "thumbnail_path": "1929ea7c-af27-42d0-9931-66d5ad451d21.jpg"
+    },
+    {
+      "id": "7bc2a39e-ac61-4704-a950-203117b4aca2",
+      "timestamp": "2025-08-07T12:41:14.089628",
+      "lat": 42.44808280064812,
+      "lng": 1.4936480624654318,
+      "address": "Cam\u00ed els Hortells, Sant Juli\u00e0 de L\u00f2ria",
+      "pano_id": "kqVCVi1mPVjLpeHMyN_BPQ",
+      "pov": {
+        "heading": 111.85000000000002,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "42.448083_1.493648_111.85_-5.00_0",
+      "thumbnail_path": "7bc2a39e-ac61-4704-a950-203117b4aca2.jpg"
+    },
+    {
+      "id": "a77120f7-f65b-4ea4-8419-4c2f599c2ed8",
+      "timestamp": "2025-08-07T12:41:17.644882",
+      "lat": 23.57625889505424,
+      "lng": 120.55489844246863,
+      "address": "Chiayi County",
+      "pano_id": "25efsk04WLxb8UuEELh0fQ",
+      "pov": {
+        "heading": 258.85,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "23.576259_120.554898_258.85_-5.00_0",
+      "thumbnail_path": "a77120f7-f65b-4ea4-8419-4c2f599c2ed8.jpg"
+    },
+    {
+      "id": "bf12b96e-5ee7-4815-bc1e-2ef6ccaf3b5c",
+      "timestamp": "2025-08-07T12:41:21.174601",
+      "lat": 49.76842154708744,
+      "lng": 6.236529746429928,
+      "address": "12 Regioun, Heffingen, Mersch",
+      "pano_id": "VLotZqwpyqKwg2D1uRhZLA",
+      "pov": {
+        "heading": -243.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "49.768422_6.236530_-243.15_-5.00_0",
+      "thumbnail_path": "bf12b96e-5ee7-4815-bc1e-2ef6ccaf3b5c.jpg"
+    },
+    {
+      "id": "6a5589de-e1fb-46c4-96c3-85cfb161444e",
+      "timestamp": "2025-08-07T12:41:24.747595",
+      "lat": 49.78642391720587,
+      "lng": 6.199481729741201,
+      "address": "CR118, Larochette, Mersch",
+      "pano_id": "JqZpePEOedyFAYtwUC786g",
+      "pov": {
+        "heading": -104.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "49.786424_6.199482_-104.15_-5.00_0",
+      "thumbnail_path": "6a5589de-e1fb-46c4-96c3-85cfb161444e.jpg"
+    },
+    {
+      "id": "9c9465d9-5bfa-48c0-8fa4-f4a1c7dd0c25",
+      "timestamp": "2025-08-07T12:41:28.269300",
+      "lat": 34.0994193037527,
+      "lng": 131.95163614377708,
+      "address": "\u770c\u9053140\u53f7, Shunan, Yamaguchi",
+      "pano_id": "6HXD7J5jRsnBD5_KjCTSPw",
+      "pov": {
+        "heading": 129.85000000000002,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "34.099419_131.951636_129.85_-5.00_0",
+      "thumbnail_path": "9c9465d9-5bfa-48c0-8fa4-f4a1c7dd0c25.jpg"
+    },
+    {
+      "id": "3f0e8c12-109e-4db7-a228-52a156ca880d",
+      "timestamp": "2025-08-07T12:41:32.781435",
+      "lat": 14.7694212956585,
+      "lng": -16.695508234038147,
+      "address": "Khombole, Thi\u00e8s Region",
+      "pano_id": "BjJ0cU8LxMFDFJD3vj5YYQ",
+      "pov": {
+        "heading": 241.85000000000002,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "14.769421_-16.695508_241.85_-5.00_0",
+      "thumbnail_path": "3f0e8c12-109e-4db7-a228-52a156ca880d.jpg"
+    },
+    {
+      "id": "a76f6ed2-5bb0-4750-bfd0-5a01fa052772",
+      "timestamp": "2025-08-07T12:41:36.390462",
+      "lat": 49.075733818467846,
+      "lng": 19.306522463680235,
+      "address": "32 J\u00e1na Jan\u010deka, Ru\u017eomberok, \u017dilina Region",
+      "pano_id": "4EOqYYxwF--FflZHgPGu2Q",
+      "pov": {
+        "heading": -233.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "49.075734_19.306522_-233.15_-5.00_0",
+      "thumbnail_path": "a76f6ed2-5bb0-4750-bfd0-5a01fa052772.jpg"
+    },
+    {
+      "id": "a6f20438-972f-48b0-8dc6-e95baec1c8c2",
+      "timestamp": "2025-08-07T12:41:39.931459",
+      "lat": 32.28733167935287,
+      "lng": -64.77638248243588,
+      "address": "23 Lovers Ln, Paget Parish",
+      "pano_id": "ZHTVVVlJPR35oUPiShnqHw",
+      "pov": {
+        "heading": -113.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "32.287332_-64.776382_-113.15_-5.00_0",
+      "thumbnail_path": "a6f20438-972f-48b0-8dc6-e95baec1c8c2.jpg"
+    },
+    {
+      "id": "4ee65f3b-aeaa-49d1-abda-28e270cca142",
+      "timestamp": "2025-08-07T12:41:43.438726",
+      "lat": 41.49319498028777,
+      "lng": 21.92920765772765,
+      "address": "Municipality of Rosoman",
+      "pano_id": "NcxnpDckFi3vt4-ntoF44A",
+      "pov": {
+        "heading": 99.85000000000002,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "41.493195_21.929208_99.85_-5.00_0",
+      "thumbnail_path": "4ee65f3b-aeaa-49d1-abda-28e270cca142.jpg"
+    },
+    {
+      "id": "3933f509-49f4-413f-b32d-95398910b3b6",
+      "timestamp": "2025-08-07T12:41:47.006438",
+      "lat": 31.875513355699223,
+      "lng": 35.492798274434385,
+      "address": "Green jericho",
+      "pano_id": "i9EnnjI_H0LQZ80DD8caeQ",
+      "pov": {
+        "heading": 328.85,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "31.875513_35.492798_328.85_-5.00_0",
+      "thumbnail_path": "3933f509-49f4-413f-b32d-95398910b3b6.jpg"
+    },
+    {
+      "id": "e32c0681-97bc-440e-9d8e-c1cb9511d47d",
+      "timestamp": "2025-08-07T12:41:50.873515",
+      "lat": 55.115320287969766,
+      "lng": 26.163976401890817,
+      "address": "128 Str\u016bnai\u010dio g., \u0160ven\u010dionys, Vilnius County",
+      "pano_id": "kN6UgL1Chn6ffNKK7wQmxA",
+      "pov": {
+        "heading": -192.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "55.115320_26.163976_-192.15_-5.00_0",
+      "thumbnail_path": "e32c0681-97bc-440e-9d8e-c1cb9511d47d.jpg"
+    },
+    {
+      "id": "15861215-f932-426b-a6fa-08ae0cd5ae54",
+      "timestamp": "2025-08-07T12:41:54.439626",
+      "lat": 55.115320287969766,
+      "lng": 26.163976401890817,
+      "address": "128 Str\u016bnai\u010dio g., \u0160ven\u010dionys, Vilnius County",
+      "pano_id": "kN6UgL1Chn6ffNKK7wQmxA",
+      "pov": {
+        "heading": -192.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "55.115320_26.163976_-192.15_-5.00_0",
+      "thumbnail_path": "15861215-f932-426b-a6fa-08ae0cd5ae54.jpg"
+    },
+    {
+      "id": "9a6c5a97-8501-489d-bade-f07bbcbebeea",
+      "timestamp": "2025-08-07T12:42:01.229172",
+      "lat": 40.13741279140719,
+      "lng": 19.645404417111592,
+      "address": "Dh\u00ebrmiu Beach",
+      "pano_id": "CAoSFkNJSE0wb2dLRUlDQWdJRHFqYW42YWc.",
+      "pov": {
+        "heading": 137.85000000000002,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "40.137413_19.645404_137.85_-5.00_0",
+      "thumbnail_path": "9a6c5a97-8501-489d-bade-f07bbcbebeea.jpg"
+    },
+    {
+      "id": "cbbab275-9be4-4d3a-b077-45ae1f8d14ff",
+      "timestamp": "2025-08-07T12:42:04.457716",
+      "lat": 29.18167200058433,
+      "lng": -95.43500220590631,
+      "address": "Angleton, Texas",
+      "pano_id": "IayHlQ-Wr58p-_kVKSK1ug",
+      "pov": {
+        "heading": 270.85,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "29.181672_-95.435002_270.85_-5.00_0",
+      "thumbnail_path": "cbbab275-9be4-4d3a-b077-45ae1f8d14ff.jpg"
+    },
+    {
+      "id": "011c76d0-d1cf-40f0-b243-3593448bce84",
+      "timestamp": "2025-08-07T12:42:07.779631",
+      "lat": 12.226344673460268,
+      "lng": 122.02353179975576,
+      "address": "Junction Guinhayaan - Malbog Port Rd, Looc, MIMAROPA",
+      "pano_id": "tAnV4HzcEaJ5IAm2Jgegiw",
+      "pov": {
+        "heading": -253.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "12.226345_122.023532_-253.15_-5.00_0",
+      "thumbnail_path": "011c76d0-d1cf-40f0-b243-3593448bce84.jpg"
+    },
+    {
+      "id": "a16553c1-8b4a-44f0-9d6d-9c23b1b93c86",
+      "timestamp": "2025-08-07T12:42:12.220880",
+      "lat": 34.062066594180294,
+      "lng": 133.86624813436472,
+      "address": "Tokushima Prefectural Rd No. 4, Higashimiyoshi, Tokushima",
+      "pano_id": "5Tp9jW_NWLnaKB_3NTeQSw",
+      "pov": {
+        "heading": -106.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "34.062067_133.866248_-106.15_-5.00_0",
+      "thumbnail_path": "a16553c1-8b4a-44f0-9d6d-9c23b1b93c86.jpg"
+    },
+    {
+      "id": "0246f9d3-be8d-40f0-805e-d0446ef2d183",
+      "timestamp": "2025-08-07T12:42:15.744386",
+      "lat": -41.21734957722994,
+      "lng": 172.11284555729617,
+      "address": "302 Rte 67, Karamea, West Coast Region",
+      "pano_id": "dcA7I3Arr0VPwKwgMxX_mQ",
+      "pov": {
+        "heading": 23.850000000000023,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "-41.217350_172.112846_23.85_-5.00_0",
+      "thumbnail_path": "0246f9d3-be8d-40f0-805e-d0446ef2d183.jpg"
+    },
+    {
+      "id": "54375156-8b78-4e60-afc9-f1172deba69d",
+      "timestamp": "2025-08-07T12:42:19.157383",
+      "lat": 46.10532360891025,
+      "lng": 15.119329939077309,
+      "address": "Podkraj, Podkraj, Municipality of Hrastnik",
+      "pano_id": "4bdhb8F41Au_r8UJIG8nCQ",
+      "pov": {
+        "heading": 204.85000000000002,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "46.105324_15.119330_204.85_-5.00_0",
+      "thumbnail_path": "54375156-8b78-4e60-afc9-f1172deba69d.jpg"
+    },
+    {
+      "id": "4fa45765-4ce7-4adc-a4fb-7f54149d6f27",
+      "timestamp": "2025-08-07T12:42:22.677283",
+      "lat": 44.370875416206346,
+      "lng": 5.1514140758707585,
+      "address": "1450 Les Fonts, Nyons, Auvergne-Rh\u00f4ne-Alpes",
+      "pano_id": "30HH_X24i7QOn6dILzYoKw",
+      "pov": {
+        "heading": -320.15,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "44.370875_5.151414_-320.15_-5.00_0",
+      "thumbnail_path": "4fa45765-4ce7-4adc-a4fb-7f54149d6f27.jpg"
+    },
+    {
+      "id": "08ef293d-2894-489f-b77f-377115c75921",
+      "timestamp": "2025-08-07T12:42:26.245168",
+      "lat": -19.541637267698466,
+      "lng": -63.55863586071773,
+      "address": "9, Santa Cruz Department",
+      "pano_id": "FmZr6VYcfqf_qwztM0cJ0g",
+      "pov": {
+        "heading": -125.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "-19.541637_-63.558636_-125.15_-5.00_0",
+      "thumbnail_path": "08ef293d-2894-489f-b77f-377115c75921.jpg"
+    },
+    {
+      "id": "8ff247f4-efdf-47e8-8aab-7752f7a7a033",
+      "timestamp": "2025-08-07T12:42:30.212571",
+      "lat": 25.60987433301616,
+      "lng": 55.754304628080014,
+      "address": "Al Alyaah St, Al Raafah, Emirate of Umm Al Quwain",
+      "pano_id": "3lt-n3rOsbk3GkZ3CiuMKQ",
+      "pov": {
+        "heading": 63.85000000000002,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "25.609874_55.754305_63.85_-5.00_0",
+      "thumbnail_path": "8ff247f4-efdf-47e8-8aab-7752f7a7a033.jpg"
+    },
+    {
+      "id": "41aa250b-f476-4c47-a8b3-1b170f892039",
+      "timestamp": "2025-08-07T12:42:33.795863",
+      "lat": 32.28525162497046,
+      "lng": -64.78725425926685,
+      "address": "Hodson's Ferry",
+      "pano_id": "CAoSF0NJSE0wb2dLRUlDQWdJQ0UzYkhPalFF",
+      "pov": {
+        "heading": 314.85,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "32.285252_-64.787254_314.85_-5.00_0",
+      "thumbnail_path": "41aa250b-f476-4c47-a8b3-1b170f892039.jpg"
+    },
+    {
+      "id": "1fc918f8-1b83-4aeb-a785-22a3cd15a407",
+      "timestamp": "2025-08-07T12:42:37.657812",
+      "lat": 45.888594934068315,
+      "lng": 16.65941553063258,
+      "address": "Bol\u010d, Zagreb County",
+      "pano_id": "EwgAJqZjebsU51bDLswlGg",
+      "pov": {
+        "heading": -201.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "45.888595_16.659416_-201.15_-5.00_0",
+      "thumbnail_path": "1fc918f8-1b83-4aeb-a785-22a3cd15a407.jpg"
+    },
+    {
+      "id": "6186abe6-6343-41bd-b7c6-ef65e5fb5a83",
+      "timestamp": "2025-08-07T12:42:41.528253",
+      "lat": 68.72859088427079,
+      "lng": 16.900531665561935,
+      "address": "Fv848, Troms",
+      "pano_id": "HUtqcc4YFuJA6EGOvLaOSg",
+      "pov": {
+        "heading": -33.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "68.728591_16.900532_-33.15_-5.00_0",
+      "thumbnail_path": "6186abe6-6343-41bd-b7c6-ef65e5fb5a83.jpg"
+    },
+    {
+      "id": "f9d01601-da06-4286-b83f-aad48292ef56",
+      "timestamp": "2025-08-07T12:42:45.046606",
+      "lat": 56.718393213855904,
+      "lng": 25.74434588961816,
+      "address": "Aizkraukle Municipality",
+      "pano_id": "lFBbYokbq5Azj-WuXKkAww",
+      "pov": {
+        "heading": 112.85000000000002,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "56.718393_25.744346_112.85_-5.00_0",
+      "thumbnail_path": "f9d01601-da06-4286-b83f-aad48292ef56.jpg"
+    },
+    {
+      "id": "29521be4-0c47-40b4-9fe5-14dd37686eed",
+      "timestamp": "2025-08-07T12:42:48.525979",
+      "lat": 38.22079108487478,
+      "lng": -1.0621034114314583,
+      "address": "MU-412, Abanilla, Region of Murcia",
+      "pano_id": "YoaYr1t8aZ65kAKY_xoa4Q",
+      "pov": {
+        "heading": 338.85,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "38.220791_-1.062103_338.85_-5.00_0",
+      "thumbnail_path": "29521be4-0c47-40b4-9fe5-14dd37686eed.jpg"
+    },
+    {
+      "id": "cf0e39c0-67b7-4d72-a51b-fa006fa8e036",
+      "timestamp": "2025-08-07T12:42:52.036523",
+      "lat": 14.586380510782684,
+      "lng": -91.12495671396474,
+      "address": "RN-11, Patulul, Solol\u00e1 Department",
+      "pano_id": "e83Ymkc4WsPjYZSXQSkhlQ",
+      "pov": {
+        "heading": -249.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "14.586381_-91.124957_-249.15_-5.00_0",
+      "thumbnail_path": "cf0e39c0-67b7-4d72-a51b-fa006fa8e036.jpg"
+    },
+    {
+      "id": "0049770c-0e79-4f6e-a230-85815c5afca4",
+      "timestamp": "2025-08-07T12:42:55.545371",
+      "lat": 41.24761837711202,
+      "lng": 19.900912328789897,
+      "address": "SH3, B\u00ebrzhit\u00eb, Tirana County",
+      "pano_id": "H4OtJUEIjqNM4h3b3zJiog",
+      "pov": {
+        "heading": -38.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "41.247618_19.900912_-38.15_-5.00_0",
+      "thumbnail_path": "0049770c-0e79-4f6e-a230-85815c5afca4.jpg"
+    },
+    {
+      "id": "108d3530-8cd1-4554-9e27-f4161c25b64f",
+      "timestamp": "2025-08-07T12:42:59.039576",
+      "lat": 23.106680960105503,
+      "lng": 120.31480234033475,
+      "address": "Tainan City",
+      "pano_id": "wPKvYXSO2t3Cjb9d_92vbQ",
+      "pov": {
+        "heading": 177.85000000000002,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "23.106681_120.314802_177.85_-5.00_0",
+      "thumbnail_path": "108d3530-8cd1-4554-9e27-f4161c25b64f.jpg"
+    },
+    {
+      "id": "684589c2-db98-4fa0-a909-26677d622781",
+      "timestamp": "2025-08-07T12:43:02.607203",
+      "lat": 24.280060413908377,
+      "lng": 91.40645644538027,
+      "address": "Sylhet Division",
+      "pano_id": "vnxugWDu7BvOIQKU2pGreQ",
+      "pov": {
+        "heading": 289.85,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "24.280060_91.406456_289.85_-5.00_0",
+      "thumbnail_path": "684589c2-db98-4fa0-a909-26677d622781.jpg"
+    },
+    {
+      "id": "9e52e1ef-b7c8-4290-a50c-dea42684329c",
+      "timestamp": "2025-08-07T12:43:06.097012",
+      "lat": 23.075670254787028,
+      "lng": 120.16583641147342,
+      "address": "Tainan City",
+      "pano_id": "KT8dvKAlDqRIWqXVig9tRA",
+      "pov": {
+        "heading": -217.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "23.075670_120.165836_-217.15_-5.00_0",
+      "thumbnail_path": "9e52e1ef-b7c8-4290-a50c-dea42684329c.jpg"
+    },
+    {
+      "id": "54ccc34f-ae30-449b-83cf-3f6485186e38",
+      "timestamp": "2025-08-07T12:43:09.571839",
+      "lat": 16.069303835253045,
+      "lng": -13.917845261546633,
+      "address": "N2, Saint-Louis Region",
+      "pano_id": "AOhMIvzxsCcRhsHw2BVUzA",
+      "pov": {
+        "heading": -106.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "16.069304_-13.917845_-106.15_-5.00_0",
+      "thumbnail_path": "54ccc34f-ae30-449b-83cf-3f6485186e38.jpg"
+    },
+    {
+      "id": "9903bb23-294e-44a2-9ecf-180808b82d67",
+      "timestamp": "2025-08-07T12:43:12.991252",
+      "lat": -32.83743900844668,
+      "lng": -70.95213519080639,
+      "address": "218 Capit\u00e1n Avalos, Llay-Llay, Valpara\u00edso",
+      "pano_id": "xNJYW4PSgzGV2TEqMpEBpA",
+      "pov": {
+        "heading": 68.85000000000002,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "-32.837439_-70.952135_68.85_-5.00_0",
+      "thumbnail_path": "9903bb23-294e-44a2-9ecf-180808b82d67.jpg"
+    },
+    {
+      "id": "4381807b-d04c-4c04-8b93-78a588016cb7",
+      "timestamp": "2025-08-07T12:43:16.523957",
+      "lat": 4.5400338406517715,
+      "lng": -76.1944593680759,
+      "address": "El Dovio-Versalles, Valle del Cauca",
+      "pano_id": "wgWdWsvikF8kFmi_FZVstg",
+      "pov": {
+        "heading": 333.85,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "4.540034_-76.194459_333.85_-5.00_0",
+      "thumbnail_path": "4381807b-d04c-4c04-8b93-78a588016cb7.jpg"
+    },
+    {
+      "id": "c9d4d2c0-be12-4104-9fdf-3ffd7b9b539a",
+      "timestamp": "2025-08-07T12:43:20.060957",
+      "lat": 31.65645279027197,
+      "lng": 34.9414288862752,
+      "address": "Nir Louk",
+      "pano_id": "CAoSFkNJSE0wb2dLRUlDQWdJQzZqcWJXQ2c.",
+      "pov": {
+        "heading": -248.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "31.656453_34.941429_-248.15_-5.00_0",
+      "thumbnail_path": "c9d4d2c0-be12-4104-9fdf-3ffd7b9b539a.jpg"
+    },
+    {
+      "id": "574ac51d-1de1-46b2-9f90-5b1da1d79339",
+      "timestamp": "2025-08-07T12:43:23.601528",
+      "lat": 5.90176654207688,
+      "lng": 0.9886556847260388,
+      "address": "Keta, Volta Region",
+      "pano_id": "ipwTobbIbpx2SEjFzq6kww",
+      "pov": {
+        "heading": -146.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "5.901767_0.988656_-146.15_-5.00_0",
+      "thumbnail_path": "574ac51d-1de1-46b2-9f90-5b1da1d79339.jpg"
+    },
+    {
+      "id": "87e095f0-467b-4539-978b-46eecfdf1efc",
+      "timestamp": "2025-08-07T12:43:27.067655",
+      "lat": 46.29179908449921,
+      "lng": 16.580906762551983,
+      "address": "Komarnica Ludbre\u0161ka, Vara\u017edin County",
+      "pano_id": "ha0KsxP_lG1phxES1aSmGQ",
+      "pov": {
+        "heading": 111.85000000000002,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "46.291799_16.580907_111.85_-5.00_0",
+      "thumbnail_path": "87e095f0-467b-4539-978b-46eecfdf1efc.jpg"
+    },
+    {
+      "id": "3badb1cb-5ffb-4c07-812e-ee85646a4279",
+      "timestamp": "2025-08-07T12:43:30.517183",
+      "lat": 43.891541352607554,
+      "lng": 5.774287870706945,
+      "address": "Dauphin, Provence-Alpes-C\u00f4te d'Azur",
+      "pano_id": "b9cJ5iGIYH2JHWrRmDDSFg",
+      "pov": {
+        "heading": -351.15,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "43.891541_5.774288_-351.15_-5.00_0",
+      "thumbnail_path": "3badb1cb-5ffb-4c07-812e-ee85646a4279.jpg"
+    },
+    {
+      "id": "1acb3834-1f22-4c0c-8cd3-b992e4546f88",
+      "timestamp": "2025-08-07T12:43:34.052597",
+      "lat": 20.805812868893106,
+      "lng": -89.6933791766117,
+      "address": "Hotzuc, Yucatan",
+      "pano_id": "ShCiTFG-KoqkokeXeCyG2w",
+      "pov": {
+        "heading": -236.14999999999998,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "20.805813_-89.693379_-236.15_-5.00_0",
+      "thumbnail_path": "1acb3834-1f22-4c0c-8cd3-b992e4546f88.jpg"
+    },
+    {
+      "id": "8dacb066-8fa4-4f03-87e3-34d86f5863fb",
+      "timestamp": "2025-08-07T12:43:37.674750",
+      "lat": 47.974591513902844,
+      "lng": 108.47688185828954,
+      "address": "Baganuur-Mengenmorit, Mungunmorit, T\u00f6v, Mongolia",
+      "pano_id": "V0i3_HH4f4IM9hTEg0QRqg",
+      "pov": {
+        "heading": 13.850000000000023,
+        "pitch": 5,
+        "zoom": 0.9999999999999997
+      },
+      "url_slug": "47.974592_108.476882_13.85_-5.00_0",
+      "thumbnail_path": "8dacb066-8fa4-4f03-87e3-34d86f5863fb.jpg"
+    }
+  ]
+}

geo_bot.py CHANGED Viewed

@@ -69,6 +69,72 @@ Your response MUST be a valid JSON object wrapped in ```json ... ```.
 ```
 """
 BENCHMARK_PROMPT = """
 Analyze the image and determine its geographic coordinates.
 1.  Describe visual clues.
@@ -250,10 +316,54 @@ class GeoBot:
             decision = {
                 "reasoning": "Recovery due to parsing failure or model error.",
                 "action_details": {"action": "PAN_RIGHT"},
             }
         return decision
     def execute_action(self, action: str) -> bool:
         """
         Execute the given action using the controller.
@@ -271,6 +381,77 @@ class GeoBot:
             self.controller.pan_view("right")
         return True
     def run_agent_loop(
         self, max_steps: int = 10, step_callback=None
     ) -> Optional[Tuple[float, float]]:
@@ -347,6 +528,7 @@ class GeoBot:
                 "reasoning": decision.get("reasoning", "N/A"),
                 "action_details": decision.get("action_details", {"action": "N/A"}),
                 "history": history.copy(),  # History up to current step (excluding current)
             }
             action_details = decision.get("action_details", {})

 ```
 """
+# Prompt for the test agent loop. It is filled via str.format() with `history_text`
+# and `available_actions`; doubled braces ({{ }}) come out as literal braces.
+# NOTE(review): the text refers to "Remaining Steps" and to a `GUESS` action, but the
+# template has no remaining-steps placeholder and `get_test_available_actions` never
+# offers `GUESS` — confirm this mismatch is intended.
+TEST_AGENT_PROMPT_TEMPLATE = """
+**Mission:** You are an expert geo-location agent. Your goal is to pinpoint our position based on the surroundings and your observation history.
+**Current Status**
+• Actions You Can Take *this* turn: {available_actions}
+────────────────────────────────
+**Core Principles**
+1.  **Observe → Orient → Act**
+    Start each turn with a structured three-part reasoning block:
+    **(1) Visual Clues —** plainly describe what you see (signs, text language, road lines, vegetation, building styles, vehicles, terrain, weather, etc.).
+    **(2) Potential Regions —** list the most plausible regions/countries those clues suggest.
+    **(3) Most Probable + Plan —** pick the single likeliest region and explain the next action (move/pan or guess).
+2.  **Navigate with Labels:**
+    - `MOVE_FORWARD` follows the green **UP** arrow.
+    - `MOVE_BACKWARD` follows the red **DOWN** arrow.
+    - No arrow ⇒ you cannot move that way.
+3.  **Efficient Exploration:**
+    - **Pan Before You Move:** At fresh spots/intersections, use `PAN_LEFT` / `PAN_RIGHT` first.
+    - After ~2 or 3 fruitless moves in repetitive scenery, turn around.
+4.  **Be Decisive:** A unique, definitive clue (full address, rare town name, etc.) ⇒ `GUESS` immediately.
+5.  **Final-Step Rule:** If **Remaining Steps = 1**, you **MUST** `GUESS` and you should carefully check the image and the surroundings.
+6.  **Always Predict:** On EVERY step, provide your current best estimate of the location, even if you're not ready to make a final guess.
+────────────────────────────────
+**Context & Task:**
+Analyze your full journey history and current view, apply the Core Principles, and decide your next action in the required JSON format.
+**Action History**
+{history_text}
+────────────────────────────────
+**JSON Output Format:**
+Your response MUST be a valid JSON object wrapped in ```json ... ```.
+{{
+  "reasoning": "…",
+  "current_prediction": {{
+    "lat": <float>,
+    "lon": <float>,
+    "location_description": "Brief description of predicted location"
+  }},
+  "action_details": {{"action": action chosen from the available actions}}
+}}
+**Example **
+```json
+{{
+  "reasoning": "(1) Visual Clues — I see left-side driving, eucalyptus trees, and a yellow speed-warning sign; the road markings are solid white. (2) Potential Regions — Southeastern Australia, Tasmania, or the North Island of New Zealand. (3) Most Probable + Plan — The scene most likely sits in a suburb of Hobart, Tasmania. I will PAN_LEFT to look for additional road signs that confirm this.",
+  "current_prediction": {{
+    "lat": -42.8806,
+    "lon": 147.3250,
+    "location_description": "Hobart suburb, Tasmania, Australia"
+  }},
+  "action_details": {{
+    "action": "PAN_LEFT"
+  }}
+}}
+```
+"""
 BENCHMARK_PROMPT = """
 Analyze the image and determine its geographic coordinates.
 1.  Describe visual clues.
             decision = {
                 "reasoning": "Recovery due to parsing failure or model error.",
                 "action_details": {"action": "PAN_RIGHT"},
+                "debug_message": f"{response.content.strip()}",
             }
         return decision
+    def execute_test_agent_step(
+        self,
+        history: List[Dict[str, Any]],
+        current_screenshot_b64: str,
+        available_actions: List[str],
+    ) -> Optional[Dict[str, Any]]:
+        """
+        Execute a single agent step: generate prompt, get AI decision, return decision.
+        This is the core step logic extracted for reuse.
+        """
+        history_text = self.generate_history_text(history)
+        image_b64_for_prompt = self.get_history_images(history) + [
+            current_screenshot_b64
+        ]
+        prompt = TEST_AGENT_PROMPT_TEMPLATE.format(
+            history_text=history_text,
+            available_actions=available_actions,
+        )
+        try:
+            message = self._create_message_with_history(
+                prompt, image_b64_for_prompt[-1:]
+            )
+            response = self.model.invoke(message)
+            decision = self._parse_agent_response(response)
+        except Exception as e:
+            print(f"Error during model invocation: {e}")
+            decision = None
+        if not decision:
+            print(
+                "Response parsing failed or model error. Using default recovery action: PAN_RIGHT."
+            )
+            decision = {
+                "reasoning": "Recovery due to parsing failure or model error.",
+                "action_details": {"action": "PAN_RIGHT"},
+                "current_prediction": "N/A",
+                "debug_message": f"{response.content.strip() if response is not None else 'N/A'}",
+            }
+        return decision
     def execute_action(self, action: str) -> bool:
         """
         Execute the given action using the controller.
             self.controller.pan_view("right")
         return True
+    def test_run_agent_loop(self, max_steps: int = 10, step_callback=None) -> Optional[list[Tuple[float, float]]]:
+        history = self.init_history()
+        predictions = []
+        for step in range(max_steps, 0, -1):
+            # Setup and screenshot
+            self.controller.setup_clean_environment()
+            self.controller.label_arrows_on_screen()
+            screenshot_bytes = self.controller.take_street_view_screenshot()
+            if not screenshot_bytes:
+                print("Failed to take screenshot. Ending agent loop.")
+                return None
+            current_screenshot_b64 = self.pil_to_base64(
+                image=Image.open(BytesIO(screenshot_bytes))
+            )
+            available_actions = self.controller.get_test_available_actions()
+            # print(f"Available actions: {available_actions}")
+            # Normal step execution
+            decision = self.execute_test_agent_step(
+                history, current_screenshot_b64, available_actions
+            )
+            # Create step_info with current history BEFORE adding current step
+            # This shows the history up to (but not including) the current step
+            step_info = {
+                "max_steps": max_steps,
+                "remaining_steps": step,
+                "screenshot_bytes": screenshot_bytes,
+                "screenshot_b64": current_screenshot_b64,
+                "available_actions": available_actions,
+                "is_final_step": step == 1,
+                "reasoning": decision.get("reasoning", "N/A"),
+                "action_details": decision.get("action_details", {"action": "N/A"}),
+                "history": history.copy(),  # History up to current step (excluding current)
+                "debug_message": decision.get("debug_message", "N/A"),
+                "current_prediction": decision.get("current_prediction", "N/A"),
+            }
+            action_details = decision.get("action_details", {})
+            action = action_details.get("action")
+            # print(f"AI Reasoning: {decision.get('reasoning', 'N/A')}")
+            # print(f"AI Current Prediction: {decision.get('current_prediction', 'N/A')}")
+            # print(f"AI Action: {action}")
+            # Add step to history AFTER callback (so next iteration has this step in history)
+            self.add_step_to_history(history, current_screenshot_b64, decision)
+            current_prediction = decision.get("current_prediction")
+            if current_prediction and isinstance(current_prediction, dict):
+                current_prediction["reasoning"] = decision.get("reasoning", "N/A")
+                predictions.append(current_prediction)
+            else:
+                # Fallback: create a basic prediction structure
+                print(f"Invalid current prediction: {current_prediction}")
+                fallback_prediction = {
+                    "lat": 0.0,
+                    "lon": 0.0,
+                    "confidence": 0.0,
+                    "location_description": "N/A",
+                    "reasoning": decision.get("reasoning", "N/A")
+                }
+                predictions.append(fallback_prediction)
+            self.execute_action(action)
+        return predictions
     def run_agent_loop(
         self, max_steps: int = 10, step_callback=None
     ) -> Optional[Tuple[float, float]]:
                 "reasoning": decision.get("reasoning", "N/A"),
                 "action_details": decision.get("action_details", {"action": "N/A"}),
                 "history": history.copy(),  # History up to current step (excluding current)
+                "debug_message": decision.get("debug_message", "N/A"),
             }
             action_details = decision.get("action_details", {})

main.py CHANGED Viewed

@@ -1,11 +1,15 @@
 import argparse
 import json
 from geo_bot import GeoBot
 from benchmark import MapGuesserBenchmark
 from data_collector import DataCollector
 from config import MODELS_CONFIG, get_data_paths, SUCCESS_THRESHOLD_KM, get_model_class
 def agent_mode(
     model_name: str,
@@ -147,11 +151,165 @@ def collect_mode(dataset_name: str, samples: int, headless: bool):
     print(f"Data collection complete for dataset '{dataset_name}'.")
 def main():
     parser = argparse.ArgumentParser(description="MapCrunch AI Agent & Benchmark")
     parser.add_argument(
         "--mode",
-        choices=["agent", "benchmark", "collect"],
         default="agent",
         help="Operation mode.",
     )
@@ -190,6 +348,7 @@ def main():
         default=0.0,
         help="Temperature parameter for LLM sampling (0.0 = deterministic, higher = more random). Default: 0.0",
     )
     args = parser.parse_args()
@@ -216,6 +375,16 @@ def main():
             dataset_name=args.dataset,
             temperature=args.temperature,
         )
 if __name__ == "__main__":

 import argparse
 import json
+import os
+from datetime import datetime
 from geo_bot import GeoBot
 from benchmark import MapGuesserBenchmark
 from data_collector import DataCollector
 from config import MODELS_CONFIG, get_data_paths, SUCCESS_THRESHOLD_KM, get_model_class
+from collections import OrderedDict
+from tqdm import tqdm
+import matplotlib.pyplot as plt
 def agent_mode(
     model_name: str,
     print(f"Data collection complete for dataset '{dataset_name}'.")
+def test_mode(
+    models: list,
+    samples: int,
+    runs: int,
+    steps: int,
+    dataset_name: str = "default",
+    temperature: float = 0.0,
+    headless: bool = True,
+):
+    """
+    CLI multi-model / multi-run benchmark.
+    For each model:
+        • run N times
+        • each run evaluates `samples` images
+        • record hit-rate per step and average distance
+    """
+    # ---------- load dataset ----------
+    data_paths = get_data_paths(dataset_name)
+    try:
+        with open(data_paths["golden_labels"], "r", encoding="utf-8") as f:
+            all_samples = json.load(f)["samples"]
+    except FileNotFoundError:
+        print(f"❌ dataset '{dataset_name}' not found.")
+        return
+    if not all_samples:
+        print("❌ dataset is empty.")
+        return
+    test_samples = all_samples[:samples]
+    print(f"📊 loaded {len(test_samples)} samples from '{dataset_name}'")
+    benchmark_helper = MapGuesserBenchmark(dataset_name=dataset_name, headless=headless)
+    summary_by_step: dict[str, list[float]] = OrderedDict()
+    avg_distances: dict[str, float] = {}
+    time_tag   = datetime.now().strftime("%Y%m%d_%H%M%S")
+    base_dir   = os.path.join("./results", "test", time_tag)
+    os.makedirs(base_dir, exist_ok=True)
+    # ---------- iterate over models ----------
+    for model_name in models:
+        log_json={}
+        print(f"\n===== {model_name} =====")
+        cfg = MODELS_CONFIG[model_name]
+        model_cls = get_model_class(cfg["class"])
+        hits_per_step = [0] * steps
+        distance_per_step = [0.0] * steps
+        total_iterations = runs * len(test_samples)
+        with tqdm(total=total_iterations, desc=model_name) as pbar:
+            for _ in range(runs):
+                with GeoBot(
+                    model=model_cls,
+                    model_name=cfg["model_name"],
+                    headless=headless,
+                    temperature=temperature,
+                ) as bot:
+                    for sample in test_samples:
+                        if not bot.controller.load_location_from_data(sample):
+                            pbar.update(1)
+                            continue
+                        preds = bot.test_run_agent_loop(max_steps=steps)
+                        gt = {"lat": sample["lat"], "lng": sample["lng"]}
+                        if sample["id"] not in log_json:
+                            log_json[sample["id"]] = []
+                        for idx, pred in enumerate(preds):
+                            if isinstance(pred, dict) and "lat" in pred:
+                                dist = benchmark_helper.calculate_distance(
+                                    gt, (pred["lat"], pred["lon"])
+                                )
+                                if dist is not None:
+                                    distance_per_step[idx] += dist
+                                    preds[idx]["distance"] = dist
+                                    if dist <= SUCCESS_THRESHOLD_KM:
+                                        hits_per_step[idx] += 1
+                                        preds[idx]["success"] = True
+                                    else:
+                                        preds[idx]["success"] = False
+                        log_json[sample["id"]].append({
+                            "run_id": _,
+                            "predictions": preds,
+                            })
+                        pbar.update(1)
+        os.makedirs(f"{base_dir}/{model_name}", exist_ok=True)
+        with open(f"{base_dir}/{model_name}/{model_name}_log.json", "w") as f:
+            json.dump(log_json, f, indent=2)
+        denom = runs * len(test_samples)
+        summary_by_step[model_name] = [h / denom for h in hits_per_step]
+        avg_distances[model_name] = [d / denom for d in distance_per_step]
+        payload = {
+            "avg_distance_km":  avg_distances[model_name],
+            "accuracy_per_step": summary_by_step[model_name]
+        }
+        with open(f"{base_dir}/{model_name}/{model_name}.json", "w") as f:
+            json.dump(payload, f, indent=2)
+        print(f"💾 results saved to {base_dir}")
+    # ---------- pretty table ----------
+    header = ["Step"] + list(summary_by_step.keys())
+    row_width = max(len(h) for h in header) + 2
+    print("\n=== ACCURACY PER STEP ===")
+    print(" | ".join(h.center(row_width) for h in header))
+    print("-" * (row_width + 3) * len(header))
+    for i in range(steps):
+        cells = [str(i + 1).center(row_width)]
+        for m in summary_by_step:
+            cells.append(f"{summary_by_step[m][i]*100:5.1f}%".center(row_width))
+        print(" | ".join(cells))
+    print("\n=== AVG DISTANCE PER STEP (km) ===")
+    header = ["Step"] + list(avg_distances.keys())
+    row_w  = max(len(h) for h in header) + 2
+    print(" | ".join(h.center(row_w) for h in header))
+    print("-" * (row_w + 3) * len(header))
+    for i in range(steps):
+        cells = [str(i+1).center(row_w)]
+        for m in avg_distances:
+            v = avg_distances[m][i]
+            cells.append(f"{v:6.1f}" if v is not None else "  N/A ".center(row_w))
+        print(" | ".join(cells))
+    try:
+        for model, acc in summary_by_step.items():
+            plt.plot(range(1, steps + 1), acc, marker="o", label=model)
+        plt.xlabel("step")
+        plt.ylabel("accuracy")
+        plt.ylim(0, 1)
+        plt.legend()
+        plt.grid(True, alpha=0.3)
+        plt.title("Accuracy vs Step")
+        plt.savefig(f"{base_dir}/accuracy_step.png", dpi=120)
+        print("\n📈 saved plot to accuracy_step.png")
+        # Plot average distance per model
+        plt.figure()
+        for model, acc in avg_distances.items():
+            plt.plot(range(1, steps + 1), acc, marker="o", label=model)
+        plt.xlabel("step")
+        plt.ylabel("Avg Distance (km)")
+        plt.title("Average Distance per Model")
+        plt.xticks(rotation=45, ha="right")
+        plt.tight_layout()
+        plt.savefig(f"{base_dir}/avg_distance.png", dpi=120)
+        print("📈 saved plot to avg_distance.png")
+    except Exception as e:
+        print(f"⚠️ plot skipped: {e}")
 def main():
     parser = argparse.ArgumentParser(description="MapCrunch AI Agent & Benchmark")
     parser.add_argument(
         "--mode",
+        choices=["agent", "benchmark", "collect", "test"],
         default="agent",
         help="Operation mode.",
     )
         default=0.0,
         help="Temperature parameter for LLM sampling (0.0 = deterministic, higher = more random). Default: 0.0",
     )
+    parser.add_argument("--runs", type=int, default=3, help="[Test] Runs per model")
     args = parser.parse_args()
             dataset_name=args.dataset,
             temperature=args.temperature,
         )
+    elif args.mode == "test":
+        test_mode(
+            models=args.models or [args.model],
+            samples=args.samples,
+            runs=args.runs,
+            steps=args.steps,
+            dataset_name=args.dataset,
+            temperature=args.temperature,
+            headless=args.headless,
+        )
 if __name__ == "__main__":

mapcrunch_controller.py CHANGED Viewed

@@ -11,53 +11,80 @@ from config import MAPCRUNCH_URL, SELECTORS, DATA_COLLECTION_CONFIG
 class MapCrunchController:
     def __init__(self, headless: bool = False):
-        options = uc.ChromeOptions()
-        options.add_argument(
-            "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36"
-        )
-        options.add_argument("--window-size=1920,1080")
-        options.set_capability("goog:loggingPrefs", {"browser": "ALL"})
-        if headless:
-            options.add_argument("--headless=new")
-        self.driver = uc.Chrome(options=options, use_subprocess=True)
         self.wait = WebDriverWait(self.driver, 10)
-        # Here we are injecting a script to the page to disable the browser detection.
-        # Basically, we are setting the badBrowser property to 0, which is a property that is used to detect if the browser is being controlled by a script.
-        # In the main.min.js, we can see some js code like this:
-        # if (badBrowser) {
-        #     alert("Unsupported browser!");
-        # } else {
-        #     window.panorama = { ... }
-        # }
-        self.driver.execute_cdp_cmd(
-            "Page.addScriptToEvaluateOnNewDocument",
-            {
-                "source": """
-                Object.defineProperty(window, 'badBrowser', {
-                  value: 0,
-                  writable: false,
-                  configurable: false
-                });
-                window.alert = function() {};
-                Object.defineProperty(navigator, 'webdriver', {
-                  get: () => undefined
-                });
-            """
-            },
-        )
         for retry in range(3):
             try:
                 self.driver.get(MAPCRUNCH_URL)
                 time.sleep(3)
                 break
             except Exception as e:
                 if retry == 2:
                     raise e
-                print(f"Failed to load MapCrunch, retry {retry + 1}/3")
                 time.sleep(2)
     def setup_clean_environment(self):
@@ -187,6 +214,16 @@ class MapCrunchController:
             base_actions.extend(["MOVE_FORWARD", "MOVE_BACKWARD"])
         return base_actions
     def get_current_address(self) -> Optional[str]:
         try:
             address_element = self.wait.until(

 class MapCrunchController:
     def __init__(self, headless: bool = False):
+        # Try to initialize ChromeDriver with version 137 (your current Chrome version)
+        try:
+            # Create fresh ChromeOptions for first attempt
+            options = uc.ChromeOptions()
+            options.add_argument("--no-sandbox")
+            options.add_argument("--disable-dev-shm-usage")
+            options.add_argument("--disable-gpu")
+            options.add_argument("--window-size=1920,1080")
+            options.add_argument("--disable-web-security")
+            options.add_argument("--disable-features=VizDisplayCompositor")
+            options.add_argument("--disable-blink-features=AutomationControlled")
+            if headless:
+                options.add_argument("--headless=new")
+            self.driver = uc.Chrome(options=options, use_subprocess=True, version_main=137)
+            print("✅ ChromeDriver initialized successfully with version 137")
+        except Exception as e:
+            print(f"Failed with version 137: {e}")
+            try:
+                # Create fresh ChromeOptions for fallback attempt
+                options = uc.ChromeOptions()
+                options.add_argument("--no-sandbox")
+                options.add_argument("--disable-dev-shm-usage")
+                options.add_argument("--disable-gpu")
+                options.add_argument("--window-size=1920,1080")
+                options.add_argument("--disable-web-security")
+                options.add_argument("--disable-features=VizDisplayCompositor")
+                options.add_argument("--disable-blink-features=AutomationControlled")
+                if headless:
+                    options.add_argument("--headless=new")
+                # Fallback to auto-detection
+                self.driver = uc.Chrome(options=options, use_subprocess=True)
+                print("✅ ChromeDriver initialized successfully with auto-detection")
+            except Exception as e2:
+                print(f"Failed with auto-detection: {e2}")
+                raise Exception(f"Could not initialize ChromeDriver. Please update Chrome or check compatibility. Errors: {e}, {e2}")
         self.wait = WebDriverWait(self.driver, 10)
+        # Inject browser detection bypass script
+        try:
+            self.driver.execute_cdp_cmd(
+                "Page.addScriptToEvaluateOnNewDocument",
+                {
+                    "source": """
+                    Object.defineProperty(window, 'badBrowser', {
+                      value: 0,
+                      writable: false,
+                      configurable: false
+                    });
+                    window.alert = function() {};
+                    Object.defineProperty(navigator, 'webdriver', {
+                      get: () => undefined
+                    });
+                """
+                },
+            )
+        except Exception as e:
+            print(f"Warning: Could not inject browser detection script: {e}")
+        # Load MapCrunch
         for retry in range(3):
             try:
                 self.driver.get(MAPCRUNCH_URL)
                 time.sleep(3)
+                print("✅ MapCrunch loaded successfully")
                 break
             except Exception as e:
                 if retry == 2:
                     raise e
+                print(f"Failed to load MapCrunch, retry {retry + 1}/3: {e}")
                 time.sleep(2)
     def setup_clean_environment(self):
             base_actions.extend(["MOVE_FORWARD", "MOVE_BACKWARD"])
         return base_actions
+    def get_test_available_actions(self) -> List[str]:
+        """
+        Checks for movement links via JavaScript.
+        """
+        base_actions = ["PAN_LEFT", "PAN_RIGHT"]
+        links = self.driver.execute_script("return window.panorama.getLinks();")
+        if links and len(links) > 0:
+            base_actions.extend(["MOVE_FORWARD", "MOVE_BACKWARD"])
+        return base_actions
     def get_current_address(self) -> Optional[str]:
         try:
             address_element = self.wait.until(