LRU1 committed on
Commit
3960a1f
·
1 Parent(s): 1a948ca

Add a test mode in main.py to evaluate the models' performance

Browse files
Files changed (7) hide show
  1. .gitignore +1 -1
  2. analyzer.ipynb +0 -0
  3. app.py +40 -3
  4. datasets/test/golden_labels.json +759 -0
  5. experiment_runner.py +0 -0
  6. geo_bot.py +21 -6
  7. main.py +171 -2
.gitignore CHANGED
@@ -14,4 +14,4 @@ datasets/*/thumbnails/
14
 
15
  # Legacy data directory (can be removed if no longer used)
16
  data/
17
- !data/golden_labels.json
 
14
 
15
  # Legacy data directory (can be removed if no longer used)
16
  data/
17
+ !data/golden_labels.json
analyzer.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -258,6 +258,7 @@ if start_button:
258
  if mode == "Test Mode":
259
  benchmark_helper = MapGuesserBenchmark(dataset_name=dataset_choice)
260
  summary_by_step = {}
 
261
  progress_bar = st.progress(0)
262
  for mi, model_name in enumerate(selected_models):
263
  st.header(f"Model: {model_name}")
@@ -265,6 +266,10 @@ if start_button:
265
  model_class = get_model_class(config["class"])
266
 
267
  successes_per_step = [0]*steps_per_sample
 
 
 
 
268
  total_iterations = runs_per_model * num_samples
269
  model_bar = st.progress(0, text=f"Running {model_name}")
270
  iteration_counter = 0
@@ -280,13 +285,25 @@ if start_button:
280
  for step_idx, pred in enumerate(predictions):
281
  if isinstance(pred, dict) and "lat" in pred:
282
  dist = benchmark_helper.calculate_distance(true_coords, (pred["lat"], pred["lon"]))
283
- if dist is not None and dist <= SUCCESS_THRESHOLD_KM:
284
- successes_per_step[step_idx] += 1
 
 
 
 
 
285
  iteration_counter += 1
286
  model_bar.progress(iteration_counter/total_iterations)
287
- # calculate accuracy per step
288
  acc_per_step = [s/(num_samples*runs_per_model) for s in successes_per_step]
289
  summary_by_step[model_name] = acc_per_step
 
 
 
 
 
 
 
290
  progress_bar.progress((mi+1)/len(selected_models))
291
  # plot
292
  st.subheader("Accuracy vs Steps")
@@ -312,6 +329,26 @@ if start_button:
312
  )
313
 
314
  st.altair_chart(chart, use_container_width=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
315
  st.stop()
316
 
317
  else:
 
258
  if mode == "Test Mode":
259
  benchmark_helper = MapGuesserBenchmark(dataset_name=dataset_choice)
260
  summary_by_step = {}
261
+ avg_distance_by_step = {}
262
  progress_bar = st.progress(0)
263
  for mi, model_name in enumerate(selected_models):
264
  st.header(f"Model: {model_name}")
 
266
  model_class = get_model_class(config["class"])
267
 
268
  successes_per_step = [0]*steps_per_sample
269
+
270
+ dist_sum_per_step = [0.0]*steps_per_sample
271
+ dist_cnt_per_step = [0]*steps_per_sample
272
+
273
  total_iterations = runs_per_model * num_samples
274
  model_bar = st.progress(0, text=f"Running {model_name}")
275
  iteration_counter = 0
 
285
  for step_idx, pred in enumerate(predictions):
286
  if isinstance(pred, dict) and "lat" in pred:
287
  dist = benchmark_helper.calculate_distance(true_coords, (pred["lat"], pred["lon"]))
288
+ if dist is not None:
289
+ # New: accumulate the distance sum and the sample count
290
+ dist_sum_per_step[step_idx] += dist
291
+ dist_cnt_per_step[step_idx] += 1
292
+ # Existing: success count
293
+ if dist <= SUCCESS_THRESHOLD_KM:
294
+ successes_per_step[step_idx] += 1
295
  iteration_counter += 1
296
  model_bar.progress(iteration_counter/total_iterations)
297
+
298
  acc_per_step = [s/(num_samples*runs_per_model) for s in successes_per_step]
299
  summary_by_step[model_name] = acc_per_step
300
+
301
+ avg_per_step = [
302
+ (dist_sum_per_step[i]/dist_cnt_per_step[i]) if dist_cnt_per_step[i] else None
303
+ for i in range(steps_per_sample)
304
+ ]
305
+ avg_distance_by_step[model_name] = avg_per_step
306
+
307
  progress_bar.progress((mi+1)/len(selected_models))
308
  # plot
309
  st.subheader("Accuracy vs Steps")
 
329
  )
330
 
331
  st.altair_chart(chart, use_container_width=True)
332
+
333
+ st.subheader("Average Distance vs Steps (km)")
334
+ df_wide_dist = pd.DataFrame(avg_distance_by_step)
335
+ df_long_dist = (
336
+ df_wide_dist
337
+ .reset_index(names="Step")
338
+ .melt(id_vars="Step", var_name="Model", value_name="AvgDistanceKm")
339
+ )
340
+ dist_chart = (
341
+ alt.Chart(df_long_dist)
342
+ .mark_line(point=True)
343
+ .encode(
344
+ x=alt.X("Step:O", title="Step #"),
345
+ y=alt.Y("AvgDistanceKm:Q", title="Avg Distance (km)", scale=alt.Scale(zero=True)),
346
+ color=alt.Color("Model:N", title="Model"),
347
+ tooltip=["Model:N", "Step:O", alt.Tooltip("AvgDistanceKm:Q", format=".1f")],
348
+ )
349
+ .properties(width=700, height=400)
350
+ )
351
+ st.altair_chart(dist_chart, use_container_width=True)
352
  st.stop()
353
 
354
  else:
datasets/test/golden_labels.json ADDED
@@ -0,0 +1,759 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "dataset_name": "test",
4
+ "collection_date": "2025-08-07T12:43:38.026706",
5
+ "collection_options": {}
6
+ },
7
+ "samples": [
8
+ {
9
+ "id": "d6250b7f-4da5-42c1-8c8d-0423e67e77be",
10
+ "timestamp": "2025-08-07T12:40:37.875459",
11
+ "lat": 47.66613320444537,
12
+ "lng": 26.011012145674016,
13
+ "address": "3 DJ178A, Suceava County",
14
+ "pano_id": "PdysAkpL3AJpCWhOBC5GwQ",
15
+ "pov": {
16
+ "heading": -238.14999999999998,
17
+ "pitch": 5,
18
+ "zoom": 0.9999999999999997
19
+ },
20
+ "url_slug": "47.666133_26.011012_-238.15_-5.00_0",
21
+ "thumbnail_path": "d6250b7f-4da5-42c1-8c8d-0423e67e77be.jpg"
22
+ },
23
+ {
24
+ "id": "3bb51463-0a02-4ce4-9e61-6e0f28491897",
25
+ "timestamp": "2025-08-07T12:40:41.467083",
26
+ "lat": -5.239479425072147,
27
+ "lng": 71.82213288625383,
28
+ "address": "British Indian Ocean Territory",
29
+ "pano_id": "NYwG1Ym4ZqDLb5APiRn2Eg",
30
+ "pov": {
31
+ "heading": -48.14999999999998,
32
+ "pitch": 5,
33
+ "zoom": 0.9999999999999997
34
+ },
35
+ "url_slug": "-5.239479_71.822133_-48.15_-5.00_0",
36
+ "thumbnail_path": "3bb51463-0a02-4ce4-9e61-6e0f28491897.jpg"
37
+ },
38
+ {
39
+ "id": "4acf7d7e-8309-4e57-88b2-1ea1019c1719",
40
+ "timestamp": "2025-08-07T12:40:45.049516",
41
+ "lat": 44.577090525370025,
42
+ "lng": 132.8105749539221,
43
+ "address": "Spassk-Dalny, Primorsky Krai",
44
+ "pano_id": "l1mVXN5S-foFa6foYGvMaQ",
45
+ "pov": {
46
+ "heading": 100.85000000000002,
47
+ "pitch": 5,
48
+ "zoom": 0.9999999999999997
49
+ },
50
+ "url_slug": "44.577091_132.810575_100.85_-5.00_0",
51
+ "thumbnail_path": "4acf7d7e-8309-4e57-88b2-1ea1019c1719.jpg"
52
+ },
53
+ {
54
+ "id": "c4d4352f-6285-42c1-bbae-231ca95da48a",
55
+ "timestamp": "2025-08-07T12:40:48.577565",
56
+ "lat": -14.173449381539905,
57
+ "lng": -169.67773654813135,
58
+ "address": "Rte 20, Ofu, Manu'a District",
59
+ "pano_id": "GTAdq2n3eUJ33lWb-gJ5BA",
60
+ "pov": {
61
+ "heading": 321.85,
62
+ "pitch": 5,
63
+ "zoom": 0.9999999999999997
64
+ },
65
+ "url_slug": "-14.173449_-169.677737_321.85_-5.00_0",
66
+ "thumbnail_path": "c4d4352f-6285-42c1-bbae-231ca95da48a.jpg"
67
+ },
68
+ {
69
+ "id": "b4b889cf-008f-4b71-b901-bca7b3de3951",
70
+ "timestamp": "2025-08-07T12:40:52.470733",
71
+ "lat": 44.83634227352461,
72
+ "lng": -91.46694086852327,
73
+ "address": "3048 Winsor Dr, Eau Claire, Wisconsin",
74
+ "pano_id": "teO7CeeojzPT4y6Dz5V4qg",
75
+ "pov": {
76
+ "heading": -244.14999999999998,
77
+ "pitch": 5,
78
+ "zoom": 0.9999999999999997
79
+ },
80
+ "url_slug": "44.836342_-91.466941_-244.15_-5.00_0",
81
+ "thumbnail_path": "b4b889cf-008f-4b71-b901-bca7b3de3951.jpg"
82
+ },
83
+ {
84
+ "id": "7a606d59-46f3-4522-b2be-2e5a5576e155",
85
+ "timestamp": "2025-08-07T12:40:56.350929",
86
+ "lat": 32.28575621196474,
87
+ "lng": -64.77437787828177,
88
+ "address": "Bermuda Tourism",
89
+ "pano_id": "CAoSFkNJSE0wb2dLRUlDQWdJQ3VnTGZLUmc.",
90
+ "pov": {
91
+ "heading": -26.149999999999977,
92
+ "pitch": 5,
93
+ "zoom": 0.9999999999999997
94
+ },
95
+ "url_slug": "32.285756_-64.774378_-26.15_-5.00_0",
96
+ "thumbnail_path": "7a606d59-46f3-4522-b2be-2e5a5576e155.jpg"
97
+ },
98
+ {
99
+ "id": "262d348a-a60a-42d8-bd4f-68aafe98d1fb",
100
+ "timestamp": "2025-08-07T12:40:59.894064",
101
+ "lat": 11.275626069517537,
102
+ "lng": 104.8745358584606,
103
+ "address": "Tak\u00e9o Province",
104
+ "pano_id": "5Y1dyapSF2NxjeB4-ucZkA",
105
+ "pov": {
106
+ "heading": 134.85000000000002,
107
+ "pitch": 5,
108
+ "zoom": 0.9999999999999997
109
+ },
110
+ "url_slug": "11.275626_104.874536_134.85_-5.00_0",
111
+ "thumbnail_path": "262d348a-a60a-42d8-bd4f-68aafe98d1fb.jpg"
112
+ },
113
+ {
114
+ "id": "09ce31a1-a719-4ed9-a344-7987214902c1",
115
+ "timestamp": "2025-08-07T12:41:03.536576",
116
+ "lat": -17.362187562805154,
117
+ "lng": -63.14684639831983,
118
+ "address": "Camino Montero, Santa Cruz Department",
119
+ "pano_id": "51rbDOTj6SCtSi9vyN0-Pg",
120
+ "pov": {
121
+ "heading": -336.15,
122
+ "pitch": 5,
123
+ "zoom": 0.9999999999999997
124
+ },
125
+ "url_slug": "-17.362188_-63.146846_-336.15_-5.00_0",
126
+ "thumbnail_path": "09ce31a1-a719-4ed9-a344-7987214902c1.jpg"
127
+ },
128
+ {
129
+ "id": "4d7925b6-c1b2-4968-b5cf-61a67b2c68fb",
130
+ "timestamp": "2025-08-07T12:41:06.979405",
131
+ "lat": -21.458641127651422,
132
+ "lng": -47.59839773953906,
133
+ "address": "12160 Rodovia Conde Francisco Matarazzo J\u00fanior, State of S\u00e3o Paulo",
134
+ "pano_id": "PTMrd1Xosg9QO25i58gjAg",
135
+ "pov": {
136
+ "heading": -204.14999999999998,
137
+ "pitch": 5,
138
+ "zoom": 0.9999999999999997
139
+ },
140
+ "url_slug": "-21.458641_-47.598398_-204.15_-5.00_0",
141
+ "thumbnail_path": "4d7925b6-c1b2-4968-b5cf-61a67b2c68fb.jpg"
142
+ },
143
+ {
144
+ "id": "1929ea7c-af27-42d0-9931-66d5ad451d21",
145
+ "timestamp": "2025-08-07T12:41:10.587109",
146
+ "lat": -54.00998792650971,
147
+ "lng": -67.6803410996465,
148
+ "address": "17 RP F, Tierra del Fuego Province",
149
+ "pano_id": "HsUaaUVcACNjAhgLP2_YOg",
150
+ "pov": {
151
+ "heading": -73.14999999999998,
152
+ "pitch": 5,
153
+ "zoom": 0.9999999999999997
154
+ },
155
+ "url_slug": "-54.009988_-67.680341_-73.15_-5.00_0",
156
+ "thumbnail_path": "1929ea7c-af27-42d0-9931-66d5ad451d21.jpg"
157
+ },
158
+ {
159
+ "id": "7bc2a39e-ac61-4704-a950-203117b4aca2",
160
+ "timestamp": "2025-08-07T12:41:14.089628",
161
+ "lat": 42.44808280064812,
162
+ "lng": 1.4936480624654318,
163
+ "address": "Cam\u00ed els Hortells, Sant Juli\u00e0 de L\u00f2ria",
164
+ "pano_id": "kqVCVi1mPVjLpeHMyN_BPQ",
165
+ "pov": {
166
+ "heading": 111.85000000000002,
167
+ "pitch": 5,
168
+ "zoom": 0.9999999999999997
169
+ },
170
+ "url_slug": "42.448083_1.493648_111.85_-5.00_0",
171
+ "thumbnail_path": "7bc2a39e-ac61-4704-a950-203117b4aca2.jpg"
172
+ },
173
+ {
174
+ "id": "a77120f7-f65b-4ea4-8419-4c2f599c2ed8",
175
+ "timestamp": "2025-08-07T12:41:17.644882",
176
+ "lat": 23.57625889505424,
177
+ "lng": 120.55489844246863,
178
+ "address": "Chiayi County",
179
+ "pano_id": "25efsk04WLxb8UuEELh0fQ",
180
+ "pov": {
181
+ "heading": 258.85,
182
+ "pitch": 5,
183
+ "zoom": 0.9999999999999997
184
+ },
185
+ "url_slug": "23.576259_120.554898_258.85_-5.00_0",
186
+ "thumbnail_path": "a77120f7-f65b-4ea4-8419-4c2f599c2ed8.jpg"
187
+ },
188
+ {
189
+ "id": "bf12b96e-5ee7-4815-bc1e-2ef6ccaf3b5c",
190
+ "timestamp": "2025-08-07T12:41:21.174601",
191
+ "lat": 49.76842154708744,
192
+ "lng": 6.236529746429928,
193
+ "address": "12 Regioun, Heffingen, Mersch",
194
+ "pano_id": "VLotZqwpyqKwg2D1uRhZLA",
195
+ "pov": {
196
+ "heading": -243.14999999999998,
197
+ "pitch": 5,
198
+ "zoom": 0.9999999999999997
199
+ },
200
+ "url_slug": "49.768422_6.236530_-243.15_-5.00_0",
201
+ "thumbnail_path": "bf12b96e-5ee7-4815-bc1e-2ef6ccaf3b5c.jpg"
202
+ },
203
+ {
204
+ "id": "6a5589de-e1fb-46c4-96c3-85cfb161444e",
205
+ "timestamp": "2025-08-07T12:41:24.747595",
206
+ "lat": 49.78642391720587,
207
+ "lng": 6.199481729741201,
208
+ "address": "CR118, Larochette, Mersch",
209
+ "pano_id": "JqZpePEOedyFAYtwUC786g",
210
+ "pov": {
211
+ "heading": -104.14999999999998,
212
+ "pitch": 5,
213
+ "zoom": 0.9999999999999997
214
+ },
215
+ "url_slug": "49.786424_6.199482_-104.15_-5.00_0",
216
+ "thumbnail_path": "6a5589de-e1fb-46c4-96c3-85cfb161444e.jpg"
217
+ },
218
+ {
219
+ "id": "9c9465d9-5bfa-48c0-8fa4-f4a1c7dd0c25",
220
+ "timestamp": "2025-08-07T12:41:28.269300",
221
+ "lat": 34.0994193037527,
222
+ "lng": 131.95163614377708,
223
+ "address": "\u770c\u9053140\u53f7, Shunan, Yamaguchi",
224
+ "pano_id": "6HXD7J5jRsnBD5_KjCTSPw",
225
+ "pov": {
226
+ "heading": 129.85000000000002,
227
+ "pitch": 5,
228
+ "zoom": 0.9999999999999997
229
+ },
230
+ "url_slug": "34.099419_131.951636_129.85_-5.00_0",
231
+ "thumbnail_path": "9c9465d9-5bfa-48c0-8fa4-f4a1c7dd0c25.jpg"
232
+ },
233
+ {
234
+ "id": "3f0e8c12-109e-4db7-a228-52a156ca880d",
235
+ "timestamp": "2025-08-07T12:41:32.781435",
236
+ "lat": 14.7694212956585,
237
+ "lng": -16.695508234038147,
238
+ "address": "Khombole, Thi\u00e8s Region",
239
+ "pano_id": "BjJ0cU8LxMFDFJD3vj5YYQ",
240
+ "pov": {
241
+ "heading": 241.85000000000002,
242
+ "pitch": 5,
243
+ "zoom": 0.9999999999999997
244
+ },
245
+ "url_slug": "14.769421_-16.695508_241.85_-5.00_0",
246
+ "thumbnail_path": "3f0e8c12-109e-4db7-a228-52a156ca880d.jpg"
247
+ },
248
+ {
249
+ "id": "a76f6ed2-5bb0-4750-bfd0-5a01fa052772",
250
+ "timestamp": "2025-08-07T12:41:36.390462",
251
+ "lat": 49.075733818467846,
252
+ "lng": 19.306522463680235,
253
+ "address": "32 J\u00e1na Jan\u010deka, Ru\u017eomberok, \u017dilina Region",
254
+ "pano_id": "4EOqYYxwF--FflZHgPGu2Q",
255
+ "pov": {
256
+ "heading": -233.14999999999998,
257
+ "pitch": 5,
258
+ "zoom": 0.9999999999999997
259
+ },
260
+ "url_slug": "49.075734_19.306522_-233.15_-5.00_0",
261
+ "thumbnail_path": "a76f6ed2-5bb0-4750-bfd0-5a01fa052772.jpg"
262
+ },
263
+ {
264
+ "id": "a6f20438-972f-48b0-8dc6-e95baec1c8c2",
265
+ "timestamp": "2025-08-07T12:41:39.931459",
266
+ "lat": 32.28733167935287,
267
+ "lng": -64.77638248243588,
268
+ "address": "23 Lovers Ln, Paget Parish",
269
+ "pano_id": "ZHTVVVlJPR35oUPiShnqHw",
270
+ "pov": {
271
+ "heading": -113.14999999999998,
272
+ "pitch": 5,
273
+ "zoom": 0.9999999999999997
274
+ },
275
+ "url_slug": "32.287332_-64.776382_-113.15_-5.00_0",
276
+ "thumbnail_path": "a6f20438-972f-48b0-8dc6-e95baec1c8c2.jpg"
277
+ },
278
+ {
279
+ "id": "4ee65f3b-aeaa-49d1-abda-28e270cca142",
280
+ "timestamp": "2025-08-07T12:41:43.438726",
281
+ "lat": 41.49319498028777,
282
+ "lng": 21.92920765772765,
283
+ "address": "Municipality of Rosoman",
284
+ "pano_id": "NcxnpDckFi3vt4-ntoF44A",
285
+ "pov": {
286
+ "heading": 99.85000000000002,
287
+ "pitch": 5,
288
+ "zoom": 0.9999999999999997
289
+ },
290
+ "url_slug": "41.493195_21.929208_99.85_-5.00_0",
291
+ "thumbnail_path": "4ee65f3b-aeaa-49d1-abda-28e270cca142.jpg"
292
+ },
293
+ {
294
+ "id": "3933f509-49f4-413f-b32d-95398910b3b6",
295
+ "timestamp": "2025-08-07T12:41:47.006438",
296
+ "lat": 31.875513355699223,
297
+ "lng": 35.492798274434385,
298
+ "address": "Green jericho",
299
+ "pano_id": "i9EnnjI_H0LQZ80DD8caeQ",
300
+ "pov": {
301
+ "heading": 328.85,
302
+ "pitch": 5,
303
+ "zoom": 0.9999999999999997
304
+ },
305
+ "url_slug": "31.875513_35.492798_328.85_-5.00_0",
306
+ "thumbnail_path": "3933f509-49f4-413f-b32d-95398910b3b6.jpg"
307
+ },
308
+ {
309
+ "id": "e32c0681-97bc-440e-9d8e-c1cb9511d47d",
310
+ "timestamp": "2025-08-07T12:41:50.873515",
311
+ "lat": 55.115320287969766,
312
+ "lng": 26.163976401890817,
313
+ "address": "128 Str\u016bnai\u010dio g., \u0160ven\u010dionys, Vilnius County",
314
+ "pano_id": "kN6UgL1Chn6ffNKK7wQmxA",
315
+ "pov": {
316
+ "heading": -192.14999999999998,
317
+ "pitch": 5,
318
+ "zoom": 0.9999999999999997
319
+ },
320
+ "url_slug": "55.115320_26.163976_-192.15_-5.00_0",
321
+ "thumbnail_path": "e32c0681-97bc-440e-9d8e-c1cb9511d47d.jpg"
322
+ },
323
+ {
324
+ "id": "15861215-f932-426b-a6fa-08ae0cd5ae54",
325
+ "timestamp": "2025-08-07T12:41:54.439626",
326
+ "lat": 55.115320287969766,
327
+ "lng": 26.163976401890817,
328
+ "address": "128 Str\u016bnai\u010dio g., \u0160ven\u010dionys, Vilnius County",
329
+ "pano_id": "kN6UgL1Chn6ffNKK7wQmxA",
330
+ "pov": {
331
+ "heading": -192.14999999999998,
332
+ "pitch": 5,
333
+ "zoom": 0.9999999999999997
334
+ },
335
+ "url_slug": "55.115320_26.163976_-192.15_-5.00_0",
336
+ "thumbnail_path": "15861215-f932-426b-a6fa-08ae0cd5ae54.jpg"
337
+ },
338
+ {
339
+ "id": "9a6c5a97-8501-489d-bade-f07bbcbebeea",
340
+ "timestamp": "2025-08-07T12:42:01.229172",
341
+ "lat": 40.13741279140719,
342
+ "lng": 19.645404417111592,
343
+ "address": "Dh\u00ebrmiu Beach",
344
+ "pano_id": "CAoSFkNJSE0wb2dLRUlDQWdJRHFqYW42YWc.",
345
+ "pov": {
346
+ "heading": 137.85000000000002,
347
+ "pitch": 5,
348
+ "zoom": 0.9999999999999997
349
+ },
350
+ "url_slug": "40.137413_19.645404_137.85_-5.00_0",
351
+ "thumbnail_path": "9a6c5a97-8501-489d-bade-f07bbcbebeea.jpg"
352
+ },
353
+ {
354
+ "id": "cbbab275-9be4-4d3a-b077-45ae1f8d14ff",
355
+ "timestamp": "2025-08-07T12:42:04.457716",
356
+ "lat": 29.18167200058433,
357
+ "lng": -95.43500220590631,
358
+ "address": "Angleton, Texas",
359
+ "pano_id": "IayHlQ-Wr58p-_kVKSK1ug",
360
+ "pov": {
361
+ "heading": 270.85,
362
+ "pitch": 5,
363
+ "zoom": 0.9999999999999997
364
+ },
365
+ "url_slug": "29.181672_-95.435002_270.85_-5.00_0",
366
+ "thumbnail_path": "cbbab275-9be4-4d3a-b077-45ae1f8d14ff.jpg"
367
+ },
368
+ {
369
+ "id": "011c76d0-d1cf-40f0-b243-3593448bce84",
370
+ "timestamp": "2025-08-07T12:42:07.779631",
371
+ "lat": 12.226344673460268,
372
+ "lng": 122.02353179975576,
373
+ "address": "Junction Guinhayaan - Malbog Port Rd, Looc, MIMAROPA",
374
+ "pano_id": "tAnV4HzcEaJ5IAm2Jgegiw",
375
+ "pov": {
376
+ "heading": -253.14999999999998,
377
+ "pitch": 5,
378
+ "zoom": 0.9999999999999997
379
+ },
380
+ "url_slug": "12.226345_122.023532_-253.15_-5.00_0",
381
+ "thumbnail_path": "011c76d0-d1cf-40f0-b243-3593448bce84.jpg"
382
+ },
383
+ {
384
+ "id": "a16553c1-8b4a-44f0-9d6d-9c23b1b93c86",
385
+ "timestamp": "2025-08-07T12:42:12.220880",
386
+ "lat": 34.062066594180294,
387
+ "lng": 133.86624813436472,
388
+ "address": "Tokushima Prefectural Rd No. 4, Higashimiyoshi, Tokushima",
389
+ "pano_id": "5Tp9jW_NWLnaKB_3NTeQSw",
390
+ "pov": {
391
+ "heading": -106.14999999999998,
392
+ "pitch": 5,
393
+ "zoom": 0.9999999999999997
394
+ },
395
+ "url_slug": "34.062067_133.866248_-106.15_-5.00_0",
396
+ "thumbnail_path": "a16553c1-8b4a-44f0-9d6d-9c23b1b93c86.jpg"
397
+ },
398
+ {
399
+ "id": "0246f9d3-be8d-40f0-805e-d0446ef2d183",
400
+ "timestamp": "2025-08-07T12:42:15.744386",
401
+ "lat": -41.21734957722994,
402
+ "lng": 172.11284555729617,
403
+ "address": "302 Rte 67, Karamea, West Coast Region",
404
+ "pano_id": "dcA7I3Arr0VPwKwgMxX_mQ",
405
+ "pov": {
406
+ "heading": 23.850000000000023,
407
+ "pitch": 5,
408
+ "zoom": 0.9999999999999997
409
+ },
410
+ "url_slug": "-41.217350_172.112846_23.85_-5.00_0",
411
+ "thumbnail_path": "0246f9d3-be8d-40f0-805e-d0446ef2d183.jpg"
412
+ },
413
+ {
414
+ "id": "54375156-8b78-4e60-afc9-f1172deba69d",
415
+ "timestamp": "2025-08-07T12:42:19.157383",
416
+ "lat": 46.10532360891025,
417
+ "lng": 15.119329939077309,
418
+ "address": "Podkraj, Podkraj, Municipality of Hrastnik",
419
+ "pano_id": "4bdhb8F41Au_r8UJIG8nCQ",
420
+ "pov": {
421
+ "heading": 204.85000000000002,
422
+ "pitch": 5,
423
+ "zoom": 0.9999999999999997
424
+ },
425
+ "url_slug": "46.105324_15.119330_204.85_-5.00_0",
426
+ "thumbnail_path": "54375156-8b78-4e60-afc9-f1172deba69d.jpg"
427
+ },
428
+ {
429
+ "id": "4fa45765-4ce7-4adc-a4fb-7f54149d6f27",
430
+ "timestamp": "2025-08-07T12:42:22.677283",
431
+ "lat": 44.370875416206346,
432
+ "lng": 5.1514140758707585,
433
+ "address": "1450 Les Fonts, Nyons, Auvergne-Rh\u00f4ne-Alpes",
434
+ "pano_id": "30HH_X24i7QOn6dILzYoKw",
435
+ "pov": {
436
+ "heading": -320.15,
437
+ "pitch": 5,
438
+ "zoom": 0.9999999999999997
439
+ },
440
+ "url_slug": "44.370875_5.151414_-320.15_-5.00_0",
441
+ "thumbnail_path": "4fa45765-4ce7-4adc-a4fb-7f54149d6f27.jpg"
442
+ },
443
+ {
444
+ "id": "08ef293d-2894-489f-b77f-377115c75921",
445
+ "timestamp": "2025-08-07T12:42:26.245168",
446
+ "lat": -19.541637267698466,
447
+ "lng": -63.55863586071773,
448
+ "address": "9, Santa Cruz Department",
449
+ "pano_id": "FmZr6VYcfqf_qwztM0cJ0g",
450
+ "pov": {
451
+ "heading": -125.14999999999998,
452
+ "pitch": 5,
453
+ "zoom": 0.9999999999999997
454
+ },
455
+ "url_slug": "-19.541637_-63.558636_-125.15_-5.00_0",
456
+ "thumbnail_path": "08ef293d-2894-489f-b77f-377115c75921.jpg"
457
+ },
458
+ {
459
+ "id": "8ff247f4-efdf-47e8-8aab-7752f7a7a033",
460
+ "timestamp": "2025-08-07T12:42:30.212571",
461
+ "lat": 25.60987433301616,
462
+ "lng": 55.754304628080014,
463
+ "address": "Al Alyaah St, Al Raafah, Emirate of Umm Al Quwain",
464
+ "pano_id": "3lt-n3rOsbk3GkZ3CiuMKQ",
465
+ "pov": {
466
+ "heading": 63.85000000000002,
467
+ "pitch": 5,
468
+ "zoom": 0.9999999999999997
469
+ },
470
+ "url_slug": "25.609874_55.754305_63.85_-5.00_0",
471
+ "thumbnail_path": "8ff247f4-efdf-47e8-8aab-7752f7a7a033.jpg"
472
+ },
473
+ {
474
+ "id": "41aa250b-f476-4c47-a8b3-1b170f892039",
475
+ "timestamp": "2025-08-07T12:42:33.795863",
476
+ "lat": 32.28525162497046,
477
+ "lng": -64.78725425926685,
478
+ "address": "Hodson's Ferry",
479
+ "pano_id": "CAoSF0NJSE0wb2dLRUlDQWdJQ0UzYkhPalFF",
480
+ "pov": {
481
+ "heading": 314.85,
482
+ "pitch": 5,
483
+ "zoom": 0.9999999999999997
484
+ },
485
+ "url_slug": "32.285252_-64.787254_314.85_-5.00_0",
486
+ "thumbnail_path": "41aa250b-f476-4c47-a8b3-1b170f892039.jpg"
487
+ },
488
+ {
489
+ "id": "1fc918f8-1b83-4aeb-a785-22a3cd15a407",
490
+ "timestamp": "2025-08-07T12:42:37.657812",
491
+ "lat": 45.888594934068315,
492
+ "lng": 16.65941553063258,
493
+ "address": "Bol\u010d, Zagreb County",
494
+ "pano_id": "EwgAJqZjebsU51bDLswlGg",
495
+ "pov": {
496
+ "heading": -201.14999999999998,
497
+ "pitch": 5,
498
+ "zoom": 0.9999999999999997
499
+ },
500
+ "url_slug": "45.888595_16.659416_-201.15_-5.00_0",
501
+ "thumbnail_path": "1fc918f8-1b83-4aeb-a785-22a3cd15a407.jpg"
502
+ },
503
+ {
504
+ "id": "6186abe6-6343-41bd-b7c6-ef65e5fb5a83",
505
+ "timestamp": "2025-08-07T12:42:41.528253",
506
+ "lat": 68.72859088427079,
507
+ "lng": 16.900531665561935,
508
+ "address": "Fv848, Troms",
509
+ "pano_id": "HUtqcc4YFuJA6EGOvLaOSg",
510
+ "pov": {
511
+ "heading": -33.14999999999998,
512
+ "pitch": 5,
513
+ "zoom": 0.9999999999999997
514
+ },
515
+ "url_slug": "68.728591_16.900532_-33.15_-5.00_0",
516
+ "thumbnail_path": "6186abe6-6343-41bd-b7c6-ef65e5fb5a83.jpg"
517
+ },
518
+ {
519
+ "id": "f9d01601-da06-4286-b83f-aad48292ef56",
520
+ "timestamp": "2025-08-07T12:42:45.046606",
521
+ "lat": 56.718393213855904,
522
+ "lng": 25.74434588961816,
523
+ "address": "Aizkraukle Municipality",
524
+ "pano_id": "lFBbYokbq5Azj-WuXKkAww",
525
+ "pov": {
526
+ "heading": 112.85000000000002,
527
+ "pitch": 5,
528
+ "zoom": 0.9999999999999997
529
+ },
530
+ "url_slug": "56.718393_25.744346_112.85_-5.00_0",
531
+ "thumbnail_path": "f9d01601-da06-4286-b83f-aad48292ef56.jpg"
532
+ },
533
+ {
534
+ "id": "29521be4-0c47-40b4-9fe5-14dd37686eed",
535
+ "timestamp": "2025-08-07T12:42:48.525979",
536
+ "lat": 38.22079108487478,
537
+ "lng": -1.0621034114314583,
538
+ "address": "MU-412, Abanilla, Region of Murcia",
539
+ "pano_id": "YoaYr1t8aZ65kAKY_xoa4Q",
540
+ "pov": {
541
+ "heading": 338.85,
542
+ "pitch": 5,
543
+ "zoom": 0.9999999999999997
544
+ },
545
+ "url_slug": "38.220791_-1.062103_338.85_-5.00_0",
546
+ "thumbnail_path": "29521be4-0c47-40b4-9fe5-14dd37686eed.jpg"
547
+ },
548
+ {
549
+ "id": "cf0e39c0-67b7-4d72-a51b-fa006fa8e036",
550
+ "timestamp": "2025-08-07T12:42:52.036523",
551
+ "lat": 14.586380510782684,
552
+ "lng": -91.12495671396474,
553
+ "address": "RN-11, Patulul, Solol\u00e1 Department",
554
+ "pano_id": "e83Ymkc4WsPjYZSXQSkhlQ",
555
+ "pov": {
556
+ "heading": -249.14999999999998,
557
+ "pitch": 5,
558
+ "zoom": 0.9999999999999997
559
+ },
560
+ "url_slug": "14.586381_-91.124957_-249.15_-5.00_0",
561
+ "thumbnail_path": "cf0e39c0-67b7-4d72-a51b-fa006fa8e036.jpg"
562
+ },
563
+ {
564
+ "id": "0049770c-0e79-4f6e-a230-85815c5afca4",
565
+ "timestamp": "2025-08-07T12:42:55.545371",
566
+ "lat": 41.24761837711202,
567
+ "lng": 19.900912328789897,
568
+ "address": "SH3, B\u00ebrzhit\u00eb, Tirana County",
569
+ "pano_id": "H4OtJUEIjqNM4h3b3zJiog",
570
+ "pov": {
571
+ "heading": -38.14999999999998,
572
+ "pitch": 5,
573
+ "zoom": 0.9999999999999997
574
+ },
575
+ "url_slug": "41.247618_19.900912_-38.15_-5.00_0",
576
+ "thumbnail_path": "0049770c-0e79-4f6e-a230-85815c5afca4.jpg"
577
+ },
578
+ {
579
+ "id": "108d3530-8cd1-4554-9e27-f4161c25b64f",
580
+ "timestamp": "2025-08-07T12:42:59.039576",
581
+ "lat": 23.106680960105503,
582
+ "lng": 120.31480234033475,
583
+ "address": "Tainan City",
584
+ "pano_id": "wPKvYXSO2t3Cjb9d_92vbQ",
585
+ "pov": {
586
+ "heading": 177.85000000000002,
587
+ "pitch": 5,
588
+ "zoom": 0.9999999999999997
589
+ },
590
+ "url_slug": "23.106681_120.314802_177.85_-5.00_0",
591
+ "thumbnail_path": "108d3530-8cd1-4554-9e27-f4161c25b64f.jpg"
592
+ },
593
+ {
594
+ "id": "684589c2-db98-4fa0-a909-26677d622781",
595
+ "timestamp": "2025-08-07T12:43:02.607203",
596
+ "lat": 24.280060413908377,
597
+ "lng": 91.40645644538027,
598
+ "address": "Sylhet Division",
599
+ "pano_id": "vnxugWDu7BvOIQKU2pGreQ",
600
+ "pov": {
601
+ "heading": 289.85,
602
+ "pitch": 5,
603
+ "zoom": 0.9999999999999997
604
+ },
605
+ "url_slug": "24.280060_91.406456_289.85_-5.00_0",
606
+ "thumbnail_path": "684589c2-db98-4fa0-a909-26677d622781.jpg"
607
+ },
608
+ {
609
+ "id": "9e52e1ef-b7c8-4290-a50c-dea42684329c",
610
+ "timestamp": "2025-08-07T12:43:06.097012",
611
+ "lat": 23.075670254787028,
612
+ "lng": 120.16583641147342,
613
+ "address": "Tainan City",
614
+ "pano_id": "KT8dvKAlDqRIWqXVig9tRA",
615
+ "pov": {
616
+ "heading": -217.14999999999998,
617
+ "pitch": 5,
618
+ "zoom": 0.9999999999999997
619
+ },
620
+ "url_slug": "23.075670_120.165836_-217.15_-5.00_0",
621
+ "thumbnail_path": "9e52e1ef-b7c8-4290-a50c-dea42684329c.jpg"
622
+ },
623
+ {
624
+ "id": "54ccc34f-ae30-449b-83cf-3f6485186e38",
625
+ "timestamp": "2025-08-07T12:43:09.571839",
626
+ "lat": 16.069303835253045,
627
+ "lng": -13.917845261546633,
628
+ "address": "N2, Saint-Louis Region",
629
+ "pano_id": "AOhMIvzxsCcRhsHw2BVUzA",
630
+ "pov": {
631
+ "heading": -106.14999999999998,
632
+ "pitch": 5,
633
+ "zoom": 0.9999999999999997
634
+ },
635
+ "url_slug": "16.069304_-13.917845_-106.15_-5.00_0",
636
+ "thumbnail_path": "54ccc34f-ae30-449b-83cf-3f6485186e38.jpg"
637
+ },
638
+ {
639
+ "id": "9903bb23-294e-44a2-9ecf-180808b82d67",
640
+ "timestamp": "2025-08-07T12:43:12.991252",
641
+ "lat": -32.83743900844668,
642
+ "lng": -70.95213519080639,
643
+ "address": "218 Capit\u00e1n Avalos, Llay-Llay, Valpara\u00edso",
644
+ "pano_id": "xNJYW4PSgzGV2TEqMpEBpA",
645
+ "pov": {
646
+ "heading": 68.85000000000002,
647
+ "pitch": 5,
648
+ "zoom": 0.9999999999999997
649
+ },
650
+ "url_slug": "-32.837439_-70.952135_68.85_-5.00_0",
651
+ "thumbnail_path": "9903bb23-294e-44a2-9ecf-180808b82d67.jpg"
652
+ },
653
+ {
654
+ "id": "4381807b-d04c-4c04-8b93-78a588016cb7",
655
+ "timestamp": "2025-08-07T12:43:16.523957",
656
+ "lat": 4.5400338406517715,
657
+ "lng": -76.1944593680759,
658
+ "address": "El Dovio-Versalles, Valle del Cauca",
659
+ "pano_id": "wgWdWsvikF8kFmi_FZVstg",
660
+ "pov": {
661
+ "heading": 333.85,
662
+ "pitch": 5,
663
+ "zoom": 0.9999999999999997
664
+ },
665
+ "url_slug": "4.540034_-76.194459_333.85_-5.00_0",
666
+ "thumbnail_path": "4381807b-d04c-4c04-8b93-78a588016cb7.jpg"
667
+ },
668
+ {
669
+ "id": "c9d4d2c0-be12-4104-9fdf-3ffd7b9b539a",
670
+ "timestamp": "2025-08-07T12:43:20.060957",
671
+ "lat": 31.65645279027197,
672
+ "lng": 34.9414288862752,
673
+ "address": "Nir Louk",
674
+ "pano_id": "CAoSFkNJSE0wb2dLRUlDQWdJQzZqcWJXQ2c.",
675
+ "pov": {
676
+ "heading": -248.14999999999998,
677
+ "pitch": 5,
678
+ "zoom": 0.9999999999999997
679
+ },
680
+ "url_slug": "31.656453_34.941429_-248.15_-5.00_0",
681
+ "thumbnail_path": "c9d4d2c0-be12-4104-9fdf-3ffd7b9b539a.jpg"
682
+ },
683
+ {
684
+ "id": "574ac51d-1de1-46b2-9f90-5b1da1d79339",
685
+ "timestamp": "2025-08-07T12:43:23.601528",
686
+ "lat": 5.90176654207688,
687
+ "lng": 0.9886556847260388,
688
+ "address": "Keta, Volta Region",
689
+ "pano_id": "ipwTobbIbpx2SEjFzq6kww",
690
+ "pov": {
691
+ "heading": -146.14999999999998,
692
+ "pitch": 5,
693
+ "zoom": 0.9999999999999997
694
+ },
695
+ "url_slug": "5.901767_0.988656_-146.15_-5.00_0",
696
+ "thumbnail_path": "574ac51d-1de1-46b2-9f90-5b1da1d79339.jpg"
697
+ },
698
+ {
699
+ "id": "87e095f0-467b-4539-978b-46eecfdf1efc",
700
+ "timestamp": "2025-08-07T12:43:27.067655",
701
+ "lat": 46.29179908449921,
702
+ "lng": 16.580906762551983,
703
+ "address": "Komarnica Ludbre\u0161ka, Vara\u017edin County",
704
+ "pano_id": "ha0KsxP_lG1phxES1aSmGQ",
705
+ "pov": {
706
+ "heading": 111.85000000000002,
707
+ "pitch": 5,
708
+ "zoom": 0.9999999999999997
709
+ },
710
+ "url_slug": "46.291799_16.580907_111.85_-5.00_0",
711
+ "thumbnail_path": "87e095f0-467b-4539-978b-46eecfdf1efc.jpg"
712
+ },
713
+ {
714
+ "id": "3badb1cb-5ffb-4c07-812e-ee85646a4279",
715
+ "timestamp": "2025-08-07T12:43:30.517183",
716
+ "lat": 43.891541352607554,
717
+ "lng": 5.774287870706945,
718
+ "address": "Dauphin, Provence-Alpes-C\u00f4te d'Azur",
719
+ "pano_id": "b9cJ5iGIYH2JHWrRmDDSFg",
720
+ "pov": {
721
+ "heading": -351.15,
722
+ "pitch": 5,
723
+ "zoom": 0.9999999999999997
724
+ },
725
+ "url_slug": "43.891541_5.774288_-351.15_-5.00_0",
726
+ "thumbnail_path": "3badb1cb-5ffb-4c07-812e-ee85646a4279.jpg"
727
+ },
728
+ {
729
+ "id": "1acb3834-1f22-4c0c-8cd3-b992e4546f88",
730
+ "timestamp": "2025-08-07T12:43:34.052597",
731
+ "lat": 20.805812868893106,
732
+ "lng": -89.6933791766117,
733
+ "address": "Hotzuc, Yucatan",
734
+ "pano_id": "ShCiTFG-KoqkokeXeCyG2w",
735
+ "pov": {
736
+ "heading": -236.14999999999998,
737
+ "pitch": 5,
738
+ "zoom": 0.9999999999999997
739
+ },
740
+ "url_slug": "20.805813_-89.693379_-236.15_-5.00_0",
741
+ "thumbnail_path": "1acb3834-1f22-4c0c-8cd3-b992e4546f88.jpg"
742
+ },
743
+ {
744
+ "id": "8dacb066-8fa4-4f03-87e3-34d86f5863fb",
745
+ "timestamp": "2025-08-07T12:43:37.674750",
746
+ "lat": 47.974591513902844,
747
+ "lng": 108.47688185828954,
748
+ "address": "Baganuur-Mengenmorit, Mungunmorit, T\u00f6v, Mongolia",
749
+ "pano_id": "V0i3_HH4f4IM9hTEg0QRqg",
750
+ "pov": {
751
+ "heading": 13.850000000000023,
752
+ "pitch": 5,
753
+ "zoom": 0.9999999999999997
754
+ },
755
+ "url_slug": "47.974592_108.476882_13.85_-5.00_0",
756
+ "thumbnail_path": "8dacb066-8fa4-4f03-87e3-34d86f5863fb.jpg"
757
+ }
758
+ ]
759
+ }
experiment_runner.py DELETED
File without changes
geo_bot.py CHANGED
@@ -359,7 +359,7 @@ class GeoBot:
359
  "reasoning": "Recovery due to parsing failure or model error.",
360
  "action_details": {"action": "PAN_RIGHT"},
361
  "current_prediction": "N/A",
362
- "debug_message": f"{response.content.strip()}",
363
  }
364
 
365
  return decision
@@ -398,7 +398,7 @@ class GeoBot:
398
  image=Image.open(BytesIO(screenshot_bytes))
399
  )
400
  available_actions = self.controller.get_test_available_actions()
401
- print(f"Available actions: {available_actions}")
402
 
403
 
404
  # Normal step execution
@@ -424,15 +424,30 @@ class GeoBot:
424
 
425
  action_details = decision.get("action_details", {})
426
  action = action_details.get("action")
427
- print(f"AI Reasoning: {decision.get('reasoning', 'N/A')}")
428
- print(f"AI Current Prediction: {decision.get('current_prediction', 'N/A')}")
429
- print(f"AI Action: {action}")
430
 
431
 
432
  # Add step to history AFTER callback (so next iteration has this step in history)
433
  self.add_step_to_history(history, current_screenshot_b64, decision)
434
 
435
- predictions.append(decision.get("current_prediction", "N/A"))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
436
  self.execute_action(action)
437
 
438
  return predictions
 
359
  "reasoning": "Recovery due to parsing failure or model error.",
360
  "action_details": {"action": "PAN_RIGHT"},
361
  "current_prediction": "N/A",
362
+ "debug_message": f"{response.content.strip() if response is not None else 'N/A'}",
363
  }
364
 
365
  return decision
 
398
  image=Image.open(BytesIO(screenshot_bytes))
399
  )
400
  available_actions = self.controller.get_test_available_actions()
401
+ # print(f"Available actions: {available_actions}")
402
 
403
 
404
  # Normal step execution
 
424
 
425
  action_details = decision.get("action_details", {})
426
  action = action_details.get("action")
427
+ # print(f"AI Reasoning: {decision.get('reasoning', 'N/A')}")
428
+ # print(f"AI Current Prediction: {decision.get('current_prediction', 'N/A')}")
429
+ # print(f"AI Action: {action}")
430
 
431
 
432
  # Add step to history AFTER callback (so next iteration has this step in history)
433
  self.add_step_to_history(history, current_screenshot_b64, decision)
434
 
435
+ current_prediction = decision.get("current_prediction")
436
+ if current_prediction and isinstance(current_prediction, dict):
437
+ current_prediction["reasoning"] = decision.get("reasoning", "N/A")
438
+ predictions.append(current_prediction)
439
+ else:
440
+ # Fallback: create a basic prediction structure
441
+ print(f"Invalid current prediction: {current_prediction}")
442
+ fallback_prediction = {
443
+ "lat": 0.0,
444
+ "lon": 0.0,
445
+ "confidence": 0.0,
446
+ "location_description": "N/A",
447
+ "reasoning": decision.get("reasoning", "N/A")
448
+ }
449
+ predictions.append(fallback_prediction)
450
+
451
  self.execute_action(action)
452
 
453
  return predictions
main.py CHANGED
@@ -1,11 +1,15 @@
1
  import argparse
2
  import json
 
 
3
 
4
  from geo_bot import GeoBot
5
  from benchmark import MapGuesserBenchmark
6
  from data_collector import DataCollector
7
  from config import MODELS_CONFIG, get_data_paths, SUCCESS_THRESHOLD_KM, get_model_class
8
-
 
 
9
 
10
  def agent_mode(
11
  model_name: str,
@@ -147,11 +151,165 @@ def collect_mode(dataset_name: str, samples: int, headless: bool):
147
  print(f"Data collection complete for dataset '{dataset_name}'.")
148
 
149
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
150
  def main():
151
  parser = argparse.ArgumentParser(description="MapCrunch AI Agent & Benchmark")
152
  parser.add_argument(
153
  "--mode",
154
- choices=["agent", "benchmark", "collect"],
155
  default="agent",
156
  help="Operation mode.",
157
  )
@@ -190,6 +348,7 @@ def main():
190
  default=0.0,
191
  help="Temperature parameter for LLM sampling (0.0 = deterministic, higher = more random). Default: 0.0",
192
  )
 
193
 
194
  args = parser.parse_args()
195
 
@@ -216,6 +375,16 @@ def main():
216
  dataset_name=args.dataset,
217
  temperature=args.temperature,
218
  )
 
 
 
 
 
 
 
 
 
 
219
 
220
 
221
  if __name__ == "__main__":
 
1
  import argparse
2
  import json
3
+ import os
4
+ from datetime import datetime
5
 
6
  from geo_bot import GeoBot
7
  from benchmark import MapGuesserBenchmark
8
  from data_collector import DataCollector
9
  from config import MODELS_CONFIG, get_data_paths, SUCCESS_THRESHOLD_KM, get_model_class
10
+ from collections import OrderedDict
11
+ from tqdm import tqdm
12
+ import matplotlib.pyplot as plt
13
 
14
  def agent_mode(
15
  model_name: str,
 
151
  print(f"Data collection complete for dataset '{dataset_name}'.")
152
 
153
 
154
def _print_step_table(title, columns, steps, fmt_cell):
    """Print a fixed-width table with one row per step and one column per model.

    Args:
        title: Heading line printed above the table.
        columns: Mapping of model name -> list of per-step values.
        steps: Number of rows (steps) to print.
        fmt_cell: Callable turning one per-step value into its cell text.
    """
    header = ["Step"] + list(columns)
    width = max(len(h) for h in header) + 2
    print(f"\n{title}")
    print(" | ".join(h.center(width) for h in header))
    print("-" * (width + 3) * len(header))
    for i in range(steps):
        cells = [str(i + 1).center(width)]
        # Every cell is centred to the same width so the columns line up.
        cells.extend(fmt_cell(values[i]).center(width) for values in columns.values())
        print(" | ".join(cells))


def _save_step_plot(series_by_model, steps, ylabel, title, path, ylim=None):
    """Plot one line per model over steps 1..steps and save the figure to `path`.

    A dedicated figure is opened and always closed again, so consecutive plots
    never draw on top of each other and no figure is left open.
    """
    fig = plt.figure()
    try:
        xs = range(1, steps + 1)
        for model, values in series_by_model.items():
            # Steps without any measurement are stored as None; plot them as gaps.
            ys = [float("nan") if v is None else v for v in values]
            plt.plot(xs, ys, marker="o", label=model)
        plt.xlabel("step")
        plt.ylabel(ylabel)
        if ylim is not None:
            plt.ylim(*ylim)
        plt.legend()
        plt.grid(True, alpha=0.3)
        plt.title(title)
        plt.tight_layout()
        plt.savefig(path, dpi=120)
    finally:
        plt.close(fig)


def test_mode(
    models: list,
    samples: int,
    runs: int,
    steps: int,
    dataset_name: str = "default",
    temperature: float = 0.0,
    headless: bool = True,
):
    """
    CLI multi-model / multi-run benchmark.

    For each model the first `samples` entries of the dataset are evaluated
    `runs` times with up to `steps` agent steps each. Per step the function
    records the hit-rate (distance <= SUCCESS_THRESHOLD_KM) and the average
    distance, writes a per-model prediction log and summary JSON below
    ./results/test/<timestamp>/, prints two summary tables and saves two plots.

    Args:
        models: Model names; each must be a key of MODELS_CONFIG.
        samples: Maximum number of dataset samples to evaluate.
        runs: Number of repetitions per model (must be >= 1).
        steps: Number of agent steps per sample (must be >= 1).
        dataset_name: Dataset whose golden labels are loaded.
        temperature: Sampling temperature forwarded to GeoBot.
        headless: Forwarded to GeoBot and MapGuesserBenchmark.

    Returns:
        None. Problems with the arguments or the dataset are reported on
        stdout and end the function early.

    Notes:
        * Accuracy is hits / (runs * samples): a sample that fails to load or
          yields no usable prediction counts as a miss.
        * Average distance is taken only over predictions for which a distance
          was actually computed; a step without any measurement is reported as
          None ("N/A" in the table, null in the JSON).
    """
    if runs < 1 or steps < 1:
        print("❌ runs and steps must both be at least 1.")
        return

    # Fail fast instead of hitting a KeyError after earlier models already ran.
    unknown_models = [m for m in models if m not in MODELS_CONFIG]
    if unknown_models:
        print(f"❌ unknown model(s): {', '.join(map(str, unknown_models))}")
        return

    # ---------- load dataset ----------
    data_paths = get_data_paths(dataset_name)
    try:
        with open(data_paths["golden_labels"], "r", encoding="utf-8") as f:
            all_samples = json.load(f)["samples"]
    except FileNotFoundError:
        print(f"❌ dataset '{dataset_name}' not found.")
        return
    except (KeyError, json.JSONDecodeError) as e:
        print(f"❌ dataset '{dataset_name}' is malformed: {e}")
        return

    if not all_samples:
        print("❌ dataset is empty.")
        return

    test_samples = all_samples[:samples]
    if not test_samples:
        # e.g. samples <= 0: nothing to evaluate and the averages would divide by zero.
        print("❌ no samples selected.")
        return
    print(f"📊 loaded {len(test_samples)} samples from '{dataset_name}'")

    benchmark_helper = MapGuesserBenchmark(dataset_name=dataset_name, headless=headless)
    summary_by_step: dict[str, list[float]] = OrderedDict()
    avg_distances: dict[str, list[float | None]] = OrderedDict()

    time_tag = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_dir = os.path.join("./results", "test", time_tag)
    os.makedirs(base_dir, exist_ok=True)
    total_iterations = runs * len(test_samples)

    # ---------- iterate over models ----------
    for model_name in models:
        print(f"\n===== {model_name} =====")
        cfg = MODELS_CONFIG[model_name]
        model_cls = get_model_class(cfg["class"])

        log_json = {}
        hits_per_step = [0] * steps
        distance_sum_per_step = [0.0] * steps
        measured_per_step = [0] * steps  # how many distances fed each sum

        with tqdm(total=total_iterations, desc=model_name) as pbar:
            for run_id in range(runs):
                with GeoBot(
                    model=model_cls,
                    model_name=cfg["model_name"],
                    headless=headless,
                    temperature=temperature,
                ) as bot:
                    for sample in test_samples:
                        if not bot.controller.load_location_from_data(sample):
                            pbar.update(1)
                            continue

                        preds = bot.test_run_agent_loop(max_steps=steps)
                        gt = {"lat": sample["lat"], "lng": sample["lng"]}

                        for idx, pred in enumerate(preds):
                            if idx >= steps:
                                # Never index past the per-step accumulators.
                                break
                            if not (isinstance(pred, dict) and "lat" in pred and "lon" in pred):
                                continue
                            dist = benchmark_helper.calculate_distance(
                                gt, (pred["lat"], pred["lon"])
                            )
                            if dist is None:
                                continue
                            distance_sum_per_step[idx] += dist
                            measured_per_step[idx] += 1
                            pred["distance"] = dist
                            pred["success"] = dist <= SUCCESS_THRESHOLD_KM
                            if pred["success"]:
                                hits_per_step[idx] += 1

                        log_json.setdefault(sample["id"], []).append({
                            "run_id": run_id,
                            "predictions": preds,
                        })
                        pbar.update(1)

        # A model name containing a path separator must not create nested paths.
        safe_name = str(model_name).replace("/", "_").replace(os.sep, "_")
        model_dir = os.path.join(base_dir, safe_name)
        os.makedirs(model_dir, exist_ok=True)
        with open(os.path.join(model_dir, f"{safe_name}_log.json"), "w", encoding="utf-8") as f:
            json.dump(log_json, f, indent=2)

        summary_by_step[model_name] = [h / total_iterations for h in hits_per_step]
        avg_distances[model_name] = [
            total / count if count else None
            for total, count in zip(distance_sum_per_step, measured_per_step)
        ]
        payload = {
            "avg_distance_km": avg_distances[model_name],
            "accuracy_per_step": summary_by_step[model_name],
        }
        with open(os.path.join(model_dir, f"{safe_name}.json"), "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2)
        print(f"💾 results saved to {base_dir}")

    # ---------- pretty tables ----------
    _print_step_table(
        "=== ACCURACY PER STEP ===",
        summary_by_step,
        steps,
        lambda acc: f"{acc * 100:5.1f}%",
    )
    _print_step_table(
        "=== AVG DISTANCE PER STEP (km) ===",
        avg_distances,
        steps,
        lambda v: "N/A" if v is None else f"{v:6.1f}",
    )

    # ---------- plots (best effort: a plotting failure must not lose the results) ----------
    try:
        accuracy_path = os.path.join(base_dir, "accuracy_step.png")
        _save_step_plot(
            summary_by_step, steps, "accuracy", "Accuracy vs Step", accuracy_path, ylim=(0, 1)
        )
        print(f"\n📈 saved plot to {accuracy_path}")

        distance_path = os.path.join(base_dir, "avg_distance.png")
        _save_step_plot(
            avg_distances, steps, "Avg Distance (km)", "Average Distance per Model", distance_path
        )
        print(f"📈 saved plot to {distance_path}")
    except Exception as e:
        print(f"⚠️ plot skipped: {e}")
305
+ print(f"⚠️ plot skipped: {e}")
306
+
307
+
308
  def main():
309
  parser = argparse.ArgumentParser(description="MapCrunch AI Agent & Benchmark")
310
  parser.add_argument(
311
  "--mode",
312
+ choices=["agent", "benchmark", "collect", "test"],
313
  default="agent",
314
  help="Operation mode.",
315
  )
 
348
  default=0.0,
349
  help="Temperature parameter for LLM sampling (0.0 = deterministic, higher = more random). Default: 0.0",
350
  )
351
+ parser.add_argument("--runs", type=int, default=3, help="[Test] Runs per model")
352
 
353
  args = parser.parse_args()
354
 
 
375
  dataset_name=args.dataset,
376
  temperature=args.temperature,
377
  )
378
+ elif args.mode == "test":
379
+ test_mode(
380
+ models=args.models or [args.model],
381
+ samples=args.samples,
382
+ runs=args.runs,
383
+ steps=args.steps,
384
+ dataset_name=args.dataset,
385
+ temperature=args.temperature,
386
+ headless=args.headless,
387
+ )
388
 
389
 
390
  if __name__ == "__main__":