LRU1 committed on
Commit
772ca75
·
1 Parent(s): 3ba0e70

add debug message to benchmark mode in hf UI

Browse files

- when the model's output can't be parsed successfully, the original model response is displayed

benchmark mode: add a failure signal

Files changed (3) hide show
  1. app.py +3 -1
  2. benchmark.py +6 -1
  3. geo_bot.py +2 -0
app.py CHANGED
@@ -306,7 +306,9 @@ if start_button:
306
 
307
  st.write("**AI Reasoning:**")
308
  st.info(step_info.get("reasoning", "N/A"))
309
-
 
 
310
  st.write("**AI Action:**")
311
  if action == "GUESS":
312
  lat = step_info.get("action_details", {}).get("lat")
 
306
 
307
  st.write("**AI Reasoning:**")
308
  st.info(step_info.get("reasoning", "N/A"))
309
+ if step_info.get("debug_message") != "N/A":
310
+ st.write("**AI Debug Message:**")
311
+ st.code(step_info.get("debug_message"), language="json")
312
  st.write("**AI Action:**")
313
  if action == "GUESS":
314
  lat = step_info.get("action_details", {}).get("lat")
benchmark.py CHANGED
@@ -99,6 +99,9 @@ class MapGuesserBenchmark:
99
  print(f"πŸ“ Sample {i + 1}/{len(test_samples)}")
100
  try:
101
  result = self.run_single_test_with_bot(bot, sample)
 
 
 
102
  all_results.append(result)
103
 
104
  status = (
@@ -154,6 +157,8 @@ class MapGuesserBenchmark:
154
  }
155
 
156
  predicted_lat_lon = bot.analyze_image(screenshot)
 
 
157
  inference_time = time.time() - start_time
158
 
159
  true_coords = {"lat": location_data.get("lat"), "lng": location_data.get("lng")}
@@ -163,7 +168,7 @@ class MapGuesserBenchmark:
163
  print(f"πŸ” True coords: {true_coords}")
164
  print(f"πŸ” Predicted coords: {predicted_lat_lon}")
165
  distance_km = self.calculate_distance(true_coords, predicted_lat_lon)
166
-
167
  is_success = distance_km is not None and distance_km <= SUCCESS_THRESHOLD_KM
168
 
169
  return {
 
99
  print(f"πŸ“ Sample {i + 1}/{len(test_samples)}")
100
  try:
101
  result = self.run_single_test_with_bot(bot, sample)
102
+ if result is None:
103
+ print(f"❌ Sample_{i+1} test failed: No predicted coords")
104
+ continue
105
  all_results.append(result)
106
 
107
  status = (
 
157
  }
158
 
159
  predicted_lat_lon = bot.analyze_image(screenshot)
160
+ if predicted_lat_lon is None:
161
+ return None
162
  inference_time = time.time() - start_time
163
 
164
  true_coords = {"lat": location_data.get("lat"), "lng": location_data.get("lng")}
 
168
  print(f"πŸ” True coords: {true_coords}")
169
  print(f"πŸ” Predicted coords: {predicted_lat_lon}")
170
  distance_km = self.calculate_distance(true_coords, predicted_lat_lon)
171
+
172
  is_success = distance_km is not None and distance_km <= SUCCESS_THRESHOLD_KM
173
 
174
  return {
geo_bot.py CHANGED
@@ -250,6 +250,7 @@ class GeoBot:
250
  decision = {
251
  "reasoning": "Recovery due to parsing failure or model error.",
252
  "action_details": {"action": "PAN_RIGHT"},
 
253
  }
254
 
255
  return decision
@@ -347,6 +348,7 @@ class GeoBot:
347
  "reasoning": decision.get("reasoning", "N/A"),
348
  "action_details": decision.get("action_details", {"action": "N/A"}),
349
  "history": history.copy(), # History up to current step (excluding current)
 
350
  }
351
 
352
  action_details = decision.get("action_details", {})
 
250
  decision = {
251
  "reasoning": "Recovery due to parsing failure or model error.",
252
  "action_details": {"action": "PAN_RIGHT"},
253
+ "debug_message": f"{response.content.strip()}",
254
  }
255
 
256
  return decision
 
348
  "reasoning": decision.get("reasoning", "N/A"),
349
  "action_details": decision.get("action_details", {"action": "N/A"}),
350
  "history": history.copy(), # History up to current step (excluding current)
351
+ "debug_message": decision.get("debug_message", "N/A"),
352
  }
353
 
354
  action_details = decision.get("action_details", {})