Spaces:

Omniscient001
/

Omniscient

Running

LRU1 committed on Aug 3

Commit

772ca75

1 Parent(s): 3ba0e70

add debug message to benchmark mode in hf UI

- when the model's output can't be parsed successfully, the original model response is displayed

benchmark mode: add a failure signal

Files changed (3) hide show

app.py CHANGED Viewed

@@ -306,7 +306,9 @@ if start_button:
                             st.write("**AI Reasoning:**")
                             st.info(step_info.get("reasoning", "N/A"))
                             st.write("**AI Action:**")
                             if action == "GUESS":
                                 lat = step_info.get("action_details", {}).get("lat")

                             st.write("**AI Reasoning:**")
                             st.info(step_info.get("reasoning", "N/A"))
+                            if step_info.get("debug_message") != "N/A":
+                                st.write("**AI Debug Message:**")
+                                st.code(step_info.get("debug_message"), language="json")
                             st.write("**AI Action:**")
                             if action == "GUESS":
                                 lat = step_info.get("action_details", {}).get("lat")

benchmark.py CHANGED Viewed

@@ -99,6 +99,9 @@ class MapGuesserBenchmark:
                         print(f"📍 Sample {i + 1}/{len(test_samples)}")
                         try:
                             result = self.run_single_test_with_bot(bot, sample)
                             all_results.append(result)
                             status = (
@@ -154,6 +157,8 @@ class MapGuesserBenchmark:
             }
         predicted_lat_lon = bot.analyze_image(screenshot)
         inference_time = time.time() - start_time
         true_coords = {"lat": location_data.get("lat"), "lng": location_data.get("lng")}
@@ -163,7 +168,7 @@ class MapGuesserBenchmark:
         print(f"🔍 True coords: {true_coords}")
         print(f"🔍 Predicted coords: {predicted_lat_lon}")
         distance_km = self.calculate_distance(true_coords, predicted_lat_lon)
         is_success = distance_km is not None and distance_km <= SUCCESS_THRESHOLD_KM
         return {

                         print(f"📍 Sample {i + 1}/{len(test_samples)}")
                         try:
                             result = self.run_single_test_with_bot(bot, sample)
+                            if result is None:
+                                print(f"❌ Sample_{i+1} test failed: No predicted coords")
+                                continue
                             all_results.append(result)
                             status = (
             }
         predicted_lat_lon = bot.analyze_image(screenshot)
+        if predicted_lat_lon is None:
+            return None
         inference_time = time.time() - start_time
         true_coords = {"lat": location_data.get("lat"), "lng": location_data.get("lng")}
         print(f"🔍 True coords: {true_coords}")
         print(f"🔍 Predicted coords: {predicted_lat_lon}")
         distance_km = self.calculate_distance(true_coords, predicted_lat_lon)
         is_success = distance_km is not None and distance_km <= SUCCESS_THRESHOLD_KM
         return {

geo_bot.py CHANGED Viewed

@@ -250,6 +250,7 @@ class GeoBot:
             decision = {
                 "reasoning": "Recovery due to parsing failure or model error.",
                 "action_details": {"action": "PAN_RIGHT"},
             }
         return decision
@@ -347,6 +348,7 @@ class GeoBot:
                 "reasoning": decision.get("reasoning", "N/A"),
                 "action_details": decision.get("action_details", {"action": "N/A"}),
                 "history": history.copy(),  # History up to current step (excluding current)
             }
             action_details = decision.get("action_details", {})

             decision = {
                 "reasoning": "Recovery due to parsing failure or model error.",
                 "action_details": {"action": "PAN_RIGHT"},
+                "debug_message": f"{response.content.strip()}",
             }
         return decision
                 "reasoning": decision.get("reasoning", "N/A"),
                 "action_details": decision.get("action_details", {"action": "N/A"}),
                 "history": history.copy(),  # History up to current step (excluding current)
+                "debug_message": decision.get("debug_message", "N/A"),
             }
             action_details = decision.get("action_details", {})