Spaces:
Running
Running
benediktstroebl
committed on
Commit
·
07044da
1
Parent(s):
6a40c60
Added Raw prediction dashboard
Browse files
app.py
CHANGED
@@ -9,7 +9,10 @@ import json
|
|
9 |
from utils import parse_json_files, create_scatter_plot, create_flow_chart
|
10 |
from huggingface_hub import snapshot_download
|
11 |
from apscheduler.schedulers.background import BackgroundScheduler
|
12 |
-
|
|
|
|
|
|
|
13 |
|
14 |
|
15 |
def restart_space():
|
@@ -96,6 +99,111 @@ def update_step_details(task_id, step_index):
|
|
96 |
return details
|
97 |
|
98 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
99 |
with gr.Blocks() as demo:
|
100 |
gr.Markdown("""
|
101 |
# 🥇 Agent Leaderboard
|
@@ -136,7 +244,7 @@ with gr.Blocks() as demo:
|
|
136 |
"results_accuracy": 20,
|
137 |
"results_total_cost": 20},
|
138 |
)
|
139 |
-
gr.Markdown("##
|
140 |
with gr.Row():
|
141 |
with gr.Column(scale=1):
|
142 |
task_dropdown = gr.Dropdown(choices=list(analyzed_traces.keys()), label="Select USACO Task")
|
@@ -153,6 +261,27 @@ with gr.Blocks() as demo:
|
|
153 |
steps_dropdown.change(update_step_details,
|
154 |
inputs=[task_dropdown, steps_dropdown],
|
155 |
outputs=[step_details])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
156 |
|
157 |
|
158 |
with gr.Tab("About"):
|
|
|
9 |
from utils import parse_json_files, create_scatter_plot, create_flow_chart
|
10 |
from huggingface_hub import snapshot_download
|
11 |
from apscheduler.schedulers.background import BackgroundScheduler
|
12 |
+
from datetime import datetime
|
13 |
+
import json
|
14 |
+
import re
|
15 |
+
import markdown
|
16 |
|
17 |
|
18 |
def restart_space():
|
|
|
99 |
return details
|
100 |
|
101 |
|
102 |
+
def format_call_info(call, call_index):
    """Render one analyzed call as an HTML fragment.

    All dynamic text (JSON payloads, metadata values, analysis fields) is
    HTML-escaped before being interpolated, so content containing ``<``,
    ``>`` or ``&`` is displayed literally instead of being parsed as markup.

    Args:
        call: Mapping with a ``'call_data'`` entry (reads ``weave_task_id``,
            ``trace_id``, ``project_id``, ``created_timestamp``, ``inputs``
            including ``inputs['model']``, ``outputs`` and ``summary``) and
            an ``'analysis'`` entry (reads ``description``, ``assessment``,
            ``success``, ``action_type`` and, optionally, ``step_outline``).
        call_index: Zero-based position of the call; displayed one-based.

    Returns:
        A string holding a ``<style>`` block followed by the formatted
        metadata, inputs, outputs, usage and analysis sections.

    Raises:
        KeyError: If a required key listed above is missing.
    """
    # Function-scope import so the fix does not depend on the file header.
    from html import escape

    call_data = call['call_data']
    analysis = call['analysis']

    def text(value):
        # Escape any value (str, bool, number, ...) for use as HTML text.
        return escape(str(value))

    def format_json(obj):
        # Escape first, then insert the <br> tags, so the tags themselves
        # are not escaped. The original space-for-space replace was a no-op
        # and is dropped; indentation is preserved by the pre-wrap CSS rule.
        json_str = escape(json.dumps(obj, indent=2), quote=False)
        json_str = json_str.replace('\n', '<br>')
        return f'<div class="json-wrapper">{json_str}</div>'

    # Currently not used but we can enable it to format message content
    def format_message_content(content):
        # Convert Markdown to HTML
        html_content = markdown.markdown(content)

        # Replace ``` code blocks with styled pre blocks
        html_content = re.sub(r'```python\n(.*?)```', lambda m: f'<pre class="code-block">{m.group(1)}</pre>', html_content, flags=re.DOTALL)

        return html_content

    # datetime.fromtimestamp without a tz argument yields local time.
    created_at = datetime.fromtimestamp(call_data['created_timestamp'])

    formatted_info = f"""
    <style>
        .json-wrapper {{
            white-space: pre-wrap;
            word-wrap: break-word;
            font-family: monospace;
            max-height: 300px;
            overflow-y: auto;
            background-color: #f5f5f5;
            padding: 10px;
            border-radius: 5px;
        }}
        .message-content {{
            white-space: normal;
            word-wrap: break-word;
            font-family: Arial, sans-serif;
            max-height: 500px;
            overflow-y: auto;
            background-color: #ffffff;
            padding: 10px;
            border-radius: 5px;
            border: 1px solid #e0e0e0;
        }}
        .code-block {{
            background-color: #f0f0f0;
            padding: 10px;
            border-radius: 5px;
            font-family: monospace;
            white-space: pre-wrap;
            word-wrap: break-word;
        }}
    </style>

    <h2>Step {call_index+1}: {text(analysis.get('step_outline', 'N/A'))}</h2>

    <h3>Call Metadata</h3>
    <ul>
        <li><strong>Weave Task ID:</strong> {text(call_data['weave_task_id'])}</li>
        <li><strong>Trace ID:</strong> {text(call_data['trace_id'])}</li>
        <li><strong>Project ID:</strong> {text(call_data['project_id'])}</li>
        <li><strong>Created Timestamp:</strong> {text(created_at)}</li>
        <li><strong>Model:</strong> {text(call_data['inputs']['model'])}</li>
    </ul>

    <h3>Inputs</h3>
    {format_json(call_data['inputs'])}

    <h3>Outputs</h3>
    {format_json(call_data['outputs'])}

    <h3>Usage</h3>
    {format_json(call_data['summary'])}

    <h3>Analysis</h3>
    <ul>
        <li><strong>Description:</strong> {text(analysis['description'])}</li>
        <li><strong>Assessment:</strong> {text(analysis['assessment'])}</li>
        <li><strong>Success:</strong> {text(analysis['success'])}</li>
        <li><strong>Action Type:</strong> {text(analysis['action_type'])}</li>
    </ul>
    """
    return formatted_info
|
189 |
+
|
190 |
+
|
191 |
+
def update_call_details(task_id, call_index):
    """Return the HTML details for one call of a task, or a hint message.

    Args:
        task_id: Key into the module-level ``analyzed_traces`` mapping.
        call_index: Zero-based index into that task's ``'steps'`` list, or a
            tuple whose element ``[1]`` is that index, or ``None``.

    Returns:
        The output of ``format_call_info`` for the selected call; otherwise
        a plain-text message when nothing is selected or the index is out
        of range.
    """
    nothing_selected = task_id not in analyzed_traces or call_index is None
    if nothing_selected:
        return "Please select a task and step to view details."

    task_calls = analyzed_traces[task_id]['steps']

    # A tuple selection carries the index in its second slot
    # (presumably a (label, value) pair from the dropdown — TODO confirm).
    position = call_index[1] if isinstance(call_index, tuple) else call_index

    if position < 0 or len(task_calls) <= position:
        return f"Invalid call index: {position}"

    return format_call_info(task_calls[position], position)
|
204 |
+
|
205 |
+
|
206 |
+
|
207 |
with gr.Blocks() as demo:
|
208 |
gr.Markdown("""
|
209 |
# 🥇 Agent Leaderboard
|
|
|
244 |
"results_accuracy": 20,
|
245 |
"results_total_cost": 20},
|
246 |
)
|
247 |
+
gr.Markdown("## Agent Monitor")
|
248 |
with gr.Row():
|
249 |
with gr.Column(scale=1):
|
250 |
task_dropdown = gr.Dropdown(choices=list(analyzed_traces.keys()), label="Select USACO Task")
|
|
|
261 |
steps_dropdown.change(update_step_details,
|
262 |
inputs=[task_dropdown, steps_dropdown],
|
263 |
outputs=[step_details])
|
264 |
+
|
265 |
+
gr.Markdown("## Raw Predictions")
|
266 |
+
with gr.Row():
|
267 |
+
with gr.Column(scale=1):
|
268 |
+
task_dropdown = gr.Dropdown(choices=list(analyzed_traces.keys()), label="Select USACO Task")
|
269 |
+
with gr.Column(scale=1):
|
270 |
+
call_dropdown = gr.Dropdown(label="Select Call")
|
271 |
+
|
272 |
+
with gr.Row():
|
273 |
+
call_details = gr.HTML()
|
274 |
+
|
275 |
+
def update_call_dropdown(task_id):
    """Build the "Select Call" dropdown choices for the chosen task.

    The per-task entry in ``analyzed_traces`` is a mapping whose ``'steps'``
    list holds the calls (that is what ``update_call_details`` indexes), so
    the choices are derived from that list. Taking ``len()`` of the per-task
    mapping itself would count its keys and offer the wrong number of calls.

    Args:
        task_id: Key into the module-level ``analyzed_traces`` mapping.

    Returns:
        A ``gr.Dropdown`` whose choices are ``("Call N", index)`` pairs, one
        per step; no choices when the task is unknown or has no steps.
    """
    calls = analyzed_traces.get(task_id, {}).get('steps', [])
    return gr.Dropdown(choices=[(f"Call {i+1}", i) for i in range(len(calls))])
|
278 |
+
|
279 |
+
task_dropdown.change(update_call_dropdown,
|
280 |
+
inputs=[task_dropdown],
|
281 |
+
outputs=[call_dropdown])
|
282 |
+
call_dropdown.change(update_call_details,
|
283 |
+
inputs=[task_dropdown, call_dropdown],
|
284 |
+
outputs=[call_details])
|
285 |
|
286 |
|
287 |
with gr.Tab("About"):
|