benediktstroebl committed on
Commit
07044da
·
1 Parent(s): 6a40c60

Added Raw prediction dashboard

Browse files
Files changed (1) hide show
  1. app.py +131 -2
app.py CHANGED
@@ -9,7 +9,10 @@ import json
9
  from utils import parse_json_files, create_scatter_plot, create_flow_chart
10
  from huggingface_hub import snapshot_download
11
  from apscheduler.schedulers.background import BackgroundScheduler
12
-
 
 
 
13
 
14
 
15
  def restart_space():
@@ -96,6 +99,111 @@ def update_step_details(task_id, step_index):
96
  return details
97
 
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  with gr.Blocks() as demo:
100
  gr.Markdown("""
101
  # 🥇 Agent Leaderboard
@@ -136,7 +244,7 @@ with gr.Blocks() as demo:
136
  "results_accuracy": 20,
137
  "results_total_cost": 20},
138
  )
139
- gr.Markdown("## USACO Task Trace Explorer")
140
  with gr.Row():
141
  with gr.Column(scale=1):
142
  task_dropdown = gr.Dropdown(choices=list(analyzed_traces.keys()), label="Select USACO Task")
@@ -153,6 +261,27 @@ with gr.Blocks() as demo:
153
  steps_dropdown.change(update_step_details,
154
  inputs=[task_dropdown, steps_dropdown],
155
  outputs=[step_details])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
 
157
 
158
  with gr.Tab("About"):
 
9
  from utils import parse_json_files, create_scatter_plot, create_flow_chart
10
  from huggingface_hub import snapshot_download
11
  from apscheduler.schedulers.background import BackgroundScheduler
12
+ from datetime import datetime
13
+ import json
14
+ import re
15
+ import markdown
16
 
17
 
18
  def restart_space():
 
99
  return details
100
 
101
 
102
def format_call_info(call, call_index):
    """Render one recorded call and its analysis as an HTML fragment.

    Args:
        call: Mapping with two entries. ``call['call_data']`` must provide
            ``weave_task_id``, ``trace_id``, ``project_id``,
            ``created_timestamp``, ``inputs`` (including ``inputs['model']``),
            ``outputs`` and ``summary``. ``call['analysis']`` must provide
            ``description``, ``assessment``, ``success`` and ``action_type``;
            ``step_outline`` is optional and shown as ``N/A`` when absent.
        call_index: Zero-based position of the call; the heading shows it
            one-based.

    Returns:
        A string containing a ``<style>`` block followed by the call's
        metadata, inputs, outputs, usage summary and analysis as HTML.

    Raises:
        KeyError: If any of the required keys listed above is missing.
    """
    # Imported locally so this function does not rely on a file-level import.
    import html

    call_data = call['call_data']
    analysis = call['analysis']

    def escape_text(value):
        # Values are interpolated straight into HTML; escape markup characters
        # so text such as "<b>" or "a < b" is displayed instead of interpreted.
        return html.escape(str(value), quote=False)

    def format_json(obj):
        # NOTE: special handling of message content (rendering
        # obj['choices'][0] through format_message_content inside a
        # "message-content" div) is intentionally disabled for now.
        json_str = json.dumps(obj, indent=2)
        # Escape BEFORE inserting &nbsp; / <br>, otherwise those entities and
        # tags would themselves be escaped.
        json_str = html.escape(json_str, quote=False)
        json_str = json_str.replace(' ', '&nbsp;')
        json_str = json_str.replace('\n', '<br>')
        return f'<div class="json-wrapper">{json_str}</div>'

    # Currently not used but we can enable it to format message content
    def format_message_content(content):
        # Convert Markdown to HTML
        html_content = markdown.markdown(content)

        # Replace ``` code blocks with styled pre blocks
        html_content = re.sub(r'```python\n(.*?)```', lambda m: f'<pre class="code-block">{m.group(1)}</pre>', html_content, flags=re.DOTALL)

        return html_content

    # NOTE(review): created_timestamp is passed to datetime.fromtimestamp, so
    # it is presumably epoch seconds and is rendered in the server's local
    # time zone - confirm against the trace producer.
    formatted_info = f"""
    <style>
        .json-wrapper {{
            white-space: pre-wrap;
            word-wrap: break-word;
            font-family: monospace;
            max-height: 300px;
            overflow-y: auto;
            background-color: #f5f5f5;
            padding: 10px;
            border-radius: 5px;
        }}
        .message-content {{
            white-space: normal;
            word-wrap: break-word;
            font-family: Arial, sans-serif;
            max-height: 500px;
            overflow-y: auto;
            background-color: #ffffff;
            padding: 10px;
            border-radius: 5px;
            border: 1px solid #e0e0e0;
        }}
        .code-block {{
            background-color: #f0f0f0;
            padding: 10px;
            border-radius: 5px;
            font-family: monospace;
            white-space: pre-wrap;
            word-wrap: break-word;
        }}
    </style>

    <h2>Step {call_index+1}: {escape_text(analysis.get('step_outline', 'N/A'))}</h2>

    <h3>Call Metadata</h3>
    <ul>
        <li><strong>Weave Task ID:</strong> {escape_text(call_data['weave_task_id'])}</li>
        <li><strong>Trace ID:</strong> {escape_text(call_data['trace_id'])}</li>
        <li><strong>Project ID:</strong> {escape_text(call_data['project_id'])}</li>
        <li><strong>Created Timestamp:</strong> {datetime.fromtimestamp(call_data['created_timestamp'])}</li>
        <li><strong>Model:</strong> {escape_text(call_data['inputs']['model'])}</li>
    </ul>

    <h3>Inputs</h3>
    {format_json(call_data['inputs'])}

    <h3>Outputs</h3>
    {format_json(call_data['outputs'])}

    <h3>Usage</h3>
    {format_json(call_data['summary'])}

    <h3>Analysis</h3>
    <ul>
        <li><strong>Description:</strong> {escape_text(analysis['description'])}</li>
        <li><strong>Assessment:</strong> {escape_text(analysis['assessment'])}</li>
        <li><strong>Success:</strong> {escape_text(analysis['success'])}</li>
        <li><strong>Action Type:</strong> {escape_text(analysis['action_type'])}</li>
    </ul>
    """
    return formatted_info
189
+
190
+
191
def update_call_details(task_id, call_index):
    """Return the HTML details for the selected call of a task.

    Returns a prompt message when the task is unknown or no call is selected,
    an "Invalid call index" message when the index falls outside the task's
    steps, and otherwise the output of ``format_call_info``.
    """
    if task_id not in analyzed_traces or call_index is None:
        return "Please select a task and step to view details."

    steps = analyzed_traces[task_id]['steps']

    # The selection may arrive as a tuple; its second element is the index.
    position = call_index[1] if isinstance(call_index, tuple) else call_index

    out_of_range = position < 0 or position >= len(steps)
    if out_of_range:
        return f"Invalid call index: {position}"

    return format_call_info(steps[position], position)
204
+
205
+
206
+
207
  with gr.Blocks() as demo:
208
  gr.Markdown("""
209
  # 🥇 Agent Leaderboard
 
244
  "results_accuracy": 20,
245
  "results_total_cost": 20},
246
  )
247
+ gr.Markdown("## Agent Monitor")
248
  with gr.Row():
249
  with gr.Column(scale=1):
250
  task_dropdown = gr.Dropdown(choices=list(analyzed_traces.keys()), label="Select USACO Task")
 
261
  steps_dropdown.change(update_step_details,
262
  inputs=[task_dropdown, steps_dropdown],
263
  outputs=[step_details])
264
+
265
+ gr.Markdown("## Raw Predictions")
266
+ with gr.Row():
267
+ with gr.Column(scale=1):
268
+ task_dropdown = gr.Dropdown(choices=list(analyzed_traces.keys()), label="Select USACO Task")
269
+ with gr.Column(scale=1):
270
+ call_dropdown = gr.Dropdown(label="Select Call")
271
+
272
+ with gr.Row():
273
+ call_details = gr.HTML()
274
+
275
def update_call_dropdown(task_id):
    """Build the call dropdown for the selected task.

    Returns a ``gr.Dropdown`` whose choices are ``("Call N", index)`` pairs,
    one per entry in the task's ``'steps'`` list; the list is empty when the
    task is unknown or has no ``'steps'`` entry.
    """
    # The calls live under the record's 'steps' key, which is where
    # update_call_details reads them. Counting the record itself would count
    # its keys, not its calls.
    calls = analyzed_traces.get(task_id, {}).get('steps', [])
    return gr.Dropdown(choices=[(f"Call {i+1}", i) for i in range(len(calls))])
278
+
279
+ task_dropdown.change(update_call_dropdown,
280
+ inputs=[task_dropdown],
281
+ outputs=[call_dropdown])
282
+ call_dropdown.change(update_call_details,
283
+ inputs=[task_dropdown, call_dropdown],
284
+ outputs=[call_details])
285
 
286
 
287
  with gr.Tab("About"):