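# NOTE: the next three lines are page-header residue from the hosting site
# (not code); they are kept verbatim as comments so the module still parses.
# benediktstroebl's picture
# fixed tooltip error
# 2c6a894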
import json
import plotly.express as px
from utils.pareto import Agent, compute_pareto_frontier
import plotly.graph_objects as go
import textwrap
import numpy as np
import pandas as pd
from scipy import stats
def create_leaderboard(df, ci_metrics=None):
    """Return a string-cast copy of ``df`` with CI text merged into metric cells.

    The whole frame is cast with ``astype(str)``. For every name in
    ``ci_metrics`` the metric cell is rewritten as
    ``"<metric rounded to 2 decimals> (<CI text>)"`` using the companion
    column ``"<name> CI"``.

    A cell is left untouched when:
      * the CI cell is a stringified missing value (``'None'``, ``'nan'``,
        ``'<NA>'``), or
      * the metric cell cannot be parsed as a float.

    Metrics whose column, or whose ``"<name> CI"`` column, is absent are
    skipped instead of raising ``KeyError``.

    Args:
        df: Leaderboard table. The caller's frame is not modified; all writes
            go to the frame returned by ``astype``.
        ci_metrics: Optional iterable of metric column names. ``None`` or an
            empty iterable disables merging.

    Returns:
        The string-cast DataFrame with merged metric columns.
    """
    # Spellings a missing value can take once it has been passed through str().
    missing_markers = {'None', 'nan', '<NA>'}

    df = df.astype(str)
    for metric in ci_metrics or ():
        ci_column = metric + ' CI'
        if metric not in df.columns or ci_column not in df.columns:
            # Nothing to merge for this metric.
            continue

        merged = []
        for value, interval in zip(df[metric], df[ci_column]):
            # str() again: some pandas versions keep missing values as NaN
            # objects even after astype(str).
            value, interval = str(value), str(interval)
            if interval in missing_markers:
                merged.append(value)
                continue
            try:
                rounded = round(float(value), 2)
            except ValueError:
                # Non-numeric metric cell: keep it verbatim rather than crash.
                merged.append(value)
                continue
            merged.append(f"{rounded} ({interval})")

        # Positional assignment, so duplicated index labels cannot make one
        # row overwrite another (which label-based ``df.at`` would do).
        df[metric] = merged
    return df
def create_task_success_heatmap(df, benchmark_name):
    """Build an agent-by-task success heatmap as a Plotly figure.

    Rows are agents ordered by mean ``Success`` (best first), columns are
    tasks ordered by mean ``Success`` (most-solved first). One extra row is
    appended at the bottom marking, per task, whether any agent has a
    positive ``Success`` value; its label carries the solved/total count.

    Args:
        df: Long-format results with 'Agent Name', 'Task ID' and 'Success'
            columns. ``DataFrame.pivot`` is used, so each (agent, task) pair
            must occur at most once.
            # NOTE(review): 'Success' is presumably 0/1 -- confirm with caller.
        benchmark_name: Benchmark label; "SWE-bench Verified (Mini)" forces
            the displayed task total to 50.

    Returns:
        A ``plotly.graph_objects.Figure`` containing the heatmap.
    """
    # Orderings for both axes, highest mean success first.
    by_agent = df.groupby('Agent Name')['Success'].mean().sort_values(ascending=False)
    by_task = df.groupby('Task ID')['Success'].mean().sort_values(ascending=False)

    # Agents x tasks matrix, rearranged to the orderings above.
    grid = df.pivot(index='Agent Name', columns='Task ID', values='Success').reindex(
        index=by_agent.index, columns=by_task.index
    )

    # 1 where at least one agent has a positive success value for the task.
    solved_by_any = (grid.sum(axis=0) > 0).astype(int)

    if benchmark_name == "SWE-bench Verified (Mini)":
        total_tasks = 50  # TODO - remove hardcoding
    else:
        total_tasks = len(grid.columns)

    # Summary row appended beneath the per-agent rows.
    summary_row = solved_by_any.to_frame().T
    summary_row.index = [f'<b>Tasks Solved: {solved_by_any.sum()}/{total_tasks} (Any Agent)</b>']
    grid = pd.concat([grid, summary_row])

    # 30 px per row (summary row included) plus 50 px of extra space.
    figure_height = 30 * len(grid.index) + 50

    heatmap = go.Heatmap(
        z=grid.values,
        y=grid.index,
        x=grid.columns,
        colorscale=[[0, 'white'], [1, '#3498db']],
        showscale=False,
        hovertemplate=(
            '<b>Agent:</b> %{y}<br>'
            '<b>Task:</b> %{x}<br>'
            '<b>Status:</b> %{z}<extra></extra>'
        ),
    )
    fig = go.Figure(data=heatmap)

    y_axis = dict(
        autorange='reversed',
        showticklabels=True,
        showline=True,
        linecolor='black',
        showgrid=False,
    )
    x_axis = dict(
        side='top',
        showticklabels=False,
        showline=True,
        linecolor='black',
        showgrid=False,
    )
    hover_label = dict(bgcolor="white", font_size=12, font_family="Arial")
    # Pan-only toolbar: zoom, select and hover-mode buttons are removed.
    mode_bar = dict(
        activecolor='#1f77b4',
        orientation='h',
        bgcolor='rgba(255,255,255,0.8)',
        color='#777',
        add=['pan2d'],
        remove=[
            'zoom2d', 'zoomIn2d', 'zoomOut2d', 'resetScale2d',
            'hoverClosestCartesian', 'hoverCompareCartesian',
            'toggleSpikelines', 'lasso2d', 'lasso', 'select2d', 'select',
        ],
    )

    fig.update_layout(
        xaxis_title='Task ID',
        height=figure_height,
        yaxis=y_axis,
        xaxis=x_axis,
        plot_bgcolor='white',
        paper_bgcolor='white',
        hoverlabel=hover_label,
        modebar=mode_bar,
        dragmode='pan',
    )
    return fig
def create_bar_chart(categories, values, x_label, y_label, title):
    """Build a horizontal bar chart of counts per category, largest on top.

    Each bar is annotated with its share of ``sum(values)``, and the hover
    text shows the raw count together with the same share.

    Empty input yields an empty chart, and an all-zero total is reported as
    0.0% for every bar instead of raising ``ZeroDivisionError``.

    Args:
        categories: Category labels, one per bar.
        values: Numeric count for each category, parallel to ``categories``.
        x_label: Accepted for interface compatibility; not applied to the figure.
        y_label: Accepted for interface compatibility; not applied to the figure.
        title: Accepted for interface compatibility; not applied to the figure.

    Returns:
        A ``plotly.graph_objects.Figure`` with a single ``Bar`` trace.
    """
    # Largest value first; the reversed y-axis below then draws it at the top.
    ranked = sorted(zip(categories, values), key=lambda pair: pair[1], reverse=True)
    if ranked:
        categories, values = zip(*ranked)
    else:
        # zip(*[]) yields nothing to unpack, so handle the empty chart explicitly.
        categories, values = (), ()

    total = sum(values)
    # Share of the total per bar, formatted once and reused for label and hover.
    shares = [f"{(value / total if total else 0.0):.1%}" for value in values]
    text_labels = [f"({share} of failures)" for share in shares]
    hover_labels = [f'{value} tasks ({share} of failures)' for value, share in zip(values, shares)]

    fig = go.Figure(data=[go.Bar(
        y=categories,
        x=values,
        orientation='h',
        marker_color='#3498db',  # Same color as the scatter plot
        text=text_labels,
        textposition='auto',
        customdata=hover_labels,
        textfont=dict(color='black', size=14, family='Arial', weight=2),
        hovertemplate='<b>%{y}</b><br>' +
                      'Affected Tasks: %{customdata}<extra></extra>'
    )])

    fig.update_layout(
        height=600,
        xaxis=dict(
            showline=True,
            linecolor='black',
            showgrid=False
        ),
        yaxis=dict(
            showline=True,
            linecolor='black',
            showgrid=False,
            autorange="reversed"  # Puts the category with the highest value at the top
        ),
        plot_bgcolor='white',
        paper_bgcolor='white',
        bargap=0.2,
        bargroupgap=0.1,
        hoverlabel=dict(bgcolor="white", font_size=12, font_family="Arial"),
        # Pan-only toolbar: zoom, select and hover-mode buttons are removed.
        modebar=dict(
            activecolor='#1f77b4',
            orientation='h',
            bgcolor='rgba(255,255,255,0.8)',
            color='#777',
            add=['pan2d'],
            remove=[
                'zoom2d', 'zoomIn2d', 'zoomOut2d', 'resetScale2d',
                'hoverClosestCartesian', 'hoverCompareCartesian',
                'toggleSpikelines', 'lasso2d', 'lasso', 'select2d', 'select'
            ]
        ),
        dragmode='pan'
    )
    return fig
def _strip_markdown_link(name):
    """Return the link text of a ``[text](url)``-style name; pass other values through."""
    # Only strip when the name actually starts with '[', so names that merely
    # contain a bracket (e.g. "Agent [v2]") are not truncated.
    if isinstance(name, str) and name.startswith('['):
        return name[1:].split(']', 1)[0]
    return name


def _min_max_error_bar(samples, center, color):
    """Return an asymmetric error-bar spec reaching from min to max of ``samples``."""
    return dict(
        type='data',
        symmetric=False,
        # Scalars inside a flat one-element list, one entry per plotted point.
        array=[np.max(samples) - center],
        arrayminus=[center - np.min(samples)],
        color=color,
    )


def _annotation_offset(x_pos, y_pos, x_min, x_max, x_range, y_min, y_max, y_range):
    """Pick the pixel offset (ax, ay) of a label from its point, based on edge proximity."""
    ax, ay = 20, 20  # default offset
    if x_pos < x_min + 0.05 * x_range:
        ax = 120  # large shift for points very close to the left edge
    if x_pos > x_max - 0.1 * x_range:
        ax = -20  # near the right edge: move the label to the left
    if y_pos < y_min + 0.05 * y_range:
        ay = -30  # near the bottom: move the label up
    if y_pos > y_max - 0.1 * y_range:
        # NOTE(review): per Plotly's annotation docs a negative ay places the
        # label above the point, although the original comment said "down".
        # Value kept as-is -- confirm the intended direction.
        ay = -20
    return ax, ay


def create_scatter_plot(df, x: str, y: str, x_label: str = None, y_label: str = None, hover_data: list = None):
    """Build a scatter plot with one point per agent and a Pareto frontier.

    Each agent is plotted at the mean of its ``x`` and ``y`` values. Agents
    with more than one row also get min-max error bars on both axes. Labels
    are added as annotations, but only for points that are not close to an
    earlier point.

    The Pareto frontier is always computed from the 'Total Cost' and
    'Accuracy' columns grouped by 'Agent Name', regardless of ``x`` and ``y``.
    The hover text is likewise fixed to "Total Cost" / "Accuracy".
    # NOTE(review): the exact dominance rule lives in ``compute_pareto_frontier``
    # (utils.pareto), which is not visible here.

    Args:
        df: Results table with 'Agent Name', 'Total Cost', 'Accuracy' and the
            columns named by ``x``, ``y`` and ``hover_data``.
        x: Column plotted on the x-axis.
        y: Column plotted on the y-axis.
        x_label: Optional x-axis title.
        y_label: Optional y-axis title.
        hover_data: Columns passed as ``customdata``. The first entry is also
            the grouping column for points. Defaults to ``['Agent Name']``
            when omitted or empty.

    Returns:
        A ``plotly.graph_objects.Figure``.
    """
    # The original indexed hover_data[0] unconditionally and crashed on the
    # documented default of None.
    if not hover_data:
        hover_data = ['Agent Name']
    group_column = hover_data[0]

    # One Agent per unique name, built from that agent's mean cost and accuracy.
    agents = []
    for agent_name in df['Agent Name'].unique():
        runs = df[df['Agent Name'] == agent_name]
        agents.append(Agent(runs['Total Cost'].mean(), runs['Accuracy'].mean()))
    pareto_frontier = compute_pareto_frontier(agents)

    # Frontier points sorted by cost so the line is drawn left to right.
    pareto_points = sorted(
        ((agent.total_cost, agent.accuracy) for agent in pareto_frontier),
        key=lambda point: point[0],
    )

    fig = go.Figure()
    fig.add_trace(go.Scatter(
        x=[point[0] for point in pareto_points],
        y=[point[1] for point in pareto_points],
        mode='lines',
        name='Pareto Frontier',
        # hoverinfo=None leaves Plotly's default hover enabled; 'skip' is the
        # value that actually suppresses hover for this trace.
        hoverinfo='skip',
        line=dict(color='black', width=1, dash='dash')
    ))

    # Shared settings for the invisible carrier points of the error bars.
    hidden_point = dict(
        mode='markers',
        marker=dict(color='rgba(0,0,0,0)', opacity=0),
        showlegend=False,
        hoverinfo='skip',
    )

    # Coordinates and labels collected for annotation placement afterwards.
    all_x = []
    all_y = []
    all_labels = []

    for agent in df[group_column].unique():
        # Work on a copy so the column assignment below neither warns nor
        # touches the caller's frame.
        agent_data = df[df[group_column] == agent].copy()
        if agent_data.empty:
            # e.g. a NaN group key, which never compares equal to itself.
            continue

        # Remove the URL part from markdown-link style names.
        agent_data['Agent Name'] = agent_data['Agent Name'].apply(_strip_markdown_link)

        x_mean = np.mean(agent_data[x].values)
        y_mean = np.mean(agent_data[y].values)

        all_x.append(x_mean)
        all_y.append(y_mean)
        all_labels.append(agent_data['Agent Name'].iloc[0])

        if len(agent_data) > 1:
            # Error bars for x (cost min-max)
            fig.add_trace(go.Scatter(
                x=[x_mean],
                y=[y_mean],
                error_x=_min_max_error_bar(agent_data[x], x_mean, '#fec44f'),
                **hidden_point
            ))
            # Error bars for y (accuracy min-max)
            fig.add_trace(go.Scatter(
                x=[x_mean],
                y=[y_mean],
                error_y=_min_max_error_bar(agent_data[y], y_mean, '#bdbdbd'),
                **hidden_point
            ))

        # The visible marker for this agent.
        fig.add_trace(go.Scatter(
            x=[x_mean],
            y=[y_mean],
            mode='markers',
            marker=dict(size=10, color='#3498db'),
            customdata=agent_data[hover_data],
            showlegend=False,
            hovertemplate="<br>".join([
                "<b>Agent</b>: %{customdata[0]}",
                "<b>Total Cost</b>: $%{x:.1f}",
                "<b>Accuracy</b>: %{y:.1%}<extra></extra>",
            ]),
            hoverlabel=dict(bgcolor="white", font_size=12, font_family="Arial"),
        ))

    # Legend-only entries explaining the error-bar colors.
    fig.add_trace(go.Scatter(
        x=[None], y=[None], mode='markers',
        marker=dict(color='#fec44f', size=10),
        name='Cost CI (Min-Max)'
    ))
    fig.add_trace(go.Scatter(
        x=[None], y=[None], mode='markers',
        marker=dict(color='#bdbdbd', size=10),
        name='Accuracy CI (Min-Max)'
    ))

    fig.update_layout(
        height=600,
        xaxis_title=x_label,
        yaxis_title=y_label,
        xaxis=dict(
            showline=True,
            linecolor='black',
            showgrid=False
        ),
        yaxis=dict(
            showline=True,
            showgrid=False,
            linecolor='black'
        ),
        plot_bgcolor='white',
        legend=dict(
            yanchor="bottom",
            y=0.01,
            xanchor="right",
            x=0.98,
            bgcolor="rgba(255, 255, 255, 0.5)"
        ),
        # Pan-only toolbar: zoom, select and hover-mode buttons are removed.
        modebar=dict(
            activecolor='#1f77b4',
            orientation='h',
            bgcolor='rgba(255,255,255,0.8)',
            color='#777',
            add=['pan2d'],
            remove=[
                'zoom2d', 'zoomIn2d', 'zoomOut2d', 'resetScale2d',
                'hoverClosestCartesian', 'hoverCompareCartesian',
                'toggleSpikelines', 'lasso2d', 'lasso', 'select2d', 'select'
            ]
        ),
        dragmode='pan',
        showlegend=True,
        annotations=[],
    )

    # Label placement. The extents are loop-invariant, so compute them once;
    # the guard keeps min()/max() away from an empty list.
    if all_x:
        x_min, x_max = min(all_x), max(all_x)
        y_min, y_max = min(all_y), max(all_y)
        x_range = x_max - x_min
        y_range = y_max - y_min

        for i, (x_pos, y_pos, label) in enumerate(zip(all_x, all_y, all_labels)):
            # A point is crowded when an earlier point lies within 12% of the
            # data range on both axes; crowded points get no label.
            crowded = any(
                abs(x_pos - all_x[j]) < 0.12 * x_range
                and abs(y_pos - all_y[j]) < 0.12 * y_range
                for j in range(i)
            )
            if crowded:
                continue

            ax, ay = _annotation_offset(
                x_pos, y_pos, x_min, x_max, x_range, y_min, y_max, y_range
            )
            fig.add_annotation(
                x=x_pos,
                y=y_pos,
                text=label,
                showarrow=True,
                arrowhead=0,
                arrowsize=1,
                arrowwidth=1,
                arrowcolor="#CCCCCC",
                ax=ax,
                ay=ay,
                font=dict(
                    size=10
                ),
            )

    fig.update_yaxes(rangemode="tozero")
    fig.update_xaxes(rangemode="tozero")
    return fig
import plotly.graph_objects as go
import textwrap
def create_flow_chart(steps):
    """Build a left-to-right chain of step nodes as a Plotly figure.

    Step ``i`` becomes a marker at ``(i, 0)`` labelled "Step <i+1>" plus its
    wrapped headline, connected to the previous step by a line. Hovering a
    node shows the step number, a shortened headline and the wrapped
    description.

    Args:
        steps: Iterable of mappings, each with an 'analysis' entry that is
            either a dict or a JSON string. The keys read from it are
            'description' and 'headline'. Unparseable JSON, or an analysis
            that is not a dict, is treated as an empty dict.

    Returns:
        A ``plotly.graph_objects.Figure`` with an edge trace and a node trace.
    """
    default_description = 'No description available.'

    node_x, node_y = [], []
    edge_x, edge_y = [], []
    node_text = []
    hover_text = []

    for i, step in enumerate(steps):
        node_x.append(i)
        node_y.append(0)

        analysis = step['analysis']
        if isinstance(analysis, str):
            try:
                analysis = json.loads(analysis)
            except json.JSONDecodeError:
                analysis = {}
        if not isinstance(analysis, dict):
            # None, or JSON that decoded to a list/number/string: no usable fields.
            analysis = {}

        # textwrap needs real strings, so map explicit nulls to the defaults.
        description = analysis.get('description', default_description)
        description = default_description if description is None else str(description)
        step_headline = analysis.get('headline', '')
        step_headline = '' if step_headline is None else str(step_headline)

        # Wrap text to improve readability.
        wrapped_description = '<br>'.join(textwrap.wrap(description, width=90, max_lines=20))
        wrapped_outline = textwrap.shorten(step_headline, width=50, placeholder='')
        wrapped_outline = '' if wrapped_outline == '' else f": {wrapped_outline}"
        node_text_outline = '' if wrapped_outline == '' else f":<br>{'<br>'.join(textwrap.wrap(step_headline, width=30, placeholder=''))}"
        node_text.append(f"Step {i+1}{node_text_outline}")

        hover_text.append(
            f"<b>Step {i+1}{wrapped_outline}</b><br><br>"
            f"<b>Description:</b><br>"
            f"{wrapped_description}<br><br>"
        )

        if i > 0:
            # The None entry breaks the line so segments stay independent.
            edge_x.extend([i - 1, i, None])
            edge_y.extend([0, 0, None])

    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        text=node_text,
        textposition="top center",
        showlegend=False,
        hovertext=hover_text,
        hoverinfo='text',
        hoverlabel=dict(bgcolor="white", font_size=12, font_family="Arial"),
        marker=dict(
            color='#3498db',
            size=30,
            line_width=2,
        ))

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=2, color='#888'),
        hoverinfo='none',
        showlegend=False,
        mode='lines')

    # Single layout carrying the final legend and modebar settings, which the
    # original reached through successive update_layout calls.
    layout = go.Layout(
        showlegend=True,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        plot_bgcolor='white',
        paper_bgcolor='white',
        modebar=dict(
            activecolor='#1f77b4',  # Color of active tool
            orientation='h',  # Horizontal orientation
            bgcolor='rgba(255,255,255,0.8)',  # Slightly transparent white background
            color='#777',  # Color of inactive tools
            add=['pan2d'],
            remove=[
                'zoom2d',
                'zoomIn2d',
                'zoomOut2d',
                'resetScale2d',
                'hoverClosestCartesian',
                'hoverCompareCartesian',
                'toggleSpikelines',
                'lasso2d',
                'lasso',
                'select2d',
                'select',
            ],
        ),
        legend=dict(
            orientation="h",
            yanchor="bottom",
            y=1.02,
            xanchor="right",
            x=1,
            bgcolor='rgba(255,255,255,0.8)',  # Slightly transparent white background
            bordercolor='rgba(0,0,0,0.1)',  # Light border around the legend
            borderwidth=1
        ),
        dragmode='pan',
    )

    # Edges first so the node markers are drawn on top of the lines.
    return go.Figure(data=[edge_trace, node_trace], layout=layout)