# Spaces: Sleeping
# File size: 6,238 Bytes
# 0869b01
import gradio as gr
import pandas as pd
# Raw tool data; get_error_data() filters and aggregates this frame.
tools = pd.read_csv("./data/tools.csv")
# Top-level Gradio container; the UI is assembled in the `with demo:` block.
demo = gr.Blocks()
# Tool names kept by get_error_data(); rows for any other tool are dropped.
# The same list populates the "Select a tool" dropdown.
INC_TOOLS = [
    "prediction-online",
    "prediction-offline",
    "claude-prediction-online",
    "claude-prediction-offline",
    "prediction-offline-sme",
    "prediction-online-sme",
    "prediction-request-rag",
    "prediction-request-reasoning",
    "prediction-url-cot-claude",
    "prediction-request-rag-claude",
    "prediction-request-reasoning-claude",
]
def set_error(row):
    """Return the boolean error flag for one row of the tools table.

    Args:
        row: Object exposing ``error`` and ``prompt_response`` attributes
            (a DataFrame row when used with ``DataFrame.apply(axis=1)``).

    Returns:
        ``bool(row.error)`` when ``row.error`` already equals ``True`` or
        ``False``. Otherwise the flag is inferred from the response: ``True``
        when ``prompt_response`` is missing or empty, ``False`` when it is
        present.
    """
    flag = row.error
    if flag in [True, False]:
        # Normalise 0/1-style values so callers always get a real bool.
        return bool(flag)

    response = row.prompt_response
    # An empty CSV cell is loaded by pandas as NaN, and NaN is truthy, so a
    # plain ``not response`` would report a missing response as "no error".
    if response is None or (isinstance(response, float) and response != response):
        return True
    return not response
def get_error_data():
    """Build the per-tool, per-week error summary from the module-level ``tools`` frame.

    Rows whose ``tool`` is not in ``INC_TOOLS`` are dropped, every remaining
    row gets an ``error`` flag from ``set_error``, and rows are then counted
    per ``(tool, request_month_year_week, error)``.

    Returns:
        DataFrame with one row per tool and week and the columns ``tool``,
        ``request_month_year_week``, ``False`` (rows without error), ``True``
        (rows with error), ``error_perc`` (errors as a percentage of all rows)
        and ``total_requests``.
    """
    # Copy the filtered rows: adding a column to a slice of ``tools`` raises
    # SettingWithCopyWarning and is not guaranteed to take effect.
    tools_inc = tools[tools['tool'].isin(INC_TOOLS)].copy()
    tools_inc['error'] = tools_inc.apply(set_error, axis=1)

    error = (
        tools_inc
        .groupby(['tool', 'request_month_year_week', 'error'])
        .size()
        .unstack()
        .fillna(0)
        # Make sure both outcome columns exist even when the data holds only
        # errors or only successes; otherwise the lookups below raise KeyError.
        .reindex(columns=[False, True], fill_value=0)
        .reset_index()
    )

    total = error[False] + error[True]
    error['error_perc'] = (error[True] / total) * 100
    error['total_requests'] = total
    return error
def get_error_data_all(error):
    """Collapse the per-tool error table into one row per week.

    Args:
        error: DataFrame holding ``request_month_year_week``,
            ``total_requests`` and the count columns labelled ``False`` and
            ``True``.

    Returns:
        DataFrame with string column labels: ``request_month_year_week``,
        ``total_requests``, ``"False"``, ``"True"`` and ``error_perc`` (the
        weekly error percentage rounded to 4 decimal places).
    """
    def _round4(value):
        return round(value, 4)

    sum_spec = {'total_requests': 'sum', False: 'sum', True: 'sum'}
    weekly = error.groupby('request_month_year_week').agg(sum_spec)
    weekly = weekly.reset_index()

    # Stringify the labels first so the boolean count columns become
    # "False" / "True" in the returned frame.
    weekly.columns = weekly.columns.astype(str)

    percentage = (weekly['True'] / weekly['total_requests']) * 100
    weekly['error_perc'] = percentage.apply(_round4)
    return weekly
# Per-tool, per-week error table; the dropdown callbacks filter this frame.
error = get_error_data()
# Weekly totals across all tools; feeds the overall error bar plot.
error_all = get_error_data_all(error)
with demo:
    # Page header shown above the tabs.
    gr.HTML("<h1>Olas Predict Actual Performance</h1>")
    gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")

    with gr.Tabs():
        with gr.TabItem("🔥 Error Dashboard"):
            # --- Overall error percentage per week, all tools combined ---
            with gr.Row():
                gr.Markdown("# Plot showing overall error")
            with gr.Row():
                with gr.Column():
                    gr.BarPlot(
                        value=error_all,
                        x="request_month_year_week",
                        y="error_perc",
                        title="Error Percentage",
                        x_title="Week",
                        y_title="Error Percentage",
                        height=800,
                        show_label=True,
                        interactive=True,
                        show_actions_button=True,
                        tooltip=["request_month_year_week", "error_perc"],
                    )

            # --- Error percentage per week for one selected tool ---
            with gr.Row():
                gr.Markdown("# Plot showing error by tool")
            with gr.Row():
                sel_tool = gr.Dropdown(label="Select a tool", choices=INC_TOOLS, value=INC_TOOLS[0])
            with gr.Row():
                # Created without data; filled by update_tool_plot when the
                # tool dropdown changes.
                plot_tool_error = gr.BarPlot(
                    title="Error Percentage",
                    x_title="Week",
                    y_title="Error Percentage",
                    show_label=True,
                    interactive=True,
                    show_actions_button=True,
                    tooltip=["request_month_year_week", "error_perc"],
                    width=800,
                )

            # --- Error percentage per tool for one selected week ---
            with gr.Row():
                gr.Markdown("# Plot showing error by week")
            with gr.Row():
                # Ascending sort: the last entry becomes the default selection.
                # Assumes week labels sort chronologically as plain values — TODO confirm.
                choices = sorted(error['request_month_year_week'].unique().tolist())
                sel_week = gr.Dropdown(
                    label="Select a week",
                    choices=choices,
                    # Guard against an empty table, where choices[-1] would raise IndexError.
                    value=choices[-1] if choices else None,
                )
            with gr.Row():
                # Created without data; filled by update_week_plot when the
                # week dropdown changes.
                plot_week_error = gr.BarPlot(
                    title="Error Percentage",
                    x_title="Tool",
                    y_title="Error Percentage",
                    show_label=True,
                    interactive=True,
                    show_actions_button=True,
                    tooltip=["tool", "error_perc"],
                    width=800,
                )

            def _plot_frame(rows):
                """Return a plot-ready copy of a filtered slice of ``error``."""
                # Copy before editing so the shared ``error`` frame is never
                # modified through a filtered slice (chained assignment).
                frame = rows.copy()
                # The count columns are labelled with the booleans False/True;
                # convert every label to a string before handing it to the plot.
                frame.columns = frame.columns.astype(str)
                frame['error_perc'] = frame['error_perc'].apply(lambda x: round(x, 4))
                return frame

            def update_tool_plot(selected_tool):
                """Return the weekly error plot for ``selected_tool``."""
                filtered_data = _plot_frame(error[error['tool'] == selected_tool])
                # Same component type as the ``plot_tool_error`` output it updates.
                return gr.BarPlot(
                    title="Error Percentage",
                    x_title="Week",
                    y_title="Error Percentage",
                    x="request_month_year_week",
                    y="error_perc",
                    value=filtered_data,
                )

            def update_week_plot(selected_week):
                """Return the per-tool error plot for ``selected_week``."""
                filtered_data = _plot_frame(error[error['request_month_year_week'] == selected_week])
                return gr.BarPlot(
                    title="Error Percentage",
                    x_title="Tool",
                    y_title="Error Percentage",
                    x="tool",
                    y="error_perc",
                    value=filtered_data,
                )

            sel_tool.change(update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
            sel_week.change(update_week_plot, inputs=sel_week, outputs=plot_week_error)

            # NOTE(review): a bare name is a no-op expression, so these rows do
            # not appear to place the components again. Kept to preserve the
            # existing layout — confirm and remove if they render as empty rows.
            with gr.Row():
                sel_tool
            with gr.Row():
                plot_tool_error
            with gr.Row():
                sel_week
            with gr.Row():
                plot_week_error

        with gr.TabItem("ℹ️ About"):
            with gr.Accordion("About the Benchmark"):
                gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")
# Turn on the request queue with default_concurrency_limit=40, then start the app.
demo.queue(default_concurrency_limit=40).launch()
|