File size: 6,238 Bytes
0869b01
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
import gradio as gr
import pandas as pd

# Raw tool request records, read once at import time from a path relative to
# the current working directory.
tools = pd.read_csv("./data/tools.csv")

# Top-level Gradio container; the layout is attached to it further down.
demo = gr.Blocks()

# Tool names included in the error statistics. Rows of `tools` whose `tool`
# value is not listed here are dropped before aggregation, and the same list
# feeds the tool-selection dropdown.
INC_TOOLS = [
    'prediction-online', 
    'prediction-offline', 
    'claude-prediction-online', 
    'claude-prediction-offline', 
    'prediction-offline-sme',
    'prediction-online-sme',
    'prediction-request-rag',
    'prediction-request-reasoning',
    'prediction-url-cot-claude', 
    'prediction-request-rag-claude',
    'prediction-request-reasoning-claude'
]

def set_error(row):
    """Return the error flag for one request row, inferring it when absent.

    Args:
        row: Object exposing ``error`` and ``prompt_response`` attributes
            (for example a row passed by ``DataFrame.apply(..., axis=1)``).

    Returns:
        bool: ``row.error`` when it already is a true/false value; otherwise
        ``True`` if the prompt response is missing or empty, else ``False``.
    """
    flag = row.error
    # Check for missing values first: NaN/None/NA are not usable flags.
    if not pd.isna(flag) and flag in (True, False):
        return bool(flag)

    response = row.prompt_response
    # Empty CSV cells are loaded as NaN, and NaN is truthy, so a plain
    # ``not response`` would never treat a missing response as an error.
    if pd.isna(response) or not response:
        return True
    return False

def get_error_data():
    """Build per-tool, per-week error counts and error percentages.

    Reads the module-level ``tools`` frame, keeps rows whose ``tool`` is in
    ``INC_TOOLS``, normalises the ``error`` column with ``set_error`` and
    counts failed/successful requests per (tool, week).

    Returns:
        DataFrame with one row per (tool, week) and the columns ``tool``,
        ``request_month_year_week``, ``False`` (successful requests),
        ``True`` (failed requests), ``error_perc`` and ``total_requests``.
    """
    # Work on a copy so adding the normalised column does not write into a
    # slice of the module-level frame (avoids SettingWithCopyWarning).
    tools_inc = tools[tools['tool'].isin(INC_TOOLS)].copy()
    # set_error only ever yields true/false values; force a real bool dtype so
    # the unstacked column labels are exactly False and True.
    tools_inc['error'] = tools_inc.apply(set_error, axis=1).astype(bool)
    error = (
        tools_inc.groupby(['tool', 'request_month_year_week', 'error'])
        .size()
        .unstack()
        # Guarantee both outcome columns exist even when the data holds only
        # successes or only failures; otherwise error[True] raises KeyError.
        .reindex(columns=[False, True])
        .fillna(0)
        .reset_index()
    )
    error['error_perc'] = (error[True] / (error[False] + error[True])) * 100
    error['total_requests'] = error[False] + error[True]
    return error

def get_error_data_all(error):
    """Collapse the per-tool error table into one row per week.

    Args:
        error: Frame holding ``request_month_year_week``, ``total_requests``
            and the boolean-labelled ``False`` / ``True`` count columns.

    Returns:
        DataFrame with the summed counts per week, an ``error_perc`` column
        rounded to four decimals, and all column labels converted to strings.
    """
    summed_columns = ['total_requests', False, True]
    weekly = (
        error.groupby('request_month_year_week')
        .agg(dict.fromkeys(summed_columns, 'sum'))
        .reset_index()
    )

    # The percentage is derived while the count columns still carry their
    # boolean labels; the labels are stringified afterwards.
    failure_share = weekly[True] / weekly['total_requests']
    weekly['error_perc'] = (failure_share * 100).map(lambda pct: round(pct, 4))

    weekly.columns = weekly.columns.astype(str)
    return weekly

# Aggregate once at import time; the dashboard below reads these module-level
# frames (per tool and week, and summed over all tools per week).
error = get_error_data()
error_all = get_error_data_all(error)

# Dashboard layout: an overall weekly error plot, a per-tool plot driven by a
# tool dropdown, and a per-week plot driven by a week dropdown.
with demo:
    gr.HTML("<h1>Olas Predict Actual Performance</h1>")
    gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")

    with gr.Tabs():
        with gr.TabItem("🔥 Error Dashboard"):
            with gr.Row():
                gr.Markdown("# Plot showing overall error")
            with gr.Row():
                # Static plot of the error percentage over all tools per week.
                with gr.Column():
                    gr.BarPlot(
                        value=error_all,
                        x="request_month_year_week",
                        y="error_perc",
                        title="Error Percentage",
                        x_title="Week",
                        y_title="Error Percentage",
                        height=800,
                        show_label=True,
                        interactive=True,
                        show_actions_button=True,
                        tooltip=["request_month_year_week", "error_perc"]
                    )
            with gr.Row():
                gr.Markdown("# Plot showing error by tool")

            with gr.Row():
                sel_tool = gr.Dropdown(label="Select a tool", choices=INC_TOOLS, value=INC_TOOLS[0])

            with gr.Row():
                plot_tool_error = gr.BarPlot(
                    title="Error Percentage",
                    x_title="Week",
                    y_title="Error Percentage",
                    show_label=True,
                    interactive=True,
                    show_actions_button=True,
                    tooltip=["request_month_year_week", "error_perc"],
                    width=800
                )

            with gr.Row():
                gr.Markdown("# Plot showing error by week")

            with gr.Row():
                # Ascending sort places the most recent week last (assuming the
                # week labels sort chronologically); that entry is the default.
                choices = sorted(error['request_month_year_week'].unique().tolist())
                sel_week = gr.Dropdown(
                    label="Select a week",
                    choices=choices,
                    # Guard against an empty data set instead of raising IndexError.
                    value=choices[-1] if choices else None
                )

            with gr.Row():
                plot_week_error = gr.BarPlot(
                    title="Error Percentage",
                    x_title="Tool",
                    y_title="Error Percentage",
                    show_label=True,
                    interactive=True,
                    show_actions_button=True,
                    tooltip=["tool", "error_perc"],
                    width=800
                )

            def _plot_ready(frame):
                """Return a copy of ``frame`` prepared for a Gradio plot.

                The copy keeps the module-level ``error`` frame untouched and
                avoids assigning into a filtered slice. Column labels become
                strings (the count columns are labelled with booleans) and
                ``error_perc`` is rounded to four decimals.
                """
                prepared = frame.copy()
                prepared.columns = prepared.columns.astype(str)
                prepared['error_perc'] = prepared['error_perc'].round(4)
                return prepared

            def update_tool_plot(selected_tool):
                """Return the weekly error plot update for ``selected_tool``."""
                filtered_data = _plot_ready(error[error['tool'] == selected_tool])
                # Same component type as the output it updates (a BarPlot).
                return gr.BarPlot(
                    title="Error Percentage",
                    x_title="Week",
                    y_title="Error Percentage",
                    x="request_month_year_week",
                    y="error_perc",
                    value=filtered_data
                )

            def update_week_plot(selected_week):
                """Return the per-tool error plot update for ``selected_week``."""
                filtered_data = _plot_ready(error[error['request_month_year_week'] == selected_week])
                return gr.BarPlot(
                    title="Error Percentage",
                    x_title="Tool",
                    y_title="Error Percentage",
                    x="tool",
                    y="error_perc",
                    value=filtered_data
                )

            sel_tool.change(update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
            sel_week.change(update_week_plot, inputs=sel_week, outputs=plot_week_error)

            # Render both plots for the preselected dropdown values on page
            # load; otherwise they stay empty until the user changes a dropdown.
            demo.load(update_tool_plot, inputs=sel_tool, outputs=plot_tool_error)
            demo.load(update_week_plot, inputs=sel_week, outputs=plot_week_error)

        with gr.TabItem("ℹ️ About"):
            with gr.Accordion("About the Benchmark"):
                gr.Markdown("This app shows the actual performance of Olas Predict tools on the live market.")

demo.queue(default_concurrency_limit=40).launch()