File size: 10,896 Bytes
9568e9c
 
 
 
 
 
1fd3f24
9568e9c
 
 
1fd3f24
 
9568e9c
 
 
 
 
1fd3f24
 
9568e9c
 
1fd3f24
 
 
9568e9c
1fd3f24
9568e9c
 
 
1fd3f24
9568e9c
 
 
 
 
 
1fd3f24
 
 
 
 
 
9568e9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1fd3f24
 
9568e9c
1fd3f24
 
 
 
 
9568e9c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1fd3f24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9568e9c
 
 
 
1fd3f24
9568e9c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import gradio as gr
from datetime import datetime, date, timedelta
from datasets import Dataset
from huggingface_hub import HfApi

from predibench.pnl import compute_pnls

# Configuration
# Hugging Face Hub dataset repository id. load_agent_choices() interpolates it
# into an "hf://datasets/<repo>" parquet path.
AGENT_CHOICES_REPO = "m-ric/predibench-agent-choices"

def load_agent_choices():
    """Fetch the agent-choices dataset from the Hub and return it as a DataFrame.

    The parquet location is derived from the module-level AGENT_CHOICES_REPO.
    """
    parquet_uri = "hf://datasets/" + AGENT_CHOICES_REPO
    choices_dataset = Dataset.from_parquet(parquet_uri)
    return choices_dataset.to_pandas()

def calculate_pnl_and_performance(df: pd.DataFrame):
    """Calculate PnL and per-agent summary metrics from the agent choices.

    Args:
        df: Agent decisions. The columns read here are 'date', 'agent_name'
            and 'choice' (1 = long, -1 = short, 0 = no position).

    Returns:
        dict mapping each agent name to a metrics dict with the keys:
            'total_decisions', 'long_positions', 'short_positions',
            'no_positions': row counts for that agent.
            'cumulative_pnl': the agent's final PnL from compute_pnls.
            'sharpe_ratio', 'win_rate': placeholders, always 0.0.
            'daily_pnl': list of per-step PnL increments.
            'dates': list of the index labels of the PnL series.
            'figure': the per-agent figure from compute_pnls.

    NOTE(review): assumes compute_pnls returns, per agent, a *cumulative* PnL
    pandas Series (inferred from its result names and the .index/.tolist()
    usage) - confirm against predibench.pnl.
    """
    investment_dates = sorted(df['date'].unique())
    final_pnls, cumulative_pnls, figures = compute_pnls(investment_dates, df)

    # Convert to the format expected by frontend
    agents_performance = {}
    for agent in df['agent_name'].unique():
        # Read-only slice: no defensive copy is needed.
        agent_data = df[df['agent_name'] == agent]
        choices = agent_data['choice']
        cumulative_pnl = cumulative_pnls[agent]

        # create_pnl_plot() runs np.cumsum over 'daily_pnl', so the key has to
        # hold increments. Storing the cumulative series itself (as was done
        # before) made the plotted curve a cumulative sum of a cumulative sum.
        daily_pnl = cumulative_pnl.diff()
        if len(daily_pnl):
            # diff() leaves NaN in the first slot; the first increment is the
            # first cumulative value (measured from a zero starting point).
            daily_pnl.iloc[0] = cumulative_pnl.iloc[0]

        agents_performance[agent] = {
            'total_decisions': len(agent_data),
            'long_positions': int((choices == 1).sum()),
            'short_positions': int((choices == -1).sum()),
            'no_positions': int((choices == 0).sum()),
            'cumulative_pnl': final_pnls[agent],
            'sharpe_ratio': 0.0,  # Would need more calculation for proper Sharpe
            'win_rate': 0.0,      # Would need daily PnL for win rate
            'daily_pnl': daily_pnl.tolist(),
            'dates': cumulative_pnl.index.tolist(),
            'figure': figures[agent]
        }

    return agents_performance

def create_leaderboard(performance_data):
    """Build the leaderboard table, best cumulative PnL first.

    Args:
        performance_data: Mapping of raw agent name -> metrics dict holding
            'total_decisions', 'long_positions', 'short_positions',
            'no_positions', 'cumulative_pnl', 'sharpe_ratio' and 'win_rate'.

    Returns:
        pandas DataFrame with one row per agent and display-formatted string
        columns for PnL, Sharpe ratio and win rate, ordered by descending
        cumulative PnL. Row index labels reflect the input order (they are not
        reset after sorting). An empty mapping yields an empty frame that
        still carries the column headers.
    """
    columns = [
        'Agent',
        'Total Decisions',
        'Long Positions',
        'Short Positions',
        'No Position',
        'Cumulative PnL',
        'Sharpe Ratio',
        'Win Rate',
    ]

    rows = []
    raw_pnls = []
    for agent, metrics in performance_data.items():
        raw_pnls.append(float(metrics['cumulative_pnl']))
        rows.append({
            'Agent': agent.replace('smolagent_', '').replace('--', '/'),
            'Total Decisions': metrics['total_decisions'],
            'Long Positions': metrics['long_positions'],
            'Short Positions': metrics['short_positions'],
            'No Position': metrics['no_positions'],
            'Cumulative PnL': f"{metrics['cumulative_pnl']:.3f}",
            'Sharpe Ratio': f"{metrics['sharpe_ratio']:.3f}",
            'Win Rate': f"{metrics['win_rate']:.1%}",
        })

    leaderboard_df = pd.DataFrame(rows, columns=columns)
    if leaderboard_df.empty:
        # Nothing to rank; previously this path raised KeyError because the
        # empty frame had no 'Cumulative PnL' column to sort on.
        return leaderboard_df

    # Rank on the unrounded PnL rather than re-parsing the 3-decimal display
    # string, so agents that differ beyond the third decimal are still ordered
    # correctly. mergesort keeps ties in input order; NaN values sort last.
    ranking = pd.Series(raw_pnls, index=leaderboard_df.index).sort_values(
        ascending=False, kind='mergesort'
    )
    return leaderboard_df.loc[ranking.index]

def create_pnl_plot(performance_data, selected_agent=None):
    """Draw one cumulative-PnL line per agent.

    When selected_agent is a key of performance_data only that agent is
    drawn; otherwise every agent is drawn. Each agent's 'daily_pnl' list is
    accumulated with a leading 0, and its first date is repeated so the x and
    y sequences stay the same length.
    """
    if selected_agent and selected_agent in performance_data:
        agents_to_plot = [selected_agent]
    else:
        agents_to_plot = performance_data.keys()

    palette = px.colors.qualitative.Set1
    hover_text = (
        '<b>%{fullData.name}</b><br>'
        'Date: %{x}<br>'
        'Cumulative PnL: %{y:.3f}<br>'
        '<extra></extra>'
    )

    fig = go.Figure()
    for position, agent in enumerate(agents_to_plot):
        agent_metrics = performance_data[agent]
        increments = agent_metrics['daily_pnl']
        dates = agent_metrics['dates']

        # Running total, starting from a zero baseline.
        running_total = np.cumsum([0] + increments)
        if dates:
            x_values = [dates[0]] + dates
        else:
            x_values = [datetime.now()]

        display_name = agent.replace('smolagent_', '').replace('--', '/')
        # Cycle through the palette when there are more agents than colours.
        line_style = dict(color=palette[position % len(palette)], width=2)
        fig.add_trace(go.Scatter(
            x=x_values,
            y=running_total,
            name=display_name,
            line=line_style,
            mode='lines+markers',
            hovertemplate=hover_text,
        ))

    fig.update_layout(
        title="Agent Performance - Cumulative PnL Over Time",
        xaxis_title="Date",
        yaxis_title="Cumulative PnL",
        hovermode='x unified',
        template="plotly_white",
        height=500,
        showlegend=True,
    )

    # Dashed reference line at break-even (y = 0).
    fig.add_hline(y=0, line_dash="dash", line_color="gray", opacity=0.5)

    return fig

def create_position_breakdown_plot(performance_data):
    """Draw a stacked bar chart of long / short / no-position counts per agent."""
    agents = list(performance_data.keys())

    # Clean agent names for display
    display_names = [name.replace('smolagent_', '').replace('--', '/') for name in agents]

    # (legend label, metrics key, bar colour), in the order the bars are added.
    series_specs = (
        ('Long Positions', 'long_positions', 'green'),
        ('Short Positions', 'short_positions', 'red'),
        ('No Position', 'no_positions', 'gray'),
    )

    fig = go.Figure()
    for label, metric_key, colour in series_specs:
        counts = [performance_data[agent][metric_key] for agent in agents]
        fig.add_trace(go.Bar(
            name=label,
            x=display_names,
            y=counts,
            marker_color=colour,
            opacity=0.7,
        ))

    fig.update_layout(
        title="Position Breakdown by Agent",
        xaxis_title="Agent",
        yaxis_title="Number of Decisions",
        barmode='stack',
        template="plotly_white",
        height=400,
    )

    return fig

def get_agent_list(df):
    """Return the dropdown options: "All Agents" followed by cleaned agent names.

    An empty frame yields the single placeholder entry "No agents available".
    """
    if df.empty:
        return ["No agents available"]

    options = ["All Agents"]
    for raw_name in df['agent_name'].unique():
        # Strip the framework prefix and restore the "org/model" separator.
        options.append(raw_name.replace('smolagent_', '').replace('--', '/'))
    return options

def update_plot(selected_agent):
    """Rebuild the PnL plot for the agent picked in the dropdown.

    The dataset is re-loaded on every call. The dropdown carries cleaned
    display names, so the selection is mapped back to the raw agent key; the
    "All Agents" / "No agents available" entries, or a name with no match,
    plot every agent.
    """
    performance_data = calculate_pnl_and_performance(load_agent_choices())

    if selected_agent in ("All Agents", "No agents available"):
        return create_pnl_plot(performance_data, None)

    # First raw key whose display form equals the selection, else None.
    matching_raw_name = next(
        (
            raw_name
            for raw_name in performance_data
            if raw_name.replace('smolagent_', '').replace('--', '/') == selected_agent
        ),
        None,
    )
    return create_pnl_plot(performance_data, matching_raw_name)

def refresh_data():
    """Re-load the dataset and return fresh values for every refreshable widget.

    Returns a 7-tuple: leaderboard table, PnL plot, position breakdown plot,
    agent-dropdown update, portfolio-dropdown update, the first agent's
    portfolio figure (None when there are no agents) and a status string.
    """
    df = load_agent_choices()
    performance_data = calculate_pnl_and_performance(df)

    portfolio_list = list(performance_data.keys())
    if portfolio_list:
        default_portfolio = portfolio_list[0]
        first_portfolio_plot = performance_data[default_portfolio]['figure']
    else:
        default_portfolio = None
        first_portfolio_plot = None

    return (
        create_leaderboard(performance_data),
        create_pnl_plot(performance_data),
        create_position_breakdown_plot(performance_data),
        gr.update(choices=get_agent_list(df)),
        gr.update(choices=portfolio_list, value=default_portfolio),
        first_portfolio_plot,
        f"Last updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
    )

# Initialize data
# Loaded once at import time to populate the initial widget values. The event
# callbacks below re-load the dataset instead of reusing this snapshot.
df = load_agent_choices()
performance_data = calculate_pnl_and_performance(df)

# Initial dropdown contents, computed once instead of rebuilding key lists inline.
initial_agent_choices = get_agent_list(df)
initial_portfolio_choices = list(performance_data)
initial_portfolio = initial_portfolio_choices[0] if initial_portfolio_choices else None

# Create Gradio interface
with gr.Blocks(title="PrediBench Leaderboard", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🏆 PrediBench Agent Leaderboard")
    gr.Markdown("Track the performance of AI agents making predictions on Polymarket questions")

    with gr.Row():
        refresh_btn = gr.Button("🔄 Refresh Data", variant="primary")
        last_updated = gr.Textbox(
            value=f"Last updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
            label="Status",
            interactive=False,
            scale=3
        )

    with gr.Tabs():
        with gr.TabItem("📊 Leaderboard"):
            gr.Markdown("### Agent Performance Ranking")
            leaderboard_table = gr.Dataframe(
                value=create_leaderboard(performance_data),
                interactive=False,
                wrap=True
            )

            gr.Markdown("### Position Breakdown")
            position_breakdown = gr.Plot(
                value=create_position_breakdown_plot(performance_data)
            )

        with gr.TabItem("📈 Individual Performance"):
            gr.Markdown("### Select Agent to View Detailed Performance")

            with gr.Row():
                agent_dropdown = gr.Dropdown(
                    choices=initial_agent_choices,
                    # First entry is "All Agents" when agents exist, otherwise
                    # the placeholder, so the default is always a valid choice.
                    value=initial_agent_choices[0],
                    label="Select Agent",
                    scale=3
                )

            pnl_plot = gr.Plot(
                value=create_pnl_plot(performance_data)
            )

            # Update plot when agent selection changes
            agent_dropdown.change(
                fn=update_plot,
                inputs=agent_dropdown,
                outputs=pnl_plot
            )

        with gr.TabItem("📊 Portfolio Details"):
            gr.Markdown("### Detailed Portfolio Analysis")

            with gr.Row():
                portfolio_dropdown = gr.Dropdown(
                    choices=initial_portfolio_choices,
                    value=initial_portfolio,
                    label="Select Agent Portfolio",
                    scale=3
                )

            portfolio_plot = gr.Plot(
                value=performance_data[initial_portfolio]['figure'] if initial_portfolio_choices else None
            )

            # Update portfolio plot when agent selection changes
            def update_portfolio_plot(selected_agent):
                """Return the portfolio figure for selected_agent, or None if unknown."""
                if not selected_agent:
                    return None
                # Re-load instead of reading the import-time `performance_data`:
                # after "Refresh Data" the dropdown may list agents (or newer
                # figures) that the startup snapshot does not contain. This
                # mirrors what update_plot() does for the PnL tab.
                latest_performance = calculate_pnl_and_performance(load_agent_choices())
                agent_metrics = latest_performance.get(selected_agent)
                if agent_metrics is None:
                    return None
                return agent_metrics['figure']

            portfolio_dropdown.change(
                fn=update_portfolio_plot,
                inputs=portfolio_dropdown,
                outputs=portfolio_plot
            )

    # Refresh functionality
    # The outputs order must match the tuple returned by refresh_data().
    refresh_btn.click(
        fn=refresh_data,
        outputs=[leaderboard_table, pnl_plot, position_breakdown, agent_dropdown, portfolio_dropdown, portfolio_plot, last_updated]
    )

if __name__ == "__main__":
    # Start the Gradio server only when this file is run as a script.
    # "0.0.0.0" listens on all network interfaces; the port is fixed at 7860.
    demo.launch(server_name="0.0.0.0", server_port=7860)