File size: 7,529 Bytes
32c2587
76138e1
12c6f3b
 
9ba222b
12c6f3b
 
 
f039650
12c6f3b
76138e1
 
 
 
ddd101e
12c6f3b
 
 
 
 
49b81e8
12c6f3b
9ba222b
12c6f3b
 
65deab8
12c6f3b
 
 
 
db58fdb
12c6f3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76138e1
12c6f3b
 
 
 
 
 
 
 
 
76138e1
12c6f3b
 
 
 
 
 
 
 
 
 
 
 
76138e1
12c6f3b
76138e1
dbf76b6
12c6f3b
 
9ba222b
12c6f3b
 
 
9ba222b
 
12c6f3b
 
 
 
 
 
 
 
 
9ba222b
12c6f3b
 
 
 
 
 
9ba222b
12c6f3b
 
9ed5b46
12c6f3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8842a2
12c6f3b
 
 
 
 
 
dcf465d
12c6f3b
 
 
 
db58fdb
12c6f3b
 
e8a8241
12c6f3b
 
 
a2875f8
12c6f3b
 
 
 
 
 
 
 
 
 
 
 
dcf465d
12c6f3b
dcf465d
f8842a2
12c6f3b
61186ad
dcf465d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import gradio as gr
import argilla as rg
import pandas as pd
import os
import time
from collections import defaultdict
from fastapi import FastAPI
from functools import lru_cache

# Initialize Argilla client with environment variables.
# NOTE(review): both env vars default to "" — a missing configuration only
# surfaces later as per-dataset errors inside get_user_contributions, not here.
client = rg.Argilla(
    api_url=os.getenv("ARGILLA_API_URL", ""),
    api_key=os.getenv("ARGILLA_API_KEY", "")
)

# Dataset information - list all the datasets to track.
# Each entry must match an Argilla dataset name exactly; these strings are
# also used verbatim as column headers in the leaderboard DataFrame.
DATASETS = [
    "🇪🇸 España - ESP - Responder",
    # Add more datasets as needed
]

# Cache results to avoid frequent API calls
@lru_cache(maxsize=32)
def get_user_contributions_cached(cache_buster: int):
    return get_user_contributions()

def get_user_contributions():
    """Aggregate response counts per user across all tracked datasets.

    Iterates every dataset in ``DATASETS``, counts each "answer_1" response
    as one contribution for the responding user, and resolves user ids to
    usernames via the Argilla API (falling back to ``User-<id prefix>`` when
    lookup fails).

    Returns:
        pandas.DataFrame with one row per user and columns "Username",
        "Total Contributions", plus one column per dataset name holding the
        per-dataset count. Sorted by "Total Contributions" descending;
        empty DataFrame when nothing was found.
    """
    user_contributions = defaultdict(lambda: {"username": "", "contributions": 0, "datasets": {}})
    user_id_to_username = {}  # memoized user_id -> username lookups

    # Process each dataset independently; one failure must not abort the rest.
    for dataset_name in DATASETS:
        try:
            print(f"Processing dataset: {dataset_name}")
            dataset = client.datasets(dataset_name)
            records = list(dataset.records(with_responses=True))

            # Track contributions per user in this dataset
            dataset_contributions = defaultdict(int)

            for record in records:
                record_dict = record.to_dict()
                # Fix: use .get() so a record without a "responses" mapping
                # (or without "answer_1" entries) is skipped instead of
                # raising KeyError and aborting the whole dataset.
                responses = record_dict.get("responses", {})
                for answer in responses.get("answer_1", []):
                    if answer.get("user_id"):
                        user_id = answer["user_id"]
                        dataset_contributions[user_id] += 1

                        # Get username if not already cached
                        if user_id not in user_id_to_username:
                            try:
                                user = client.users(id=user_id)
                                user_id_to_username[user_id] = user.username
                            except Exception as e:
                                print(f"Error getting username for {user_id}: {e}")
                                user_id_to_username[user_id] = f"User-{user_id[:8]}"

            # Add dataset contributions to overall user stats
            for user_id, count in dataset_contributions.items():
                username = user_id_to_username.get(user_id, f"User-{user_id[:8]}")
                user_contributions[user_id]["username"] = username
                user_contributions[user_id]["contributions"] += count
                user_contributions[user_id]["datasets"][dataset_name] = count

        except Exception as e:
            # Best-effort: log and continue so the remaining datasets still
            # contribute to the leaderboard.
            print(f"Error processing dataset {dataset_name}: {e}")

    # Convert to dataframe for easier handling
    rows = []
    for user_id, data in user_contributions.items():
        row = {
            "Username": data["username"],
            "Total Contributions": data["contributions"],
        }
        # Add individual dataset contributions (0 when the user has none).
        for dataset_name in DATASETS:
            row[dataset_name] = data["datasets"].get(dataset_name, 0)
        rows.append(row)

    df = pd.DataFrame(rows)

    # Sort by total contributions (descending)
    if not df.empty:
        df = df.sort_values("Total Contributions", ascending=False)

    return df

# App setup — FastAPI instance that the Gradio UI is mounted onto below.
app = FastAPI()

# Module-level cache state, read and written by create_leaderboard_ui()
# and reset by refresh_data().
last_update_time = 0   # epoch seconds of the last successful fetch (0 = never)
cached_data = None     # last DataFrame returned by get_user_contributions_cached

def _with_rank_column(df):
    """Return a copy of *df* with a leading 1-based "Rank" column (no-op when empty)."""
    if df.empty:
        return df
    ranked = df.reset_index(drop=True)
    ranked.index = ranked.index + 1
    ranked = ranked.rename_axis("Rank")
    return ranked.reset_index()


def _render_leaderboard_html(df_html, updated_at):
    """Wrap the rendered table *df_html* in the styled leaderboard container.

    *updated_at* is the pre-formatted "last updated" timestamp string.
    The <style> block highlights the top three ranks (gold/silver/bronze).
    """
    return f"""
    <div style="margin: 20px 0;">
        <h2>🏆 Leaderboard of User Contributions</h2>
        <p>Last updated: {updated_at}</p>
        <style>
            .leaderboard-table {{
                width: 100%;
                border-collapse: collapse;
                font-family: Arial, sans-serif;
            }}
            .leaderboard-table th {{
                background-color: #f2f2f2;
                color: #333;
                font-weight: bold;
                text-align: left;
                padding: 12px;
                border-bottom: 2px solid #ddd;
            }}
            .leaderboard-table td {{
                padding: 10px 12px;
                border-bottom: 1px solid #ddd;
            }}
            .leaderboard-table tr:nth-child(even) {{
                background-color: #f9f9f9;
            }}
            .leaderboard-table tr:hover {{
                background-color: #f1f1f1;
            }}
            .leaderboard-table tr:nth-child(1) td:first-child,
            .leaderboard-table tr:nth-child(1) td:nth-child(2) {{
                font-weight: bold;
                color: gold;
            }}
            .leaderboard-table tr:nth-child(2) td:first-child,
            .leaderboard-table tr:nth-child(2) td:nth-child(2) {{
                font-weight: bold;
                color: silver;
            }}
            .leaderboard-table tr:nth-child(3) td:first-child,
            .leaderboard-table tr:nth-child(3) td:nth-child(2) {{
                font-weight: bold;
                color: #cd7f32; /* bronze */
            }}
        </style>
        {df_html}
        <p><small>Note: This leaderboard shows user contributions across all tracked datasets.</small></p>
    </div>
    """


def create_leaderboard_ui():
    """Build the leaderboard HTML, fetching fresh data at most every 5 minutes.

    Reads/writes the module-level ``cached_data`` / ``last_update_time`` cache.
    Returns the complete styled HTML string for the gr.HTML component.
    """
    global cached_data, last_update_time
    current_time = time.time()

    # Use cached data if available and not expired (5 minute cache)
    if cached_data is not None and current_time - last_update_time < 300:
        df = cached_data
    else:
        # Fresh integer cache_buster defeats the lru_cache and forces a fetch.
        cache_buster = int(current_time)
        df = get_user_contributions_cached(cache_buster)
        cached_data = df
        last_update_time = current_time

    # Rank is added to a copy, so the cached frame stays rank-free.
    df = _with_rank_column(df)

    df_html = df.to_html(classes="leaderboard-table", border=0, index=False)
    updated_at = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_update_time))
    return _render_leaderboard_html(df_html, updated_at)

def refresh_data():
    """Invalidate the module-level cache and rebuild the leaderboard HTML."""
    global cached_data, last_update_time
    # Zeroing the timestamp and dropping the frame guarantees the next
    # create_leaderboard_ui() call takes the fetch branch.
    last_update_time = 0
    cached_data = None
    return create_leaderboard_ui()

# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")) as demo:
    gr.Markdown("# Contribution Leaderboard")
    gr.Markdown("Track user contributions across datasets in real-time")
    
    # Create leaderboard display
    leaderboard_html = gr.HTML(create_leaderboard_ui)
    
    # Add refresh button
    refresh_btn = gr.Button("🔄 Refresh Data")
    refresh_btn.click(fn=refresh_data, outputs=leaderboard_html)
    
    # Additional information
    with gr.Accordion("About this leaderboard", open=False):
        gr.Markdown("""
        This leaderboard tracks user contributions across multiple datasets.
        
        ### How it works
        - **Contributions**: Each response provided by a user counts as one contribution
        - **Refresh**: Data is automatically cached for 5 minutes. Click the refresh button to update manually
        - **Datasets tracked**: 
          - 🇪🇸 España - ESP - Responder
          - [Add more datasets as needed]
        """)

# Mount the Gradio app at the FastAPI root so "/" serves the leaderboard UI.
gr.mount_gradio_app(app, demo, path="/")

# Run the app directly with uvicorn when executed as a script
# (port 7860 is the conventional Gradio/Spaces port).
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)