File size: 7,529 Bytes
32c2587
76138e1
12c6f3b
 
9ba222b
12c6f3b
 
 
f039650
12c6f3b
76138e1
 
 
 
ddd101e
12c6f3b
 
 
 
 
49b81e8
12c6f3b
9ba222b
12c6f3b
 
65deab8
12c6f3b
 
 
 
db58fdb
12c6f3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76138e1
12c6f3b
 
 
 
 
 
 
 
 
76138e1
12c6f3b
 
 
 
 
 
 
 
 
 
 
 
76138e1
12c6f3b
76138e1
dbf76b6
12c6f3b
 
9ba222b
12c6f3b
 
 
9ba222b
 
12c6f3b
 
 
 
 
 
 
 
 
9ba222b
12c6f3b
 
 
 
 
 
9ba222b
12c6f3b
 
9ed5b46
12c6f3b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f8842a2
12c6f3b
 
 
 
 
 
dcf465d
12c6f3b
 
 
 
db58fdb
12c6f3b
 
e8a8241
12c6f3b
 
 
a2875f8
12c6f3b
 
 
 
 
 
 
 
 
 
 
 
dcf465d
12c6f3b
dcf465d
f8842a2
12c6f3b
61186ad
dcf465d
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
import gradio as gr
import argilla as rg
import pandas as pd
import os
import time
from collections import defaultdict
from fastapi import FastAPI
from functools import lru_cache

# Initialize Argilla client with environment variables.
# NOTE(review): both env vars default to "" — a missing configuration only
# surfaces later as per-dataset errors inside get_user_contributions, not here.
client = rg.Argilla(
    api_url=os.getenv("ARGILLA_API_URL", ""),
    api_key=os.getenv("ARGILLA_API_KEY", "")
)

# Dataset information - list all the datasets to track.
# Each entry must match an Argilla dataset name exactly; these strings are
# also used verbatim as column headers in the leaderboard DataFrame.
DATASETS = [
    "🇪🇸 España - ESP - Responder",
    # Add more datasets as needed
]

# Cache results to avoid frequent API calls
@lru_cache(maxsize=32)
def get_user_contributions_cached(cache_buster: int):
    return get_user_contributions()

def get_user_contributions():
    """Aggregate response counts per user across all tracked datasets.

    Iterates every dataset in ``DATASETS``, counts each "answer_1" response
    as one contribution for the responding user, and resolves user ids to
    usernames via the Argilla API (falling back to ``User-<id prefix>`` when
    lookup fails).

    Returns:
        pandas.DataFrame with one row per user and columns "Username",
        "Total Contributions", plus one column per dataset name holding the
        per-dataset count. Sorted by "Total Contributions" descending;
        empty DataFrame when nothing was found.
    """
    user_contributions = defaultdict(lambda: {"username": "", "contributions": 0, "datasets": {}})
    user_id_to_username = {}  # memoized user_id -> username lookups

    # Process each dataset independently; one failure must not abort the rest.
    for dataset_name in DATASETS:
        try:
            print(f"Processing dataset: {dataset_name}")
            dataset = client.datasets(dataset_name)
            records = list(dataset.records(with_responses=True))

            # Track contributions per user in this dataset
            dataset_contributions = defaultdict(int)

            for record in records:
                record_dict = record.to_dict()
                # Fix: use .get() so a record without a "responses" mapping
                # (or without "answer_1" entries) is skipped instead of
                # raising KeyError and aborting the whole dataset.
                responses = record_dict.get("responses", {})
                for answer in responses.get("answer_1", []):
                    if answer.get("user_id"):
                        user_id = answer["user_id"]
                        dataset_contributions[user_id] += 1

                        # Get username if not already cached
                        if user_id not in user_id_to_username:
                            try:
                                user = client.users(id=user_id)
                                user_id_to_username[user_id] = user.username
                            except Exception as e:
                                print(f"Error getting username for {user_id}: {e}")
                                user_id_to_username[user_id] = f"User-{user_id[:8]}"

            # Add dataset contributions to overall user stats
            for user_id, count in dataset_contributions.items():
                username = user_id_to_username.get(user_id, f"User-{user_id[:8]}")
                user_contributions[user_id]["username"] = username
                user_contributions[user_id]["contributions"] += count
                user_contributions[user_id]["datasets"][dataset_name] = count

        except Exception as e:
            # Best-effort: log and continue so the remaining datasets still
            # contribute to the leaderboard.
            print(f"Error processing dataset {dataset_name}: {e}")

    # Convert to dataframe for easier handling
    rows = []
    for user_id, data in user_contributions.items():
        row = {
            "Username": data["username"],
            "Total Contributions": data["contributions"],
        }
        # Add individual dataset contributions (0 when the user has none).
        for dataset_name in DATASETS:
            row[dataset_name] = data["datasets"].get(dataset_name, 0)
        rows.append(row)

    df = pd.DataFrame(rows)

    # Sort by total contributions (descending)
    if not df.empty:
        df = df.sort_values("Total Contributions", ascending=False)

    return df

# App setup — FastAPI instance that the Gradio UI is mounted onto below.
app = FastAPI()

# Module-level cache state, read and written by create_leaderboard_ui()
# and reset by refresh_data().
last_update_time = 0   # epoch seconds of the last successful fetch (0 = never)
cached_data = None     # last DataFrame returned by get_user_contributions_cached

def _with_rank_column(df):
    """Return a copy of *df* with a leading 1-based "Rank" column (no-op when empty)."""
    if df.empty:
        return df
    ranked = df.reset_index(drop=True)
    ranked.index = ranked.index + 1
    ranked = ranked.rename_axis("Rank")
    return ranked.reset_index()


def _render_leaderboard_html(df_html, updated_at):
    """Wrap the rendered table *df_html* in the styled leaderboard container.

    *updated_at* is the pre-formatted "last updated" timestamp string.
    The <style> block highlights the top three ranks (gold/silver/bronze).
    """
    return f"""
    <div style="margin: 20px 0;">
        <h2>🏆 Leaderboard of User Contributions</h2>
        <p>Last updated: {updated_at}</p>
        <style>
            .leaderboard-table {{
                width: 100%;
                border-collapse: collapse;
                font-family: Arial, sans-serif;
            }}
            .leaderboard-table th {{
                background-color: #f2f2f2;
                color: #333;
                font-weight: bold;
                text-align: left;
                padding: 12px;
                border-bottom: 2px solid #ddd;
            }}
            .leaderboard-table td {{
                padding: 10px 12px;
                border-bottom: 1px solid #ddd;
            }}
            .leaderboard-table tr:nth-child(even) {{
                background-color: #f9f9f9;
            }}
            .leaderboard-table tr:hover {{
                background-color: #f1f1f1;
            }}
            .leaderboard-table tr:nth-child(1) td:first-child,
            .leaderboard-table tr:nth-child(1) td:nth-child(2) {{
                font-weight: bold;
                color: gold;
            }}
            .leaderboard-table tr:nth-child(2) td:first-child,
            .leaderboard-table tr:nth-child(2) td:nth-child(2) {{
                font-weight: bold;
                color: silver;
            }}
            .leaderboard-table tr:nth-child(3) td:first-child,
            .leaderboard-table tr:nth-child(3) td:nth-child(2) {{
                font-weight: bold;
                color: #cd7f32; /* bronze */
            }}
        </style>
        {df_html}
        <p><small>Note: This leaderboard shows user contributions across all tracked datasets.</small></p>
    </div>
    """


def create_leaderboard_ui():
    """Build the leaderboard HTML, fetching fresh data at most every 5 minutes.

    Reads/writes the module-level ``cached_data`` / ``last_update_time`` cache.
    Returns the complete styled HTML string for the gr.HTML component.
    """
    global cached_data, last_update_time
    current_time = time.time()

    # Use cached data if available and not expired (5 minute cache)
    if cached_data is not None and current_time - last_update_time < 300:
        df = cached_data
    else:
        # Fresh integer cache_buster defeats the lru_cache and forces a fetch.
        cache_buster = int(current_time)
        df = get_user_contributions_cached(cache_buster)
        cached_data = df
        last_update_time = current_time

    # Rank is added to a copy, so the cached frame stays rank-free.
    df = _with_rank_column(df)

    df_html = df.to_html(classes="leaderboard-table", border=0, index=False)
    updated_at = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_update_time))
    return _render_leaderboard_html(df_html, updated_at)

def refresh_data():
    """Invalidate the module-level cache and rebuild the leaderboard HTML."""
    global cached_data, last_update_time
    # Zeroing the timestamp and dropping the frame guarantees the next
    # create_leaderboard_ui() call takes the fetch branch.
    last_update_time = 0
    cached_data = None
    return create_leaderboard_ui()

# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")) as demo:
    gr.Markdown("# Contribution Leaderboard")
    gr.Markdown("Track user contributions across datasets in real-time")
    
    # Create leaderboard display
    leaderboard_html = gr.HTML(create_leaderboard_ui)
    
    # Add refresh button
    refresh_btn = gr.Button("🔄 Refresh Data")
    refresh_btn.click(fn=refresh_data, outputs=leaderboard_html)
    
    # Additional information
    with gr.Accordion("About this leaderboard", open=False):
        gr.Markdown("""
        This leaderboard tracks user contributions across multiple datasets.
        
        ### How it works
        - **Contributions**: Each response provided by a user counts as one contribution
        - **Refresh**: Data is automatically cached for 5 minutes. Click the refresh button to update manually
        - **Datasets tracked**: 
          - 🇪🇸 España - ESP - Responder
          - [Add more datasets as needed]
        """)

# Mount the Gradio app at the FastAPI root so "/" serves the leaderboard UI.
gr.mount_gradio_app(app, demo, path="/")

# Run the app directly with uvicorn when executed as a script
# (port 7860 is the conventional Gradio/Spaces port).
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)