leaderboard-hackaton-2025

Runtime error

leaderboard-hackaton-2025

File size: 13,002 Bytes

12c6f3b
9ba222b
12c6f3b
 
f039650
77ab908
 
 
3b6b956
77ab908
 
3b6b956
 
 
 
 
 
 
 
 
 
ddd101e
907ca48
 
 
 
 
dff615a
 
907ca48
88e9336
77ab908
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88e9336
b8ab640
77ab908
732a141
6067055
77ab908
88e9336
 
 
77ab908
88e9336
77ab908
12c6f3b
 
 
 
77ab908
12c6f3b
6067055
77ab908
12c6f3b
 
 
 
 
 
 
77ab908
6067055
12c6f3b
 
6067055
12c6f3b
 
6067055
77ab908
12c6f3b
6067055
77ab908
 
 
 
12c6f3b
 
77ab908
6067055
732a141
77ab908
6067055
 
732a141
907ca48
 
dff615a
 
77ab908
dff615a
 
77ab908
732a141
 
dff615a
 
732a141
 
77ab908
6067055
77ab908
 
 
dff615a
732a141
907ca48
77ab908
6067055
732a141
77ab908
6067055
 
5fb1bc9
907ca48
 
5fb1bc9
 
 
 
 
 
6067055
907ca48
77ab908
6067055
 
77ab908
6067055
 
 
77ab908
6067055
907ca48
 
5fb1bc9
 
 
 
 
 
 
77ab908
5fb1bc9
6067055
 
77ab908
 
 
 
 
5fb1bc9
907ca48
77ab908
6067055
5fb1bc9
77ab908
6067055
 
 
77ab908
2b70f36
907ca48
2b70f36
77ab908
907ca48
2b70f36
77ab908
2b70f36
77ab908
2b70f36
 
 
 
 
 
77ab908
2b70f36
6067055
 
77ab908
 
 
2b70f36
907ca48
77ab908
6067055
2b70f36
77ab908
732a141
 
 
 
77ab908
732a141
6067055
 
 
 
 
77ab908
 
 
 
 
 
 
 
 
 
 
6067055
 
 
 
77ab908
6067055
77ab908
6067055
 
77ab908
6067055
 
 
 
 
 
 
 
77ab908
6067055
 
77ab908
 
 
12c6f3b
6067055
 
 
 
 
77ab908
76138e1
12c6f3b
77ab908
12c6f3b
77ab908
12c6f3b
b8ab640
77ab908
dff615a
 
 
12c6f3b
76138e1
77ab908
76138e1
dbf76b6
12c6f3b
 
9ba222b
77ab908
12c6f3b
 
9ba222b
77ab908
12c6f3b
 
 
 
 
 
 
77ab908
12c6f3b
 
 
 
 
77ab908
12c6f3b
77ab908
12c6f3b
 
fb5493c
12c6f3b
 
 
 
b8ab640
 
 
 
12c6f3b
 
b8ab640
 
12c6f3b
 
b8ab640
 
12c6f3b
 
b8ab640
7a0e457
 
 
12c6f3b
7a0e457
 
b8ab640
 
 
 
 
 
7a0e457
b8ab640
 
 
 
 
 
7a0e457
b8ab640
 
 
 
 
 
7a0e457
12c6f3b
 
 
7a0e457
12c6f3b
 
 
7a0e457
12c6f3b
 
 
7a0e457
12c6f3b
 
 
 
 
 
f8842a2
77ab908
12c6f3b
 
 
 
 
dcf465d
77ab908
7a0e457
b8ab640
7a0e457
77ab908
b8ab640
77ab908
fb5493c
b8ab640
dcf465d
 
f8842a2
61186ad
dcf465d
77ab908

import os
import time
from collections import defaultdict
from functools import lru_cache

import argilla as rg
import gradio as gr
import pandas as pd
from dotenv import load_dotenv
from fastapi import FastAPI

# Runs before the os.getenv() calls below; presumably loads variables from a
# local .env file into the process environment (python-dotenv) -- confirm.
load_dotenv()

# Build the shared Argilla client from ARGILLA_API_URL / ARGILLA_API_KEY.
# Unset variables fall back to empty strings. If construction raises, the
# error is printed and `client` is left as None, so code that uses `client`
# later must cope with it being None.
try:
    client = rg.Argilla(
        api_url=os.getenv("ARGILLA_API_URL", ""),
        api_key=os.getenv("ARGILLA_API_KEY", ""),
    )
except Exception as e:
    print(f"Error initializing Argilla client: {e}")
    client = None


# Input files are read from DATA_DIR; the consolidated leaderboard CSV is
# written next to the working directory.
DATA_DIR = "data"
INCLUDE_CSV = os.path.join(DATA_DIR, "include.csv")
STEREOTYPES_CSV = os.path.join(DATA_DIR, "stereotypes.csv")
ARENA_JSON = os.path.join(DATA_DIR, "arena.json")
PARTICIPANTS_CSV = os.path.join(DATA_DIR, "participants.csv")
LEADERBOARD_CSV = os.path.join(".", "leaderboard.csv")

# Country name -> {"iso": three-letter code, "emoji": flag}.
# Each row below is (name, iso, emoji); insertion order is preserved.
countries = {
    name: {"iso": iso, "emoji": emoji}
    for name, iso, emoji in (
        ("Argentina", "ARG", "🇦🇷"),
        ("Bolivia", "BOL", "🇧🇴"),
        ("Chile", "CHL", "🇨🇱"),
        ("Colombia", "COL", "🇨🇴"),
        ("Costa Rica", "CRI", "🇨🇷"),
        ("Cuba", "CUB", "🇨🇺"),
        ("Ecuador", "ECU", "🇪🇨"),
        ("El Salvador", "SLV", "🇸🇻"),
        ("España", "ESP", "🇪🇸"),
        ("Guatemala", "GTM", "🇬🇹"),
        ("Honduras", "HND", "🇭🇳"),
        ("México", "MEX", "🇲🇽"),
        ("Nicaragua", "NIC", "🇳🇮"),
        ("Panamá", "PAN", "🇵🇦"),
        ("Paraguay", "PRY", "🇵🇾"),
        ("Perú", "PER", "🇵🇪"),
        ("Puerto Rico", "PRI", "🇵🇷"),
        ("República Dominicana", "DOM", "🇩🇴"),
        ("Uruguay", "URY", "🇺🇾"),
        ("Venezuela", "VEN", "🇻🇪"),
    )
}


def get_blend_es_data():
    """Count "answer_1" responses per user in every country's Argilla dataset.

    For each entry of ``countries`` the dataset named
    ``"<emoji> <country> - <iso> - Responder"`` is fetched through the
    module-level ``client``. Every response to the question ``answer_1`` that
    carries a user id adds one contribution for that user in that dataset.

    Returns:
        list[dict]: one ``{"source": "blend-es", "username": ..., "count": ...}``
        item per (dataset, user) pair. Empty when the Argilla client could not
        be created. A dataset that fails to load is reported with ``print``
        and skipped; it never aborts the remaining datasets.
    """
    data = []

    # Without a client every lookup below would fail with a misleading
    # AttributeError, once per country; report the real cause a single time.
    if client is None:
        print("Argilla client is not available; skipping blend-es datasets")
        return data

    # Shared across datasets so each user id is resolved through the API at
    # most once, instead of once per dataset the user contributed to.
    user_mapping = {}

    for country, info in countries.items():
        iso = info["iso"]
        emoji = info["emoji"]
        dataset_name = f"{emoji} {country} - {iso} - Responder"

        try:
            print(f"Processing dataset: {dataset_name}")
            dataset = client.datasets(dataset_name)
            # NOTE(review): the SDK appears to return None for an unknown
            # dataset rather than raising -- confirm against the installed
            # Argilla version.
            if dataset is None:
                print(f"Dataset not found: {dataset_name}")
                continue

            dataset_contributions = defaultdict(int)

            for record in dataset.records(with_responses=True):
                responses = record.to_dict()["responses"]
                if "answer_1" not in responses:
                    continue

                for answer in responses["answer_1"]:
                    user_id = answer["user_id"]
                    if not user_id:
                        continue

                    dataset_contributions[user_id] += 1

                    if user_id in user_mapping:
                        continue
                    try:
                        user_mapping[user_id] = client.users(id=user_id).username
                    except Exception as e:
                        print(f"Error getting username for {user_id}: {e}")
                        # str() keeps the fallback working if the id is not
                        # a plain string (e.g. a UUID object).
                        user_mapping[user_id] = f"User-{str(user_id)[:8]}"

            # Results are appended only after the whole dataset was read, so a
            # failure part-way through never yields partial counts for it.
            for user_id, count in dataset_contributions.items():
                username = user_mapping.get(user_id, f"User-{str(user_id)[:8]}")
                data.append(
                    {"source": "blend-es", "username": username, "count": count}
                )

        except Exception as e:
            print(f"Error processing dataset {dataset_name}: {e}")

    return data


def get_include_data(csv_path=None):
    """Sum the INCLUDE question counts per Discord username.

    Args:
        csv_path: CSV file to read. Defaults to ``INCLUDE_CSV``. The file must
            contain the columns "Nombre en Discord / username" and
            "Total preguntas hackathon"; otherwise nothing is returned.

    Returns:
        list[dict]: one ``{"source": "include", "username": ..., "count": ...}``
        item per distinct (lower-cased) username, in first-seen order. Empty
        when the file is missing, lacks the expected columns or cannot be read.
    """
    csv_path = INCLUDE_CSV if csv_path is None else csv_path
    username_column = "Nombre en Discord / username"
    questions_column = "Total preguntas hackathon"
    data = []

    try:
        if not os.path.exists(csv_path):
            return data

        include_df = pd.read_csv(csv_path)
        if (
            username_column not in include_df.columns
            or questions_column not in include_df.columns
        ):
            return data

        discord_users = defaultdict(int)
        for raw_name, raw_count in zip(
            include_df[username_column], include_df[questions_column]
        ):
            # Check for missing cells BEFORE touching the value: slicing a NaN
            # username used to raise and discard the whole file.
            if pd.isna(raw_name) or pd.isna(raw_count):
                continue

            username = str(raw_name).strip()
            # Drop the leading "@" only when it is actually there, so handles
            # written without it do not lose their first character.
            if username.startswith("@"):
                username = username[1:]
            if not username:
                continue

            try:
                questions = int(raw_count)
            except (TypeError, ValueError):
                # One malformed count should not throw away every other row.
                print(f"Skipping invalid question count {raw_count!r} in {csv_path}")
                continue

            discord_users[username.lower()] += questions

        data.extend(
            {"source": "include", "username": username, "count": count}
            for username, count in discord_users.items()
        )

    except Exception as e:
        print(f"Error loading {csv_path}: {e}")

    return data


def get_mail_to_username_mapping():
    """Build a lower-cased e-mail -> lower-cased Discord handle lookup.

    Reads PARTICIPANTS_CSV when it exists and has both a "gmail" and a
    "discord" column; rows missing either value are ignored. Any error is
    printed and whatever was collected up to that point is returned.
    """
    lookup = {}
    try:
        if not os.path.exists(PARTICIPANTS_CSV):
            return lookup

        participants = pd.read_csv(PARTICIPANTS_CSV)
        available = participants.columns
        if "gmail" not in available or "discord" not in available:
            return lookup

        for _, entry in participants.iterrows():
            address, handle = entry["gmail"], entry["discord"]
            if pd.notna(address) and pd.notna(handle):
                lookup[address.lower()] = handle.lower()
    except Exception as e:
        print(f"Error loading {PARTICIPANTS_CSV}: {e}")

    return lookup


def get_estereotipos_data():
    """Aggregate the stereotype counts per contributor.

    STEREOTYPES_CSV must provide the columns "token_id" (used as the
    contributor's e-mail) and "count". Counts are summed per lower-cased
    e-mail; each e-mail is then translated to a Discord handle via
    get_mail_to_username_mapping(), falling back to the part before "@"
    (or the whole token when there is no "@").

    Returns a list of {"source": "estereotipos", "username", "count"} dicts;
    errors are printed and the items gathered so far are returned.
    """
    results = []
    discord_by_mail = get_mail_to_username_mapping()

    try:
        if not os.path.exists(STEREOTYPES_CSV):
            return results

        frame = pd.read_csv(STEREOTYPES_CSV)
        if "token_id" not in frame.columns or "count" not in frame.columns:
            return results

        totals = defaultdict(int)
        for _, entry in frame.iterrows():
            token = entry["token_id"]
            amount = entry["count"]
            if pd.isna(token) or pd.isna(amount):
                continue
            totals[token.lower()] += int(amount)

        for address, total in totals.items():
            # An empty or missing mapping falls through to the local part;
            # split() yields the whole string when no "@" is present.
            handle = discord_by_mail.get(address.lower(), "") or address.split("@")[0]
            results.append(
                {"source": "estereotipos", "username": handle, "count": total}
            )
    except Exception as e:
        print(f"Error loading {STEREOTYPES_CSV}: {e}")

    return results


def get_arena_data():
    """Count arena conversations per contributor.

    ARENA_JSON is read as a mapping whose values are lists of conversation
    objects. Each conversation with a non-empty "username" (treated as an
    e-mail) counts once for that lower-cased e-mail. E-mails are translated
    to Discord handles via get_mail_to_username_mapping(), falling back to
    the part before "@" (or the whole value when there is no "@").

    Returns a list of {"source": "arena", "username", "count"} dicts; errors
    are printed and the items gathered so far are returned.
    """
    import json

    results = []
    discord_by_mail = get_mail_to_username_mapping()

    try:
        if not os.path.exists(ARENA_JSON):
            return results

        with open(ARENA_JSON, "r", encoding="utf-8") as handle:
            arena = json.load(handle)

        tally = defaultdict(int)
        for _country, conversations in arena.items():
            for conversation in conversations:
                if "username" not in conversation:
                    continue
                address = conversation["username"]
                if address:
                    tally[address.lower()] += 1

        for address, total in tally.items():
            # split() yields the whole string when no "@" is present.
            name = discord_by_mail.get(address.lower(), "") or address.split("@")[0]
            results.append({"source": "arena", "username": name, "count": total})
    except Exception as e:
        print(f"Error loading {ARENA_JSON}: {e}")

    return results


# Callers pass a second-resolution timestamp as the key, so an old entry is
# never looked up again. Keeping a single entry still de-duplicates calls made
# within the same second, without pinning up to 32 stale DataFrames in memory.
@lru_cache(maxsize=1)
def get_user_contributions_cached(cache_buster: int):
    """Return the consolidated leaderboard, memoised on ``cache_buster``.

    Args:
        cache_buster: value used only as the cache key; passing a new value
            forces ``consolidate_all_data()`` to run again.

    Returns:
        Whatever ``consolidate_all_data()`` returns. The same object is handed
        to every caller that uses the same key, so it must not be mutated.
    """
    return consolidate_all_data()


def consolidate_all_data():
    """Merge every contribution source into one leaderboard DataFrame.

    Collects the items produced by get_blend_es_data(), get_include_data(),
    get_estereotipos_data() and get_arena_data(), groups them by lower-cased
    username (the first spelling seen is the one displayed) and adds up the
    counts per source. The result is sorted by "Total", highest first, and
    also written to LEADERBOARD_CSV.

    Returns:
        pandas.DataFrame with the columns Username, Total, Blend-es, INCLUDE,
        Estereotipos and Arena. The columns are present even when no
        contribution was found, so the rendered table keeps its header.
    """
    # Maps the "source" tag of each item to its per-user counter.
    source_fields = {
        "blend-es": "blend_es",
        "include": "include",
        "estereotipos": "estereotipos",
        "arena": "arena",
    }
    columns = ["Username", "Total", "Blend-es", "INCLUDE", "Estereotipos", "Arena"]

    all_data = []
    all_data.extend(get_blend_es_data())
    all_data.extend(get_include_data())
    all_data.extend(get_estereotipos_data())
    all_data.extend(get_arena_data())

    user_contributions = {}
    for item in all_data:
        username = item["username"]
        entry = user_contributions.setdefault(
            username.lower(),
            {
                "username": username,
                "blend_es": 0,
                "include": 0,
                "estereotipos": 0,
                "arena": 0,
            },
        )
        # Items with an unknown source still register the user but add nothing.
        field = source_fields.get(item["source"])
        if field is not None:
            entry[field] += item["count"]

    rows = [
        {
            "Username": entry["username"],
            "Total": sum(entry[field] for field in source_fields.values()),
            "Blend-es": entry["blend_es"],
            "INCLUDE": entry["include"],
            "Estereotipos": entry["estereotipos"],
            "Arena": entry["arena"],
        }
        for entry in user_contributions.values()
    ]

    # Explicit columns keep the schema stable when `rows` is empty.
    df = pd.DataFrame(rows, columns=columns)

    if not df.empty:
        # A stable sort keeps users with equal totals in first-seen order.
        df = df.sort_values("Total", ascending=False, kind="mergesort")

    # The CSV is a side export; failing to write it (e.g. read-only file
    # system) must not prevent the leaderboard from being displayed.
    try:
        with open(LEADERBOARD_CSV, "w", encoding="utf-8") as f:
            df.to_csv(f, index=False)
    except OSError as e:
        print(f"Error writing {LEADERBOARD_CSV}: {e}")

    return df


# FastAPI application; the Gradio UI is mounted onto it at the bottom of the file.
app = FastAPI()

# In-process cache shared by create_leaderboard_ui() and refresh_data()
# through `global` statements.
# time.time() value of the last rebuild; 0 means "never built / invalidated".
last_update_time = 0
# Leaderboard data from the last rebuild, or None when there is no valid cache.
cached_data = None


def create_leaderboard_ui():
    """Render the leaderboard as a styled HTML fragment.

    The consolidated data is kept in the module-level cache and rebuilt only
    when no cache exists or the cached copy is at least 300 seconds old.
    A 1-based "Rank" column is prepended to non-empty data before rendering.

    Returns:
        str: a <div> holding the last-update timestamp (local time), the
        table's CSS and the table itself.
    """
    global cached_data, last_update_time

    now = time.time()
    cache_expired = cached_data is None or now - last_update_time >= 300
    if cache_expired:
        # The timestamp doubles as the cache key, which forces a fresh build.
        cached_data = get_user_contributions_cached(int(now))
        last_update_time = now

    board = cached_data
    if not board.empty:
        # reset_index returns a new frame, so the cached one is not modified.
        board = board.reset_index(drop=True)
        board.insert(0, "Rank", range(1, len(board) + 1))

    df_html = board.to_html(classes="leaderboard-table", border=0, index=False)
    updated_at = time.strftime(
        "%Y-%m-%d %H:%M:%S", time.localtime(last_update_time)
    )

    return f"""
    <div style="margin: 20px 0;">
        <p>Última Actualización: {updated_at}</p>
        <style>
            .leaderboard-table {{
                width: 100%;
                border-collapse: collapse;
                font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
                box-shadow: 0 4px 8px rgba(0,0,0,0.1);
                border-radius: 8px;
                overflow: hidden;
            }}
            .leaderboard-table th {{
                background-color: #1a1a2e;
                color: white;
                font-weight: bold;
                text-align: left;
                padding: 14px;
                border-bottom: 2px solid #16213e;
            }}
            .leaderboard-table td {{
                padding: 12px 14px;
                border-bottom: 1px solid #333;
                background-color: #222;
                color: #fff;
            }}
            .leaderboard-table tr:hover td {{
                background-color: #2a2a3a;
            }}
            .leaderboard-table tr:nth-child(1) td:first-child {{
                background-color: #ffd700;
                color: #333;
                font-weight: bold;
                text-align: center;
                border-right: 1px solid #333;
            }}
            .leaderboard-table tr:nth-child(2) td:first-child {{
                background-color: #c0c0c0;
                color: #333;
                font-weight: bold;
                text-align: center;
                border-right: 1px solid #333;
            }}
            .leaderboard-table tr:nth-child(3) td:first-child {{
                background-color: #cd7f32;
                color: #333;
                font-weight: bold;
                text-align: center;
                border-right: 1px solid #333;
            }}
            .leaderboard-table tr:nth-child(1) td:nth-child(2) {{
                font-weight: bold;
                color: #ffd700;
            }}
            .leaderboard-table tr:nth-child(2) td:nth-child(2) {{
                font-weight: bold;
                color: #c0c0c0;
            }}
            .leaderboard-table tr:nth-child(3) td:nth-child(2) {{
                font-weight: bold;
                color: #cd7f32;
            }}
        </style>
        {df_html}
    </div>
    """


def refresh_data():
    """Invalidate the module-level cache and return freshly rendered HTML."""
    global cached_data, last_update_time
    # With no cached data the next render always rebuilds the leaderboard.
    cached_data, last_update_time = None, 0
    return create_leaderboard_ui()


# Gradio page: a title, the leaderboard table and a manual refresh button.
with gr.Blocks(theme=gr.themes.Default()) as demo:
    with gr.Column(scale=1):
        gr.Markdown("""# 🏆 Hackaton Leaderboard""")

        # The function itself (not its result) is passed as the value;
        # presumably Gradio calls it on each page load -- confirm in the
        # gr.HTML documentation.
        leaderboard_html = gr.HTML(create_leaderboard_ui)

        refresh_btn = gr.Button("🔄 Actualizar Datos", variant="primary")
        # Clicking drops the cache and replaces the table with a fresh render.
        refresh_btn.click(fn=refresh_data, outputs=leaderboard_html)

# Serve the Gradio UI from the root path of the FastAPI application.
gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    # Imported here so uvicorn is only required when run as a script.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)