import json
import os
import time
from collections import defaultdict
from functools import lru_cache

import argilla as rg
import gradio as gr
import pandas as pd
from dotenv import load_dotenv
from fastapi import FastAPI

load_dotenv()
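
# Initialize the Argilla client from environment variables (ARGILLA_API_URL /
# ARGILLA_API_KEY, typically supplied via a local .env file). If initialization
# fails, `client` stays None and the blend-es source below simply logs
# per-dataset errors instead of crashing the app.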
try:
    client = rg.Argilla(
        api_url=os.getenv("ARGILLA_API_URL", ""),
        api_key=os.getenv("ARGILLA_API_KEY", ""),
    )
except Exception as e:
    print(f"Error initializing Argilla client: {e}")
    client = None
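
# Local data files for the contribution sources and the participant mapping,
# plus the CSV where the consolidated personal leaderboard is written.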
DATA_DIR = "data"
INCLUDE_CSV = os.path.join(DATA_DIR, "include.csv")
STEREOTYPES_CSV = os.path.join(DATA_DIR, "stereotypes.csv")
ARENA_JSON = os.path.join(DATA_DIR, "arena.json")
PARTICIPANTS_CSV = os.path.join(DATA_DIR, "participants.csv")
LEADERBOARD_PERSONAL_CSV = os.path.join(".", "leaderboard_personal.csv")
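
# Countries covered by the blend-es annotation datasets; the flag emoji and ISO
# code are used to build each country's Argilla dataset name.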
countries = {
    "Argentina": {"iso": "ARG", "emoji": "🇦🇷"},
    "Bolivia": {"iso": "BOL", "emoji": "🇧🇴"},
    "Chile": {"iso": "CHL", "emoji": "🇨🇱"},
    "Colombia": {"iso": "COL", "emoji": "🇨🇴"},
    "Costa Rica": {"iso": "CRI", "emoji": "🇨🇷"},
    "Cuba": {"iso": "CUB", "emoji": "🇨🇺"},
    "Ecuador": {"iso": "ECU", "emoji": "🇪🇨"},
    "El Salvador": {"iso": "SLV", "emoji": "🇸🇻"},
    "España": {"iso": "ESP", "emoji": "🇪🇸"},
    "Guatemala": {"iso": "GTM", "emoji": "🇬🇹"},
    "Honduras": {"iso": "HND", "emoji": "🇭🇳"},
    "México": {"iso": "MEX", "emoji": "🇲🇽"},
    "Nicaragua": {"iso": "NIC", "emoji": "🇳🇮"},
    "Panamá": {"iso": "PAN", "emoji": "🇵🇦"},
    "Paraguay": {"iso": "PRY", "emoji": "🇵🇾"},
    "Perú": {"iso": "PER", "emoji": "🇵🇪"},
    "Puerto Rico": {"iso": "PRI", "emoji": "🇵🇷"},
    "República Dominicana": {"iso": "DOM", "emoji": "🇩🇴"},
    "Uruguay": {"iso": "URY", "emoji": "🇺🇾"},
    "Venezuela": {"iso": "VEN", "emoji": "🇻🇪"},
}
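
# Contributions arrive keyed by email (stereotypes, arena) or HF username
# (blend-es), so they are normalized to a single Discord handle using
# data/participants.csv.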
def get_user_mapping():
    """
    Build mappings from emails and HF usernames to Discord usernames.
    Returns a tuple of (email_to_discord, hf_username_to_discord) dicts.
    """
    email_to_discord = {}
    hf_username_to_discord = {}
    try:
        if os.path.exists(PARTICIPANTS_CSV):
            mapping_df = pd.read_csv(PARTICIPANTS_CSV)
            # Map emails to Discord usernames
            if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
                for _, row in mapping_df.iterrows():
                    mail = row["gmail"]
                    discord = row["discord"]
                    if pd.notna(mail) and pd.notna(discord) and discord != "NA":
                        email_to_discord[mail.lower()] = discord.lower()
            # Map HF usernames to Discord usernames
            if "hf_username" in mapping_df.columns and "discord" in mapping_df.columns:
                for _, row in mapping_df.iterrows():
                    hf_username = row["hf_username"]
                    discord = row["discord"]
                    if pd.notna(hf_username) and pd.notna(discord) and discord != "NA":
                        hf_username_to_discord[hf_username.lower()] = discord.lower()
    except Exception as e:
        print(f"Error loading {PARTICIPANTS_CSV}: {e}")
    return email_to_discord, hf_username_to_discord


def get_discord_username(identifier):
    """
    Resolve a Discord username from either an email or an HF username.
    Returns the Discord username if found; otherwise falls back to the
    identifier itself (its local part, if it is an email).
    """
    email_to_discord, hf_username_to_discord = get_user_mapping()
    # Try to find the Discord username by email first
    if "@" in identifier:
        discord_username = email_to_discord.get(identifier.lower())
        if discord_username:
            return discord_username
    # Then try to find it by HF username
    discord_username = hf_username_to_discord.get(identifier.lower())
    if discord_username:
        return discord_username
    # Fallback: use the identifier as the username
    return identifier.split("@")[0] if "@" in identifier else identifier
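
# Blend-es source: for every country dataset in Argilla
# ("{emoji} {country} - {ISO} - Responder"), count one contribution per
# submitted "answer_1" response, resolve the annotator's user id to an HF
# username via the Argilla users API, then map it to a Discord handle.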
def get_blend_es_data():
    data = []
    for country in countries.keys():
        iso = countries[country]["iso"]
        emoji = countries[country]["emoji"]
        dataset_name = f"{emoji} {country} - {iso} - Responder"
        try:
            print(f"Processing dataset: {dataset_name}")
            dataset = client.datasets(dataset_name)
            records = list(dataset.records(with_responses=True))
            dataset_contributions = defaultdict(int)
            user_mapping = {}
            for record in records:
                record_dict = record.to_dict()
                if "answer_1" in record_dict["responses"]:
                    for answer in record_dict["responses"]["answer_1"]:
                        if answer["user_id"]:
                            user_id = answer["user_id"]
                            dataset_contributions[user_id] += 1
                            if user_id not in user_mapping:
                                try:
                                    user = client.users(id=user_id)
                                    user_mapping[user_id] = user.username
                                except Exception as e:
                                    print(f"Error getting username for {user_id}: {e}")
                                    user_mapping[user_id] = f"User-{user_id[:8]}"
            for user_id, count in dataset_contributions.items():
                hf_username = user_mapping.get(user_id, f"User-{user_id[:8]}")
                username = get_discord_username(hf_username)
                data.append(
                    {"source": "blend-es", "username": username, "count": count}
                )
        except Exception as e:
            print(f"Error processing dataset {dataset_name}: {e}")
    return data
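
# INCLUDE source: a CSV export whose Spanish column headers mean
# "Discord name / username" and "total hackathon questions"; handles are
# lowercased and any leading "@" is stripped before aggregation.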
def get_include_data():
    data = []
    try:
        if os.path.exists(INCLUDE_CSV):
            include_df = pd.read_csv(INCLUDE_CSV)
            username_column = "Nombre en Discord / username"
            questions_column = "Total preguntas hackathon"
            if (
                username_column in include_df.columns
                and questions_column in include_df.columns
            ):
                discord_users = defaultdict(int)
                for _, row in include_df.iterrows():
                    username = row[username_column]
                    questions = row[questions_column]
                    if pd.notna(username) and pd.notna(questions):
                        # Strip the leading @ from the Discord handle, if present
                        username = str(username).lstrip("@")
                        discord_users[username.lower()] += int(questions)
                for username, count in discord_users.items():
                    data.append(
                        {"source": "include", "username": username, "count": count}
                    )
    except Exception as e:
        print(f"Error loading {INCLUDE_CSV}: {e}")
    return data
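
# Stereotypes source: aggregates the per-contributor counts in stereotypes.csv,
# where `token_id` appears to hold the contributor's email address.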
def get_estereotipos_data():
    data = []
    try:
        if os.path.exists(STEREOTYPES_CSV):
            counts_df = pd.read_csv(STEREOTYPES_CSV)
            if "token_id" in counts_df.columns and "count" in counts_df.columns:
                mail_counts = defaultdict(int)
                for _, row in counts_df.iterrows():
                    mail = row["token_id"]
                    count = row["count"]
                    if pd.notna(mail) and pd.notna(count):
                        mail_counts[mail.lower()] += int(count)
                for mail, count in mail_counts.items():
                    username = get_discord_username(mail)
                    data.append(
                        {"source": "estereotipos", "username": username, "count": count}
                    )
    except Exception as e:
        print(f"Error loading {STEREOTYPES_CSV}: {e}")
    return data
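
# Arena source: arena.json appears to map each country to a list of
# conversations; each conversation's `username` field (handled as an email)
# counts as one contribution.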
def get_arena_data():
    data = []
    try:
        if os.path.exists(ARENA_JSON):
            with open(ARENA_JSON, "r", encoding="utf-8") as f:
                arena_data = json.load(f)
            mail_counts = defaultdict(int)
            for country, conversations in arena_data.items():
                for conversation in conversations:
                    if "username" in conversation:
                        mail = conversation["username"]
                        if mail:
                            mail_counts[mail.lower()] += 1
            for mail, count in mail_counts.items():
                username = get_discord_username(mail)
                data.append({"source": "arena", "username": username, "count": count})
    except Exception as e:
        print(f"Error loading {ARENA_JSON}: {e}")
    return data
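
# `cache_buster` is unused inside the function body: callers pass a fresh value
# (a timestamp) to force a recomputation, while repeated values hit the
# lru_cache and reuse the previous result.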
@lru_cache(maxsize=32)
def get_user_contributions_cached(cache_buster: int):
    return consolidate_all_data()
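
# Merge all sources into one row per user (keyed by lowercased username),
# sort by Arena count, and persist the table to leaderboard_personal.csv.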
def consolidate_all_data():
    all_data = []
    all_data.extend(get_blend_es_data())
    all_data.extend(get_include_data())
    all_data.extend(get_estereotipos_data())
    all_data.extend(get_arena_data())
    user_contributions = defaultdict(
        lambda: {
            "username": "",
            "blend_es": 0,
            "include": 0,
            "estereotipos": 0,
            "arena": 0,
        }
    )
    for item in all_data:
        source = item["source"]
        username = item["username"]
        count = item["count"]
        user_key = username.lower()
        if not user_contributions[user_key]["username"]:
            user_contributions[user_key]["username"] = username
        if source == "blend-es":
            user_contributions[user_key]["blend_es"] += count
        elif source == "include":
            user_contributions[user_key]["include"] += count
        elif source == "estereotipos":
            user_contributions[user_key]["estereotipos"] += count
        elif source == "arena":
            user_contributions[user_key]["arena"] += count
    rows = []
    for _, data in user_contributions.items():
        row = {
            "Username": data["username"],
            "Arena": data["arena"],
            "Blend-ES": data["blend_es"],
            "Estereotipos": data["estereotipos"],
            "INCLUDE": data["include"],
        }
        rows.append(row)
    df = pd.DataFrame(rows)
    if not df.empty:
        df = df.sort_values("Arena", ascending=False)
        with open(LEADERBOARD_PERSONAL_CSV, "w", encoding="utf-8") as f:
            df.to_csv(f, index=False)
    return df
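
# FastAPI app that hosts the Gradio UI, plus a module-level cache so the
# leaderboard is rebuilt at most once every 5 minutes per process.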
app = FastAPI()
last_update_time = 0
cached_data = None
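
# Render the leaderboard as a styled HTML table: a 1-based Rank column, a
# last-updated timestamp, and gold/silver/bronze highlighting for the top
# three rows.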
def create_leaderboard_ui():
    global cached_data, last_update_time
    current_time = time.time()
    # Reuse the cached DataFrame if it is younger than 5 minutes
    if cached_data is not None and current_time - last_update_time < 300:
        df = cached_data
    else:
        cache_buster = int(current_time)
        df = get_user_contributions_cached(cache_buster)
        cached_data = df
        last_update_time = current_time
    if not df.empty:
        # Turn the positional index into a 1-based "Rank" column
        df = df.reset_index(drop=True)
        df.index = df.index + 1
        df = df.rename_axis("Rank")
        df = df.reset_index()
    df_html = df.to_html(classes="leaderboard-table", border=0, index=False)
    styled_html = f"""
    <div style="margin: 20px 0;">
        <p>Última Actualización: {time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_update_time))}</p>
        <style>
            .leaderboard-table {{
                width: 100%;
                border-collapse: collapse;
                font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
                box-shadow: 0 4px 8px rgba(0,0,0,0.1);
                border-radius: 8px;
                overflow: hidden;
            }}
            .leaderboard-table th {{
                background-color: #1a1a2e;
                color: white;
                font-weight: bold;
                text-align: left;
                padding: 14px;
                border-bottom: 2px solid #16213e;
            }}
            .leaderboard-table td {{
                padding: 12px 14px;
                border-bottom: 1px solid #333;
                background-color: #222;
                color: #fff;
            }}
            .leaderboard-table tr:hover td {{
                background-color: #2a2a3a;
            }}
            /* Gold, silver and bronze styling for the top three ranks */
            .leaderboard-table tr:nth-child(1) td:first-child {{
                background-color: #ffd700;
                color: #333;
                font-weight: bold;
                text-align: center;
                border-right: 1px solid #333;
            }}
            .leaderboard-table tr:nth-child(2) td:first-child {{
                background-color: #c0c0c0;
                color: #333;
                font-weight: bold;
                text-align: center;
                border-right: 1px solid #333;
            }}
            .leaderboard-table tr:nth-child(3) td:first-child {{
                background-color: #cd7f32;
                color: #333;
                font-weight: bold;
                text-align: center;
                border-right: 1px solid #333;
            }}
            .leaderboard-table tr:nth-child(1) td:nth-child(2) {{
                font-weight: bold;
                color: #ffd700;
            }}
            .leaderboard-table tr:nth-child(2) td:nth-child(2) {{
                font-weight: bold;
                color: #c0c0c0;
            }}
            .leaderboard-table tr:nth-child(3) td:nth-child(2) {{
                font-weight: bold;
                color: #cd7f32;
            }}
        </style>
        {df_html}
    </div>
    """
    return styled_html
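
# Manual refresh: drop the cached DataFrame so the next render recomputes everything.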
def refresh_data():
    global cached_data, last_update_time
    cached_data = None
    last_update_time = 0
    return create_leaderboard_ui()
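
# Gradio front end: the leaderboard table plus a refresh button, mounted at the
# root path of the FastAPI app defined above.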
with gr.Blocks(theme=gr.themes.Default()) as demo:
    with gr.Column(scale=1):
        gr.Markdown("""# 🏆 Hackathon Leaderboard""")
        leaderboard_html = gr.HTML(create_leaderboard_ui)
        refresh_btn = gr.Button("🔄 Actualizar Datos", variant="primary")
        refresh_btn.click(fn=refresh_data, outputs=leaderboard_html)

gr.mount_gradio_app(app, demo, path="/")

if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)