# app.py — Argilla contribution leaderboard (Hugging Face Space)
# NOTE: the original file header was Hugging Face file-viewer residue
# ("Update app.py", commit 12c6f3b, 7.53 kB); converted to comments so the
# module parses as valid Python.
import gradio as gr
import argilla as rg
import pandas as pd
import os
import time
from collections import defaultdict
from fastapi import FastAPI
from functools import lru_cache
# Initialize Argilla client with environment variables
# NOTE(review): empty-string defaults mean a missing env var fails at the
# first API request rather than at startup — confirm that is intended.
client = rg.Argilla(
    api_url=os.getenv("ARGILLA_API_URL", ""),
    api_key=os.getenv("ARGILLA_API_KEY", "")
)
# Dataset information - list all the datasets to track
# Each entry is passed verbatim to client.datasets(...) as a lookup key,
# so names must match the dataset names in Argilla exactly.
DATASETS = [
    "🇪🇸 España - ESP - Responder",
    # Add more datasets as needed
]
# Cache results to avoid frequent API calls
@lru_cache(maxsize=32)
def get_user_contributions_cached(cache_buster: int):
    """Memoized proxy for get_user_contributions().

    The ``cache_buster`` argument carries no data of its own — it exists
    purely as the lru_cache key, so passing a previously unseen integer
    forces a fresh recomputation while repeated values hit the cache.
    """
    return get_user_contributions()
def get_user_contributions():
    """Aggregate response contributions per user across all tracked datasets.

    Returns:
        pd.DataFrame with columns "Username", "Total Contributions", plus one
        column per name in DATASETS, sorted by total contributions
        (descending). The column set is stable even when no contributions
        (or no users) were found.
    """
    user_contributions = defaultdict(lambda: {"username": "", "contributions": 0, "datasets": {}})
    user_id_to_username = {}

    # Process each dataset independently; one broken dataset must not take
    # down the whole leaderboard.
    for dataset_name in DATASETS:
        try:
            print(f"Processing dataset: {dataset_name}")
            dataset = client.datasets(dataset_name)
            records = list(dataset.records(with_responses=True))

            dataset_contributions = _count_dataset_contributions(records, user_id_to_username)

            # Fold this dataset's per-user counts into the overall stats.
            for user_id, count in dataset_contributions.items():
                username = user_id_to_username.get(user_id, f"User-{user_id[:8]}")
                user_contributions[user_id]["username"] = username
                user_contributions[user_id]["contributions"] += count
                user_contributions[user_id]["datasets"][dataset_name] = count
        except Exception as e:
            # Best-effort: log and continue with the remaining datasets.
            print(f"Error processing dataset {dataset_name}: {e}")

    # Convert to dataframe for easier handling.
    rows = []
    for user_id, data in user_contributions.items():
        row = {
            "Username": data["username"],
            "Total Contributions": data["contributions"],
        }
        # Add individual dataset contributions (0 when the user is absent).
        for dataset_name in DATASETS:
            row[dataset_name] = data["datasets"].get(dataset_name, 0)
        rows.append(row)

    # Pin the columns so an empty result still has the expected schema
    # (pd.DataFrame([]) would otherwise have no columns at all).
    df = pd.DataFrame(rows, columns=["Username", "Total Contributions", *DATASETS])
    # Sort by total contributions (descending)
    if not df.empty:
        df = df.sort_values("Total Contributions", ascending=False)
    return df


def _count_dataset_contributions(records, user_id_to_username):
    """Count "answer_1" responses per user_id for one dataset's records.

    Also fills ``user_id_to_username`` in place for any user_id seen for
    the first time. Records without a "responses" mapping or without an
    "answer_1" entry are skipped instead of raising KeyError (which would
    previously abort the entire dataset).
    """
    counts = defaultdict(int)
    for record in records:
        record_dict = record.to_dict()
        for answer in record_dict.get("responses", {}).get("answer_1", []):
            user_id = answer.get("user_id")
            if not user_id:
                continue
            counts[user_id] += 1
            # Get username if not already cached
            if user_id not in user_id_to_username:
                user_id_to_username[user_id] = _resolve_username(user_id)
    return counts


def _resolve_username(user_id):
    """Look up a username via the Argilla API, falling back to a short id."""
    try:
        return client.users(id=user_id).username
    except Exception as e:
        print(f"Error getting username for {user_id}: {e}")
        return f"User-{user_id[:8]}"
# App setup
app = FastAPI()

# Module-level cache used by create_leaderboard_ui / refresh_data.
last_update_time = 0  # epoch seconds of the last successful data fetch (0 = never)
cached_data = None  # last fetched DataFrame, or None when nothing is cached
# Table CSS kept in a plain string (not inside the f-string below) so the
# braces do not need error-prone {{ }} doubling and the styles are easy to edit.
_LEADERBOARD_CSS = """
<style>
    .leaderboard-table {
        width: 100%;
        border-collapse: collapse;
        font-family: Arial, sans-serif;
    }
    .leaderboard-table th {
        background-color: #f2f2f2;
        color: #333;
        font-weight: bold;
        text-align: left;
        padding: 12px;
        border-bottom: 2px solid #ddd;
    }
    .leaderboard-table td {
        padding: 10px 12px;
        border-bottom: 1px solid #ddd;
    }
    .leaderboard-table tr:nth-child(even) {
        background-color: #f9f9f9;
    }
    .leaderboard-table tr:hover {
        background-color: #f1f1f1;
    }
    /* Medal colors for the first two cells of the top three data rows.
       The header row uses <th> cells, so the td selectors skip it. */
    .leaderboard-table tr:nth-child(1) td:first-child,
    .leaderboard-table tr:nth-child(1) td:nth-child(2) {
        font-weight: bold;
        color: gold;
    }
    .leaderboard-table tr:nth-child(2) td:first-child,
    .leaderboard-table tr:nth-child(2) td:nth-child(2) {
        font-weight: bold;
        color: silver;
    }
    .leaderboard-table tr:nth-child(3) td:first-child,
    .leaderboard-table tr:nth-child(3) td:nth-child(2) {
        font-weight: bold;
        color: #cd7f32; /* bronze */
    }
</style>
"""


def create_leaderboard_ui():
    """Render the contribution leaderboard as a styled HTML fragment.

    Serves the module-level cache (``cached_data`` / ``last_update_time``)
    when it is younger than 5 minutes; otherwise fetches fresh data and
    updates the cache. Returns the HTML string for a gr.HTML component.
    """
    global cached_data, last_update_time
    current_time = time.time()

    # Use cached data if available and not expired (5 minute cache).
    if cached_data is not None and current_time - last_update_time < 300:
        df = cached_data
    else:
        # A fresh integer key busts the lru_cache and forces a re-fetch.
        cache_buster = int(current_time)
        df = get_user_contributions_cached(cache_buster)
        cached_data = df
        last_update_time = current_time

    # Prepend a 1-based "Rank" column for display only — the cached
    # DataFrame itself stays unranked.
    if not df.empty:
        df = df.reset_index(drop=True)
        df.index = df.index + 1
        df = df.rename_axis("Rank")
        df = df.reset_index()

    # Format for better display.
    df_html = df.to_html(classes="leaderboard-table", border=0, index=False)
    updated_at = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_update_time))

    return f"""
    <div style="margin: 20px 0;">
        <h2>🏆 Leaderboard of User Contributions</h2>
        <p>Last updated: {updated_at}</p>
        {_LEADERBOARD_CSS}
        {df_html}
        <p><small>Note: This leaderboard shows user contributions across all tracked datasets.</small></p>
    </div>
    """
def refresh_data():
    """Invalidate the cached leaderboard and rebuild it immediately.

    Resetting both cache markers guarantees create_leaderboard_ui takes
    the fresh-fetch branch on the very next call.
    """
    global cached_data, last_update_time
    last_update_time = 0
    cached_data = None
    return create_leaderboard_ui()
# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")) as demo:
    gr.Markdown("# Contribution Leaderboard")
    gr.Markdown("Track user contributions across datasets in real-time")

    # Create leaderboard display.
    # NOTE(review): the callable itself (not its result) is passed, which
    # relies on Gradio invoking it to produce the value — confirm this is
    # supported by the installed Gradio version.
    leaderboard_html = gr.HTML(create_leaderboard_ui)

    # Add refresh button — bypasses the 5-minute cache via refresh_data().
    refresh_btn = gr.Button("🔄 Refresh Data")
    refresh_btn.click(fn=refresh_data, outputs=leaderboard_html)

    # Additional information
    with gr.Accordion("About this leaderboard", open=False):
        gr.Markdown("""
        This leaderboard tracks user contributions across multiple datasets.
        ### How it works
        - **Contributions**: Each response provided by a user counts as one contribution
        - **Refresh**: Data is automatically cached for 5 minutes. Click the refresh button to update manually
        - **Datasets tracked**:
        - 🇪🇸 España - ESP - Responder
        - [Add more datasets as needed]
        """)

# Mount the Gradio app at the FastAPI root so "/" serves the leaderboard.
gr.mount_gradio_app(app, demo, path="/")

# Run the app (only when executed directly, not when imported/mounted).
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)