# app.py — Argilla contribution leaderboard (Hugging Face Space)
# NOTE: the original file header was Hugging Face file-viewer residue
# ("Update app.py", commit 12c6f3b, 7.53 kB); converted to comments so the
# module parses as valid Python.
import gradio as gr
import argilla as rg
import pandas as pd
import os
import time
from collections import defaultdict
from fastapi import FastAPI
from functools import lru_cache
# Initialize Argilla client with environment variables
# NOTE(review): empty-string defaults mean a missing env var fails at the
# first API request rather than at startup — confirm that is intended.
client = rg.Argilla(
    api_url=os.getenv("ARGILLA_API_URL", ""),
    api_key=os.getenv("ARGILLA_API_KEY", "")
)
# Dataset information - list all the datasets to track
# Each entry is passed verbatim to client.datasets(...) as a lookup key,
# so names must match the dataset names in Argilla exactly.
DATASETS = [
    "🇪🇸 España - ESP - Responder",
    # Add more datasets as needed
]
# Cache results to avoid frequent API calls
@lru_cache(maxsize=32)
def get_user_contributions_cached(cache_buster: int):
    """Memoized proxy for get_user_contributions().

    The ``cache_buster`` argument carries no data of its own — it exists
    purely as the lru_cache key, so passing a previously unseen integer
    forces a fresh recomputation while repeated values hit the cache.
    """
    return get_user_contributions()
def get_user_contributions():
    """Aggregate response contributions per user across all tracked datasets.

    Returns:
        pd.DataFrame with columns "Username", "Total Contributions", plus one
        column per name in DATASETS, sorted by total contributions
        (descending). The column set is stable even when no contributions
        (or no users) were found.
    """
    user_contributions = defaultdict(lambda: {"username": "", "contributions": 0, "datasets": {}})
    user_id_to_username = {}

    # Process each dataset independently; one broken dataset must not take
    # down the whole leaderboard.
    for dataset_name in DATASETS:
        try:
            print(f"Processing dataset: {dataset_name}")
            dataset = client.datasets(dataset_name)
            records = list(dataset.records(with_responses=True))

            dataset_contributions = _count_dataset_contributions(records, user_id_to_username)

            # Fold this dataset's per-user counts into the overall stats.
            for user_id, count in dataset_contributions.items():
                username = user_id_to_username.get(user_id, f"User-{user_id[:8]}")
                user_contributions[user_id]["username"] = username
                user_contributions[user_id]["contributions"] += count
                user_contributions[user_id]["datasets"][dataset_name] = count
        except Exception as e:
            # Best-effort: log and continue with the remaining datasets.
            print(f"Error processing dataset {dataset_name}: {e}")

    # Convert to dataframe for easier handling.
    rows = []
    for user_id, data in user_contributions.items():
        row = {
            "Username": data["username"],
            "Total Contributions": data["contributions"],
        }
        # Add individual dataset contributions (0 when the user is absent).
        for dataset_name in DATASETS:
            row[dataset_name] = data["datasets"].get(dataset_name, 0)
        rows.append(row)

    # Pin the columns so an empty result still has the expected schema
    # (pd.DataFrame([]) would otherwise have no columns at all).
    df = pd.DataFrame(rows, columns=["Username", "Total Contributions", *DATASETS])
    # Sort by total contributions (descending)
    if not df.empty:
        df = df.sort_values("Total Contributions", ascending=False)
    return df


def _count_dataset_contributions(records, user_id_to_username):
    """Count "answer_1" responses per user_id for one dataset's records.

    Also fills ``user_id_to_username`` in place for any user_id seen for
    the first time. Records without a "responses" mapping or without an
    "answer_1" entry are skipped instead of raising KeyError (which would
    previously abort the entire dataset).
    """
    counts = defaultdict(int)
    for record in records:
        record_dict = record.to_dict()
        for answer in record_dict.get("responses", {}).get("answer_1", []):
            user_id = answer.get("user_id")
            if not user_id:
                continue
            counts[user_id] += 1
            # Get username if not already cached
            if user_id not in user_id_to_username:
                user_id_to_username[user_id] = _resolve_username(user_id)
    return counts


def _resolve_username(user_id):
    """Look up a username via the Argilla API, falling back to a short id."""
    try:
        return client.users(id=user_id).username
    except Exception as e:
        print(f"Error getting username for {user_id}: {e}")
        return f"User-{user_id[:8]}"
# App setup
app = FastAPI()

# Module-level cache used by create_leaderboard_ui / refresh_data.
last_update_time = 0  # epoch seconds of the last successful data fetch (0 = never)
cached_data = None  # last fetched DataFrame, or None when nothing is cached
# Table CSS kept in a plain string (not inside the f-string below) so the
# braces do not need error-prone {{ }} doubling and the styles are easy to edit.
_LEADERBOARD_CSS = """
<style>
    .leaderboard-table {
        width: 100%;
        border-collapse: collapse;
        font-family: Arial, sans-serif;
    }
    .leaderboard-table th {
        background-color: #f2f2f2;
        color: #333;
        font-weight: bold;
        text-align: left;
        padding: 12px;
        border-bottom: 2px solid #ddd;
    }
    .leaderboard-table td {
        padding: 10px 12px;
        border-bottom: 1px solid #ddd;
    }
    .leaderboard-table tr:nth-child(even) {
        background-color: #f9f9f9;
    }
    .leaderboard-table tr:hover {
        background-color: #f1f1f1;
    }
    /* Medal colors for the first two cells of the top three data rows.
       The header row uses <th> cells, so the td selectors skip it. */
    .leaderboard-table tr:nth-child(1) td:first-child,
    .leaderboard-table tr:nth-child(1) td:nth-child(2) {
        font-weight: bold;
        color: gold;
    }
    .leaderboard-table tr:nth-child(2) td:first-child,
    .leaderboard-table tr:nth-child(2) td:nth-child(2) {
        font-weight: bold;
        color: silver;
    }
    .leaderboard-table tr:nth-child(3) td:first-child,
    .leaderboard-table tr:nth-child(3) td:nth-child(2) {
        font-weight: bold;
        color: #cd7f32; /* bronze */
    }
</style>
"""


def create_leaderboard_ui():
    """Render the contribution leaderboard as a styled HTML fragment.

    Serves the module-level cache (``cached_data`` / ``last_update_time``)
    when it is younger than 5 minutes; otherwise fetches fresh data and
    updates the cache. Returns the HTML string for a gr.HTML component.
    """
    global cached_data, last_update_time
    current_time = time.time()

    # Use cached data if available and not expired (5 minute cache).
    if cached_data is not None and current_time - last_update_time < 300:
        df = cached_data
    else:
        # A fresh integer key busts the lru_cache and forces a re-fetch.
        cache_buster = int(current_time)
        df = get_user_contributions_cached(cache_buster)
        cached_data = df
        last_update_time = current_time

    # Prepend a 1-based "Rank" column for display only — the cached
    # DataFrame itself stays unranked.
    if not df.empty:
        df = df.reset_index(drop=True)
        df.index = df.index + 1
        df = df.rename_axis("Rank")
        df = df.reset_index()

    # Format for better display.
    df_html = df.to_html(classes="leaderboard-table", border=0, index=False)
    updated_at = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_update_time))

    return f"""
    <div style="margin: 20px 0;">
        <h2>🏆 Leaderboard of User Contributions</h2>
        <p>Last updated: {updated_at}</p>
        {_LEADERBOARD_CSS}
        {df_html}
        <p><small>Note: This leaderboard shows user contributions across all tracked datasets.</small></p>
    </div>
    """
def refresh_data():
    """Invalidate the cached leaderboard and rebuild it immediately.

    Resetting both cache markers guarantees create_leaderboard_ui takes
    the fresh-fetch branch on the very next call.
    """
    global cached_data, last_update_time
    last_update_time = 0
    cached_data = None
    return create_leaderboard_ui()
# Create Gradio interface
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")) as demo:
    gr.Markdown("# Contribution Leaderboard")
    gr.Markdown("Track user contributions across datasets in real-time")

    # Create leaderboard display.
    # NOTE(review): the callable itself (not its result) is passed, which
    # relies on Gradio invoking it to produce the value — confirm this is
    # supported by the installed Gradio version.
    leaderboard_html = gr.HTML(create_leaderboard_ui)

    # Add refresh button — bypasses the 5-minute cache via refresh_data().
    refresh_btn = gr.Button("🔄 Refresh Data")
    refresh_btn.click(fn=refresh_data, outputs=leaderboard_html)

    # Additional information
    with gr.Accordion("About this leaderboard", open=False):
        gr.Markdown("""
        This leaderboard tracks user contributions across multiple datasets.
        ### How it works
        - **Contributions**: Each response provided by a user counts as one contribution
        - **Refresh**: Data is automatically cached for 5 minutes. Click the refresh button to update manually
        - **Datasets tracked**:
        - 🇪🇸 España - ESP - Responder
        - [Add more datasets as needed]
        """)

# Mount the Gradio app at the FastAPI root so "/" serves the leaderboard.
gr.mount_gradio_app(app, demo, path="/")

# Run the app (only when executed directly, not when imported/mounted).
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)