# Captured from a Hugging Face Space file view (Space status at capture time: "Runtime error").
# File size: 7,529 bytes.
# The blame-hash column and the 1-209 line-number gutter of the web view were extraction residue and are omitted.
import gradio as gr
import argilla as rg
import pandas as pd
import os
import time
from collections import defaultdict
from fastapi import FastAPI
from functools import lru_cache
# Argilla client shared by the whole module. Connection settings are read from
# the ARGILLA_API_URL / ARGILLA_API_KEY environment variables; an unset
# variable falls back to the empty string.
client = rg.Argilla(
    api_url=os.environ.get("ARGILLA_API_URL", ""),
    api_key=os.environ.get("ARGILLA_API_KEY", ""),
)
# Dataset information - list all the datasets to track
# Each entry is a dataset name that get_user_contributions() passes to
# client.datasets(); every name also becomes one per-dataset column of the
# leaderboard table.
DATASETS = [
"🇪🇸 España - ESP - Responder",
# Add more datasets as needed
]
# Memoized front door for get_user_contributions(), to avoid frequent API calls
@lru_cache(maxsize=32)
def get_user_contributions_cached(cache_buster: int):
    """Return the contributions table, memoized per ``cache_buster`` value.

    ``cache_buster`` is never read by the body; it only serves as the
    ``lru_cache`` key, so calling with a new value triggers a fresh
    ``get_user_contributions()`` call while a repeated value returns the
    previously computed object.
    """
    contributions = get_user_contributions()
    return contributions
def _resolve_username(user_id, username_cache):
    """Return the display name for ``user_id``, querying ``client.users`` at most once per id.

    Falls back to ``User-<first 8 characters of the id>`` when the lookup raises.
    """
    if user_id not in username_cache:
        try:
            user = client.users(id=user_id)
            username_cache[user_id] = user.username
        except Exception as e:
            # Best effort: one failed lookup must not abort the whole dataset.
            print(f"Error getting username for {user_id}: {e}")
            # str() keeps the fallback itself from raising inside this handler
            # if the id is not a plain string.
            username_cache[user_id] = f"User-{str(user_id)[:8]}"
    return username_cache[user_id]


def _count_dataset_contributions(dataset_name, question_name):
    """Return ``{user_id: number of responses}`` for ``question_name`` in one dataset."""
    counts = defaultdict(int)
    dataset = client.datasets(dataset_name)
    # Iterate the records directly instead of materializing them in a list first.
    for record in dataset.records(with_responses=True):
        responses = record.to_dict()["responses"]
        if question_name not in responses:
            continue
        for answer in responses[question_name]:
            user_id = answer["user_id"]
            if user_id:
                counts[user_id] += 1
    return counts


def get_user_contributions(datasets=None, question_name: str = "answer_1") -> pd.DataFrame:
    """Get contributions per user across all datasets.

    Args:
        datasets: Iterable of dataset names to scan. ``None`` (the default)
            scans the module-level ``DATASETS``. Repeated names are scanned
            only once so a dataset cannot be double-counted.
        question_name: Key under each record's ``"responses"`` whose answers
            are counted. Defaults to ``"answer_1"``.

    Returns:
        A DataFrame with the columns ``"Username"``, ``"Total Contributions"``
        and one column per dataset name, sorted by total contributions in
        descending order. The columns are present even when there are no rows.

    A dataset whose processing raises is reported with ``print`` and skipped;
    nothing counted for that dataset is added to the totals.
    """
    source_names = DATASETS if datasets is None else datasets
    # dict.fromkeys de-duplicates while preserving the configured order.
    dataset_names = list(dict.fromkeys(source_names))

    user_contributions = defaultdict(lambda: {"username": "", "contributions": 0, "datasets": {}})
    username_cache = {}

    # Process each dataset
    for dataset_name in dataset_names:
        try:
            print(f"Processing dataset: {dataset_name}")
            dataset_contributions = _count_dataset_contributions(dataset_name, question_name)
            # Merge only after the whole dataset was read successfully.
            for user_id, count in dataset_contributions.items():
                stats = user_contributions[user_id]
                stats["username"] = _resolve_username(user_id, username_cache)
                stats["contributions"] += count
                stats["datasets"][dataset_name] = count
        except Exception as e:
            print(f"Error processing dataset {dataset_name}: {e}")

    # Convert to dataframe for easier handling
    rows = [
        {
            "Username": data["username"],
            "Total Contributions": data["contributions"],
            **{name: data["datasets"].get(name, 0) for name in dataset_names},
        }
        for data in user_contributions.values()
    ]
    # Explicit columns keep the table header intact when there are no rows.
    columns = ["Username", "Total Contributions", *dataset_names]
    df = pd.DataFrame(rows, columns=columns)

    # Sort by total contributions (descending); a stable sort keeps tied users
    # in a deterministic (first-seen) order.
    if not df.empty:
        df = df.sort_values("Total Contributions", ascending=False, kind="stable")
    return df
# App setup
# FastAPI application that the Gradio UI is mounted on further down.
app = FastAPI()
# Module-level cache state shared by create_leaderboard_ui() and refresh_data().
# time.time() value of the last fetch; 0 means "never fetched / refresh forced".
last_update_time = 0
# Last fetched contributions DataFrame, or None when nothing is cached.
cached_data = None
# How long (in seconds) a fetched leaderboard is reused before it is fetched again.
CACHE_TTL_SECONDS = 300

# Stylesheet embedded in every rendered leaderboard fragment.
_LEADERBOARD_CSS = """<style>
.leaderboard-table {
width: 100%;
border-collapse: collapse;
font-family: Arial, sans-serif;
}
.leaderboard-table th {
background-color: #f2f2f2;
color: #333;
font-weight: bold;
text-align: left;
padding: 12px;
border-bottom: 2px solid #ddd;
}
.leaderboard-table td {
padding: 10px 12px;
border-bottom: 1px solid #ddd;
}
.leaderboard-table tr:nth-child(even) {
background-color: #f9f9f9;
}
.leaderboard-table tr:hover {
background-color: #f1f1f1;
}
.leaderboard-table tr:nth-child(1) td:first-child,
.leaderboard-table tr:nth-child(1) td:nth-child(2) {
font-weight: bold;
color: gold;
}
.leaderboard-table tr:nth-child(2) td:first-child,
.leaderboard-table tr:nth-child(2) td:nth-child(2) {
font-weight: bold;
color: silver;
}
.leaderboard-table tr:nth-child(3) td:first-child,
.leaderboard-table tr:nth-child(3) td:nth-child(2) {
font-weight: bold;
color: #cd7f32; /* bronze */
}
</style>"""


def create_leaderboard_ui():
    """Create the leaderboard UI.

    Reuses the module-level ``cached_data`` while it is younger than
    ``CACHE_TTL_SECONDS``; otherwise fetches a fresh table through
    ``get_user_contributions_cached`` and stores it, together with the fetch
    time, in ``cached_data`` / ``last_update_time``.

    Returns:
        An HTML fragment (``str``) containing the heading, the "last updated"
        timestamp, the stylesheet and either the ranked table or, when there
        are no contributions, a short explanatory message.
    """
    global cached_data, last_update_time
    current_time = time.time()

    cache_is_fresh = (
        cached_data is not None
        and current_time - last_update_time < CACHE_TTL_SECONDS
    )
    if cache_is_fresh:
        df = cached_data
    else:
        # Fetch fresh data; the whole-second timestamp is the lru_cache key.
        cache_buster = int(current_time)
        df = get_user_contributions_cached(cache_buster)
        cached_data = df
        last_update_time = current_time

    if df.empty:
        # An empty frame would otherwise render as a blank table.
        table_html = "<p>No contributions recorded yet.</p>"
    else:
        # reset_index returns a new frame, so the cached one is never mutated.
        ranked = df.reset_index(drop=True)
        ranked.insert(0, "Rank", range(1, len(ranked) + 1))
        # escape=True (pandas' default, made explicit): usernames come from an
        # external service and must not be interpreted as markup.
        table_html = ranked.to_html(
            classes="leaderboard-table", border=0, index=False, escape=True
        )

    updated_at = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(last_update_time))
    return (
        '\n<div style="margin: 20px 0;">\n'
        "<h2>🏆 Leaderboard of User Contributions</h2>\n"
        f"<p>Last updated: {updated_at}</p>\n"
        f"{_LEADERBOARD_CSS}\n"
        f"{table_html}\n"
        "<p><small>Note: This leaderboard shows user contributions across all tracked datasets.</small></p>\n"
        "</div>\n"
    )
def refresh_data():
    """Force refresh of the data.

    Drops the module-level cache (``cached_data`` / ``last_update_time``) and
    the memoized results of ``get_user_contributions_cached``, then rebuilds
    and returns the leaderboard HTML via ``create_leaderboard_ui()``.
    """
    global cached_data, last_update_time
    cached_data = None
    last_update_time = 0
    # create_leaderboard_ui() keys the lru_cache on the current whole second,
    # so without clearing it a refresh within the same second as the previous
    # fetch would be served the memoized (stale) result.
    get_user_contributions_cached.cache_clear()
    return create_leaderboard_ui()
# Create Gradio interface
# The "datasets tracked" bullets are generated from DATASETS so the About
# panel always matches what is actually counted.
_ABOUT_MARKDOWN = (
    "\n"
    "This leaderboard tracks user contributions across multiple datasets.\n"
    "### How it works\n"
    "- **Contributions**: Each response provided by a user counts as one contribution\n"
    "- **Refresh**: Data is automatically cached for 5 minutes. Click the refresh button to update manually\n"
    "- **Datasets tracked**:\n"
    + "".join(f"- {dataset_name}\n" for dataset_name in DATASETS)
)

with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue", secondary_hue="indigo")) as demo:
    gr.Markdown("# Contribution Leaderboard")
    gr.Markdown("Track user contributions across datasets in real-time")
    # Create leaderboard display; the function itself (not its result) is
    # passed as the component's value.
    leaderboard_html = gr.HTML(create_leaderboard_ui)
    # Add refresh button
    refresh_btn = gr.Button("🔄 Refresh Data")
    refresh_btn.click(fn=refresh_data, outputs=leaderboard_html)
    # Additional information
    with gr.Accordion("About this leaderboard", open=False):
        gr.Markdown(_ABOUT_MARKDOWN)
# Attach the Gradio UI to the FastAPI application at the root path
gr.mount_gradio_app(app, demo, path="/")

# Start the server only when this file is executed as a script
if __name__ == "__main__":
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 7860}
    uvicorn.run(app, **server_options)