mariagrandury committed on
Commit
4da5970
·
1 Parent(s): 11d32ed

extract score calculation to new script

Browse files
Files changed (2) hide show
  1. app.py +3 -447
  2. calculate_scores.py +462 -0
app.py CHANGED
@@ -10,453 +10,9 @@ import pandas as pd
10
  from dotenv import load_dotenv
11
  from fastapi import FastAPI
12
 
13
- load_dotenv()
14
-
15
- # Constants
16
- DATA_DIR = "data"
17
- PARTICIPANTS_CSV = os.path.join(DATA_DIR, "participants.csv")
18
- EQUIPOS_CSV = os.path.join(DATA_DIR, "equipos.csv")
19
- LEADERBOARD_PERSONAL_CSV = "leaderboard_personal.csv"
20
- LEADERBOARD_EQUIPOS_CSV = "leaderboard_equipos.csv"
21
-
22
- # Column mappings for participants info
23
- COLUMN_MAP = {
24
- "gmail": "Dirección de correo electrónico",
25
- "discord": "¿Cuál es tu nombre en Discord?",
26
- "hf_username": "¿Cuál es tu nombre en el Hub de Hugging Face?",
27
- "contact_email": "Email de contacto",
28
- }
29
-
30
- # Column mappings for teams info
31
- TEAM_COLUMNS = {
32
- "team_name": "Nombre del equipo",
33
- "email_1": "Email 1",
34
- "email_2": "Email 2",
35
- "email_3": "Email 3",
36
- "email_4": "Email 4",
37
- "email_5": "Email 5",
38
- }
39
-
40
- # Initialize Argilla client
41
- try:
42
- client = rg.Argilla(
43
- api_url=os.getenv("ARGILLA_API_URL", ""),
44
- api_key=os.getenv("ARGILLA_API_KEY", ""),
45
- )
46
- except Exception as e:
47
- print(f"Error initializing Argilla client: {e}")
48
- client = None
49
-
50
- # Countries data
51
- countries = {
52
- "Argentina": {"iso": "ARG", "emoji": "🇦🇷"},
53
- "Bolivia": {"iso": "BOL", "emoji": "🇧🇴"},
54
- "Chile": {"iso": "CHL", "emoji": "🇨🇱"},
55
- "Colombia": {"iso": "COL", "emoji": "🇨🇴"},
56
- "Costa Rica": {"iso": "CRI", "emoji": "🇨🇷"},
57
- "Cuba": {"iso": "CUB", "emoji": "🇨🇺"},
58
- "Ecuador": {"iso": "ECU", "emoji": "🇪🇨"},
59
- "El Salvador": {"iso": "SLV", "emoji": "🇸🇻"},
60
- "España": {"iso": "ESP", "emoji": "🇪🇸"},
61
- "Guatemala": {"iso": "GTM", "emoji": "🇬🇹"},
62
- "Honduras": {"iso": "HND", "emoji": "🇭🇳"},
63
- "México": {"iso": "MEX", "emoji": "🇲🇽"},
64
- "Nicaragua": {"iso": "NIC", "emoji": "🇳🇮"},
65
- "Panamá": {"iso": "PAN", "emoji": "🇵🇦"},
66
- "Paraguay": {"iso": "PRY", "emoji": "🇵🇾"},
67
- "Perú": {"iso": "PER", "emoji": "🇵🇪"},
68
- "Puerto Rico": {"iso": "PRI", "emoji": "🇵🇷"},
69
- "República Dominicana": {"iso": "DOM", "emoji": "🇩🇴"},
70
- "Uruguay": {"iso": "URY", "emoji": "🇺🇾"},
71
- "Venezuela": {"iso": "VEN", "emoji": "🇻🇪"},
72
- }
73
-
74
-
75
- @lru_cache(maxsize=1)
76
- def get_user_mapping():
77
- """Get cached mapping of emails and hf_usernames to discord usernames."""
78
- if not os.path.exists(PARTICIPANTS_CSV):
79
- return {}, {}
80
-
81
- try:
82
- df = pd.read_csv(PARTICIPANTS_CSV)
83
- email_to_discord = {}
84
- hf_to_discord = {}
85
-
86
- for _, row in df.iterrows():
87
- discord = row.get(COLUMN_MAP["discord"], "")
88
- if pd.notna(discord) and discord != "NA":
89
- discord_lower = discord.lower()
90
-
91
- # Map email to discord
92
- gmail = row.get(COLUMN_MAP["gmail"], "")
93
- if pd.notna(gmail):
94
- email_to_discord[gmail.lower()] = discord_lower
95
-
96
- # Map hf_username to discord
97
- hf_username = row.get(COLUMN_MAP["hf_username"], "")
98
- if pd.notna(hf_username):
99
- hf_to_discord[hf_username.lower()] = discord_lower
100
-
101
- return email_to_discord, hf_to_discord
102
- except Exception as e:
103
- print(f"Error loading {PARTICIPANTS_CSV}: {e}")
104
- return {}, {}
105
-
106
-
107
- def get_discord_username(identifier):
108
- """Get discord username from email or hf_username."""
109
- email_to_discord, hf_to_discord = get_user_mapping()
110
-
111
- if "@" in identifier:
112
- return email_to_discord.get(identifier.lower(), identifier.split("@")[0])
113
-
114
- return hf_to_discord.get(identifier.lower(), identifier)
115
-
116
-
117
- def get_participant_info():
118
- """Get participant information from CSV."""
119
- if not os.path.exists(PARTICIPANTS_CSV):
120
- return {}
121
-
122
- try:
123
- df = pd.read_csv(PARTICIPANTS_CSV)
124
- participant_info = {}
125
-
126
- for _, row in df.iterrows():
127
- discord_username = row.get(COLUMN_MAP["discord"], "")
128
- if pd.notna(discord_username) and discord_username != "NA":
129
- participant_info[discord_username.lower()] = {
130
- "gmail": row.get(COLUMN_MAP["gmail"], ""),
131
- "discord_username": discord_username,
132
- "hf_username": row.get(COLUMN_MAP["hf_username"], ""),
133
- "email": row.get(COLUMN_MAP["contact_email"], ""),
134
- }
135
-
136
- return participant_info
137
- except Exception as e:
138
- print(f"Error loading participant info: {e}")
139
- return {}
140
-
141
-
142
- def get_team_leaderboard(personal_leaderboard_df):
143
- """Calculate team leaderboard based on personal scores."""
144
- if not os.path.exists(EQUIPOS_CSV):
145
- return pd.DataFrame()
146
-
147
- try:
148
- teams_df = pd.read_csv(EQUIPOS_CSV)
149
- team_leaderboard = []
150
-
151
- for _, team_row in teams_df.iterrows():
152
- team_name = team_row.get(TEAM_COLUMNS["team_name"], "")
153
- if not team_name:
154
- continue
155
-
156
- # Get team member emails
157
- team_emails = []
158
- for i in range(1, 6):
159
- email_col = TEAM_COLUMNS[f"email_{i}"]
160
- email = team_row.get(email_col, "")
161
- if pd.notna(email) and email.strip():
162
- team_emails.append(email.lower())
163
-
164
- if not team_emails:
165
- continue
166
-
167
- # Map emails to Discord usernames and get scores
168
- discord_usernames = []
169
- team_scores = {"arena": 0, "blend_es": 0, "estereotipos": 0, "include": 0}
170
-
171
- for email in team_emails:
172
- # Get Discord username from email
173
- discord_username = get_discord_username(email)
174
- discord_usernames.append(discord_username)
175
-
176
- # Find this user in the personal leaderboard
177
- user_scores = personal_leaderboard_df[
178
- personal_leaderboard_df["Username"].str.lower()
179
- == discord_username.lower()
180
- ]
181
-
182
- if not user_scores.empty:
183
- team_scores["arena"] += user_scores.iloc[0]["Arena"]
184
- team_scores["blend_es"] += user_scores.iloc[0]["Blend-ES"]
185
- team_scores["estereotipos"] += user_scores.iloc[0]["Estereotipos"]
186
- team_scores["include"] += user_scores.iloc[0]["INCLUDE"]
187
-
188
- # Pad Discord usernames list to 5 elements
189
- while len(discord_usernames) < 5:
190
- discord_usernames.append("")
191
-
192
- # Create team row
193
- team_row_data = {
194
- "team_name": team_name,
195
- "discord_1": discord_usernames[0],
196
- "discord_2": discord_usernames[1],
197
- "discord_3": discord_usernames[2],
198
- "discord_4": discord_usernames[3],
199
- "discord_5": discord_usernames[4],
200
- "total_arena": team_scores["arena"],
201
- "ptos_arena": 0, # Set to 0 for now as requested
202
- "total_blend_es": team_scores["blend_es"],
203
- "ptos_blend_es": 0, # Set to 0 for now as requested
204
- "total_estereotipos": team_scores["estereotipos"],
205
- "ptos_estereotipos": 0, # Set to 0 for now as requested
206
- "total_include": team_scores["include"],
207
- "ptos_include": 0, # Set to 0 for now as requested
208
- "ptos_total": 0, # Set to 0 for now as requested
209
- }
210
-
211
- team_leaderboard.append(team_row_data)
212
-
213
- # Create DataFrame and sort by total_arena
214
- if team_leaderboard:
215
- team_df = pd.DataFrame(team_leaderboard)
216
- team_df.sort_values("total_arena", ascending=False, inplace=True)
217
- return team_df
218
- else:
219
- return pd.DataFrame()
220
-
221
- except Exception as e:
222
- print(f"Error calculating team leaderboard: {e}")
223
- return pd.DataFrame()
224
-
225
-
226
- def get_blend_es_data():
227
- """Get blend-es data from Argilla."""
228
- if not client:
229
- return []
230
-
231
- data = []
232
- for country, info in countries.items():
233
- dataset_name = f"{info['emoji']} {country} - {info['iso']} - Responder"
234
-
235
- try:
236
- dataset = client.datasets(dataset_name)
237
- records = list(dataset.records(with_responses=True))
238
-
239
- user_counts = defaultdict(int)
240
- user_mapping = {}
241
-
242
- for record in records:
243
- if "answer_1" in record.responses:
244
- for answer in record.responses["answer_1"]:
245
- if answer.user_id:
246
- user_id = answer.user_id
247
- user_counts[user_id] += 1
248
-
249
- if user_id not in user_mapping:
250
- try:
251
- user = client.users(id=user_id)
252
- user_mapping[user_id] = user.username
253
- except:
254
- user_mapping[user_id] = f"User-{user_id[:8]}"
255
-
256
- for user_id, count in user_counts.items():
257
- hf_username = user_mapping.get(user_id, f"User-{user_id[:8]}")
258
- username = get_discord_username(hf_username)
259
- data.append(
260
- {"source": "blend-es", "username": username, "count": count}
261
- )
262
-
263
- except Exception as e:
264
- print(f"Error processing {dataset_name}: {e}")
265
-
266
- return data
267
-
268
-
269
- def get_include_data():
270
- """Get include data from CSV."""
271
- csv_path = os.path.join(DATA_DIR, "include.csv")
272
- if not os.path.exists(csv_path):
273
- return []
274
-
275
- try:
276
- df = pd.read_csv(csv_path)
277
- username_col = "Nombre en Discord / username"
278
- questions_col = "Total preguntas hackathon"
279
-
280
- if username_col not in df.columns or questions_col not in df.columns:
281
- return []
282
-
283
- user_counts = defaultdict(int)
284
- for _, row in df.iterrows():
285
- username = row[username_col][1:] if pd.notna(row[username_col]) else ""
286
- questions = row[questions_col] if pd.notna(row[questions_col]) else 0
287
- if username and questions:
288
- user_counts[username.lower()] += int(questions)
289
-
290
- return [
291
- {"source": "include", "username": username, "count": count}
292
- for username, count in user_counts.items()
293
- ]
294
- except Exception as e:
295
- print(f"Error loading include data: {e}")
296
- return []
297
-
298
-
299
- def get_estereotipos_data():
300
- """Get estereotipos data from CSV."""
301
- csv_path = os.path.join(DATA_DIR, "stereotypes.csv")
302
- if not os.path.exists(csv_path):
303
- return []
304
-
305
- try:
306
- df = pd.read_csv(csv_path)
307
- if "token_id" not in df.columns or "count" not in df.columns:
308
- return []
309
-
310
- user_counts = defaultdict(int)
311
- for _, row in df.iterrows():
312
- mail = row.get("token_id", "")
313
- count = row.get("count", 0)
314
- if pd.notna(mail) and pd.notna(count):
315
- user_counts[mail.lower()] += int(count)
316
-
317
- return [
318
- {
319
- "source": "include",
320
- "username": get_discord_username(mail),
321
- "count": count,
322
- }
323
- for mail, count in user_counts.items()
324
- ]
325
- except Exception as e:
326
- print(f"Error loading estereotipos data: {e}")
327
- return []
328
-
329
-
330
- def get_arena_data():
331
- """Get arena data from JSON."""
332
- json_path = os.path.join(DATA_DIR, "arena.json")
333
- if not os.path.exists(json_path):
334
- return []
335
-
336
- try:
337
- with open(json_path, "r", encoding="utf-8") as f:
338
- arena_data = json.load(f)
339
-
340
- user_counts = defaultdict(int)
341
- for conversations in arena_data.values():
342
- for conversation in conversations:
343
- if username := conversation.get("username"):
344
- user_counts[username.lower()] += 1
345
-
346
- return [
347
- {"source": "arena", "username": get_discord_username(mail), "count": count}
348
- for mail, count in user_counts.items()
349
- ]
350
- except Exception as e:
351
- print(f"Error loading arena data: {e}")
352
- return []
353
-
354
-
355
- def consolidate_all_data():
356
- """Consolidate all data sources and create leaderboard."""
357
- # Collect all data
358
- all_data = (
359
- get_blend_es_data()
360
- + get_include_data()
361
- + get_estereotipos_data()
362
- + get_arena_data()
363
- )
364
-
365
- # Get participant info
366
- participant_info = get_participant_info()
367
-
368
- # Aggregate user contributions
369
- user_contributions = defaultdict(
370
- lambda: {
371
- "username": "",
372
- "gmail": "",
373
- "discord_username": "",
374
- "hf_username": "",
375
- "email": "",
376
- "blend_es": 0,
377
- "include": 0,
378
- "estereotipos": 0,
379
- "arena": 0,
380
- }
381
- )
382
-
383
- for item in all_data:
384
- source = item["source"]
385
- username = item["username"]
386
- count = item["count"]
387
- user_key = username.lower()
388
-
389
- if not user_contributions[user_key]["username"]:
390
- user_contributions[user_key]["username"] = username
391
- if username.lower() in participant_info:
392
- info = participant_info[username.lower()]
393
- user_contributions[user_key].update(
394
- {
395
- "gmail": info["gmail"],
396
- "discord_username": info["discord_username"],
397
- "hf_username": info["hf_username"],
398
- "email": info["email"],
399
- }
400
- )
401
-
402
- if source == "blend-es":
403
- user_contributions[user_key]["blend_es"] += count
404
- elif source == "include":
405
- user_contributions[user_key]["include"] += count
406
- elif source == "estereotipos":
407
- user_contributions[user_key]["estereotipos"] += count
408
- elif source == "arena":
409
- user_contributions[user_key]["arena"] += count
410
-
411
- # Create dataframes
412
- full_rows = []
413
- display_rows = []
414
-
415
- for data in user_contributions.values():
416
- # Full data for CSV
417
- full_rows.append(
418
- {
419
- "Username": data["username"],
420
- "Gmail": data["gmail"],
421
- "Discord_Username": data["discord_username"],
422
- "HF_Username": data["hf_username"],
423
- "Email": data["email"],
424
- "Arena": data["arena"],
425
- "Blend-ES": data["blend_es"],
426
- "Estereotipos": data["estereotipos"],
427
- "INCLUDE": data["include"],
428
- }
429
- )
430
-
431
- # Display data for UI (public)
432
- display_rows.append(
433
- {
434
- "Username": data["username"],
435
- "Arena": data["arena"],
436
- "Blend-ES": data["blend_es"],
437
- "Estereotipos": data["estereotipos"],
438
- "INCLUDE": data["include"],
439
- }
440
- )
441
-
442
- # Save full data to CSV
443
- full_df = pd.DataFrame(full_rows)
444
- if not full_df.empty:
445
- full_df.sort_values("Arena", ascending=False, inplace=True)
446
- full_df.to_csv(LEADERBOARD_PERSONAL_CSV, index=False, encoding="utf-8")
447
-
448
- # Generate and save team leaderboard
449
- team_df = get_team_leaderboard(full_df)
450
- if not team_df.empty:
451
- team_df.to_csv(LEADERBOARD_EQUIPOS_CSV, index=False, encoding="utf-8")
452
-
453
- # Return display dataframe for UI
454
- display_df = pd.DataFrame(display_rows)
455
- if not display_df.empty:
456
- display_df.sort_values("Arena", ascending=False, inplace=True)
457
-
458
- return display_df
459
 
 
460
 
461
  # FastAPI app
462
  app = FastAPI()
@@ -474,7 +30,7 @@ def create_leaderboard_ui():
474
  if cached_data is not None and current_time - last_update_time < 300:
475
  df = cached_data
476
  else:
477
- df = consolidate_all_data()
478
  cached_data = df
479
  last_update_time = current_time
480
 
 
10
  from dotenv import load_dotenv
11
  from fastapi import FastAPI
12
 
13
+ from calculate_scores import calculate_scores
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
+ load_dotenv()
16
 
17
  # FastAPI app
18
  app = FastAPI()
 
30
  if cached_data is not None and current_time - last_update_time < 300:
31
  df = cached_data
32
  else:
33
+ df = calculate_scores()
34
  cached_data = df
35
  last_update_time = current_time
36
 
calculate_scores.py ADDED
@@ -0,0 +1,462 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import os
3
+ import time
4
+ from collections import defaultdict
5
+ from functools import lru_cache
6
+
7
+ import argilla as rg
8
+ import gradio as gr
9
+ import pandas as pd
10
+ from dotenv import load_dotenv
11
+ from fastapi import FastAPI
12
+
13
# Runs at import time, before the os.getenv() lookups further down this module.
load_dotenv()

# Constants
# Directory that holds every input file read by this module.
DATA_DIR = "data"
PARTICIPANTS_CSV = os.path.join(DATA_DIR, "participants.csv")
EQUIPOS_CSV = os.path.join(DATA_DIR, "equipos.csv")
# Output files written by calculate_scores(); the paths are relative, so they
# resolve against the current working directory.
LEADERBOARD_PERSONAL_CSV = "leaderboard_personal.csv"
LEADERBOARD_EQUIPOS_CSV = "leaderboard_equipos.csv"

# Column mappings for participants info
# (internal key -> column header looked up in participants.csv)
COLUMN_MAP = {
    "gmail": "Dirección de correo electrónico",
    "discord": "¿Cuál es tu nombre en Discord?",
    "hf_username": "¿Cuál es tu nombre en el Hub de Hugging Face?",
    "contact_email": "Email de contacto",
}

# Column mappings for teams info
# (internal key -> column header looked up in equipos.csv; up to five members)
TEAM_COLUMNS = {
    "team_name": "Nombre del equipo",
    "email_1": "Email 1",
    "email_2": "Email 2",
    "email_3": "Email 3",
    "email_4": "Email 4",
    "email_5": "Email 5",
}
39
+
40
# Initialize Argilla client
# Runs at import time. Any failure is reported on stdout and leaves `client`
# as None; get_blend_es_data() checks for that and returns no data.
try:
    client = rg.Argilla(
        api_url=os.getenv("ARGILLA_API_URL", ""),
        api_key=os.getenv("ARGILLA_API_KEY", ""),
    )
except Exception as e:
    print(f"Error initializing Argilla client: {e}")
    client = None
49
+
50
# Countries data
# Country name -> ISO code and flag emoji. get_blend_es_data() combines the
# three values into the Argilla dataset name
# "<emoji> <country> - <iso> - Responder", so every character here matters.
countries = {
    "Argentina": {"iso": "ARG", "emoji": "🇦🇷"},
    "Bolivia": {"iso": "BOL", "emoji": "🇧🇴"},
    "Chile": {"iso": "CHL", "emoji": "🇨🇱"},
    "Colombia": {"iso": "COL", "emoji": "🇨🇴"},
    "Costa Rica": {"iso": "CRI", "emoji": "🇨🇷"},
    "Cuba": {"iso": "CUB", "emoji": "🇨🇺"},
    "Ecuador": {"iso": "ECU", "emoji": "🇪🇨"},
    "El Salvador": {"iso": "SLV", "emoji": "🇸🇻"},
    "España": {"iso": "ESP", "emoji": "🇪🇸"},
    "Guatemala": {"iso": "GTM", "emoji": "🇬🇹"},
    "Honduras": {"iso": "HND", "emoji": "🇭🇳"},
    "México": {"iso": "MEX", "emoji": "🇲🇽"},
    "Nicaragua": {"iso": "NIC", "emoji": "🇳🇮"},
    "Panamá": {"iso": "PAN", "emoji": "🇵🇦"},
    "Paraguay": {"iso": "PRY", "emoji": "🇵🇾"},
    "Perú": {"iso": "PER", "emoji": "🇵🇪"},
    "Puerto Rico": {"iso": "PRI", "emoji": "🇵🇷"},
    "República Dominicana": {"iso": "DOM", "emoji": "🇩🇴"},
    "Uruguay": {"iso": "URY", "emoji": "🇺🇾"},
    "Venezuela": {"iso": "VEN", "emoji": "🇻🇪"},
}
73
+
74
+
75
@lru_cache(maxsize=1)
def get_user_mapping():
    """Get cached mapping of emails and hf_usernames to discord usernames.

    Returns:
        A ``(email_to_discord, hf_to_discord)`` tuple of dicts. Keys and
        values are lower-cased. Both dicts are empty when the participants
        CSV does not exist or cannot be processed.

    Notes:
        The result is memoized by ``lru_cache``, so the CSV is read at most
        once per process (an empty result is cached too) and every caller
        receives the same dict objects; callers must not mutate them.
    """
    if not os.path.exists(PARTICIPANTS_CSV):
        return {}, {}

    try:
        df = pd.read_csv(PARTICIPANTS_CSV)
        email_to_discord = {}
        hf_to_discord = {}

        for _, row in df.iterrows():
            discord = row.get(COLUMN_MAP["discord"], "")
            if pd.isna(discord) or discord == "NA":
                continue

            # Cells are coerced to str: pandas parses purely numeric names as
            # numbers, and calling .lower() on one would raise and make the
            # handler below throw away the mapping for every participant.
            discord_lower = str(discord).lower()

            # Map email to discord
            gmail = row.get(COLUMN_MAP["gmail"], "")
            if pd.notna(gmail):
                email_to_discord[str(gmail).lower()] = discord_lower

            # Map hf_username to discord
            hf_username = row.get(COLUMN_MAP["hf_username"], "")
            if pd.notna(hf_username):
                hf_to_discord[str(hf_username).lower()] = discord_lower

        return email_to_discord, hf_to_discord
    except Exception as e:
        # Best effort: an unreadable CSV degrades to "no mapping" rather than
        # taking the leaderboard down.
        print(f"Error loading {PARTICIPANTS_CSV}: {e}")
        return {}, {}
105
+
106
+
107
def get_discord_username(identifier):
    """Resolve an email address or Hugging Face username to a Discord name.

    Identifiers containing "@" are treated as emails; anything else is
    treated as a Hugging Face username. Lookups use the lower-cased
    identifier. An unknown email falls back to the text before its first
    "@", and an unknown username falls back to the identifier unchanged.
    """
    by_email, by_hf_name = get_user_mapping()

    if "@" not in identifier:
        return by_hf_name.get(identifier.lower(), identifier)

    local_part = identifier.split("@")[0]
    return by_email.get(identifier.lower(), local_part)
115
+
116
+
117
def get_participant_info():
    """Load per-participant contact details from the participants CSV.

    Returns:
        Dict keyed by lower-cased Discord username. Each value holds the
        "gmail", "discord_username", "hf_username" and "email" cells of that
        row. Rows whose Discord cell is missing or equal to "NA" are
        skipped. An empty dict is returned when the file does not exist or
        any error occurs while processing it.
    """
    if not os.path.exists(PARTICIPANTS_CSV):
        return {}

    try:
        participants = pd.read_csv(PARTICIPANTS_CSV)
        info_by_discord = {}

        for _, record in participants.iterrows():
            handle = record.get(COLUMN_MAP["discord"], "")
            if pd.isna(handle) or handle == "NA":
                continue

            details = {
                "gmail": record.get(COLUMN_MAP["gmail"], ""),
                "discord_username": handle,
                "hf_username": record.get(COLUMN_MAP["hf_username"], ""),
                "email": record.get(COLUMN_MAP["contact_email"], ""),
            }
            info_by_discord[handle.lower()] = details

        return info_by_discord
    except Exception as err:
        print(f"Error loading participant info: {err}")
        return {}
140
+
141
+
142
def get_team_leaderboard(personal_leaderboard_df):
    """Calculate team leaderboard based on personal scores.

    Args:
        personal_leaderboard_df: Personal leaderboard with a "Username"
            column and the score columns "Arena", "Blend-ES", "Estereotipos"
            and "INCLUDE". An empty frame is accepted; every team then gets
            zero totals.

    Returns:
        DataFrame with one row per team (team name, up to five Discord
        usernames, per-source totals and the placeholder ``ptos_*`` columns),
        sorted by ``total_arena`` descending. An empty DataFrame is returned
        when the teams CSV is missing, no team lists any member email, or an
        error occurs.
    """
    if not os.path.exists(EQUIPOS_CSV):
        return pd.DataFrame()

    try:
        teams_df = pd.read_csv(EQUIPOS_CSV)

        # Index the personal leaderboard once by lower-cased username instead
        # of filtering the whole frame for every team member. setdefault keeps
        # the first row for a username, like the previous `.iloc[0]` lookup.
        scores_by_user = {}
        if "Username" in personal_leaderboard_df.columns:
            for _, score_row in personal_leaderboard_df.iterrows():
                scores_by_user.setdefault(
                    str(score_row["Username"]).lower(), score_row
                )

        team_leaderboard = []

        for _, team_row in teams_df.iterrows():
            team_name = team_row.get(TEAM_COLUMNS["team_name"], "")
            # NaN is truthy, so a plain `not team_name` lets blank cells through.
            if pd.isna(team_name) or not team_name:
                continue

            # Get team member emails
            team_emails = []
            for i in range(1, 6):
                email = team_row.get(TEAM_COLUMNS[f"email_{i}"], "")
                if pd.isna(email):
                    continue
                # Store the stripped value: surrounding whitespace would
                # otherwise make the email -> Discord lookup miss.
                cleaned = str(email).strip().lower()
                if cleaned:
                    team_emails.append(cleaned)

            if not team_emails:
                continue

            # Map emails to Discord usernames and get scores
            discord_usernames = []
            team_scores = {"arena": 0, "blend_es": 0, "estereotipos": 0, "include": 0}

            for email in team_emails:
                discord_username = get_discord_username(email)
                discord_usernames.append(discord_username)

                user_scores = scores_by_user.get(discord_username.lower())
                if user_scores is not None:
                    team_scores["arena"] += user_scores["Arena"]
                    team_scores["blend_es"] += user_scores["Blend-ES"]
                    team_scores["estereotipos"] += user_scores["Estereotipos"]
                    team_scores["include"] += user_scores["INCLUDE"]

            # Pad Discord usernames list to 5 elements
            discord_usernames += [""] * (5 - len(discord_usernames))

            team_leaderboard.append(
                {
                    "team_name": team_name,
                    "discord_1": discord_usernames[0],
                    "discord_2": discord_usernames[1],
                    "discord_3": discord_usernames[2],
                    "discord_4": discord_usernames[3],
                    "discord_5": discord_usernames[4],
                    "total_arena": team_scores["arena"],
                    "ptos_arena": 0,  # Set to 0 for now as requested
                    "total_blend_es": team_scores["blend_es"],
                    "ptos_blend_es": 0,  # Set to 0 for now as requested
                    "total_estereotipos": team_scores["estereotipos"],
                    "ptos_estereotipos": 0,  # Set to 0 for now as requested
                    "total_include": team_scores["include"],
                    "ptos_include": 0,  # Set to 0 for now as requested
                    "ptos_total": 0,  # Set to 0 for now as requested
                }
            )

        if not team_leaderboard:
            return pd.DataFrame()

        # Create DataFrame and sort by total_arena
        team_df = pd.DataFrame(team_leaderboard)
        team_df.sort_values("total_arena", ascending=False, inplace=True)
        return team_df

    except Exception as e:
        # Best effort: a broken teams file must not break the personal board.
        print(f"Error calculating team leaderboard: {e}")
        return pd.DataFrame()
224
+
225
+
226
def get_blend_es_data():
    """Get blend-es data from Argilla.

    For every entry in ``countries`` the dataset named
    "<emoji> <country> - <iso> - Responder" is read and the "answer_1"
    responses are counted per user id.

    Returns:
        List of ``{"source": "blend-es", "username": ..., "count": ...}``
        dicts, one per (dataset, user) pair, with the username resolved
        through get_discord_username(). The list is empty when the Argilla
        client is unavailable. A dataset that fails is reported on stdout
        and skipped; the remaining datasets are still processed.
    """
    if not client:
        return []

    data = []
    # Shared across datasets so each Argilla user is looked up at most once
    # per call instead of once per country.
    usernames_by_id = {}

    for country, info in countries.items():
        dataset_name = f"{info['emoji']} {country} - {info['iso']} - Responder"

        try:
            dataset = client.datasets(dataset_name)

            user_counts = defaultdict(int)
            for record in dataset.records(with_responses=True):
                if "answer_1" not in record.responses:
                    continue
                for answer in record.responses["answer_1"]:
                    if answer.user_id:
                        user_counts[answer.user_id] += 1

            for user_id, count in user_counts.items():
                if user_id not in usernames_by_id:
                    # str() keeps the placeholder working when the id is not
                    # a plain string (e.g. a UUID object, which cannot be
                    # sliced).
                    placeholder = f"User-{str(user_id)[:8]}"
                    try:
                        usernames_by_id[user_id] = client.users(id=user_id).username
                    except Exception:
                        # Unknown or unreadable user: keep the contribution
                        # under a placeholder name rather than dropping it.
                        usernames_by_id[user_id] = placeholder

                username = get_discord_username(usernames_by_id[user_id])
                data.append(
                    {"source": "blend-es", "username": username, "count": count}
                )

        except Exception as e:
            print(f"Error processing {dataset_name}: {e}")

    return data
267
+
268
+
269
def get_include_data():
    """Get include data from CSV.

    Reads ``include.csv`` from the data directory and sums the
    "Total preguntas hackathon" column per lower-cased username taken from
    the "Nombre en Discord / username" column.

    Returns:
        List of ``{"source": "include", "username": ..., "count": ...}``
        dicts. The list is empty when the file or either column is missing,
        or when an error occurs while processing the file.
    """
    csv_path = os.path.join(DATA_DIR, "include.csv")
    if not os.path.exists(csv_path):
        return []

    try:
        df = pd.read_csv(csv_path)
        username_col = "Nombre en Discord / username"
        questions_col = "Total preguntas hackathon"

        if username_col not in df.columns or questions_col not in df.columns:
            return []

        user_counts = defaultdict(int)
        for _, row in df.iterrows():
            raw_username = row[username_col]
            questions = row[questions_col]
            if pd.isna(raw_username) or pd.isna(questions):
                continue

            username = str(raw_username).strip()
            # NOTE(review): the previous code dropped the first character
            # unconditionally, presumably to remove a leading "@" - confirm
            # against the sheet. Only strip it when it is actually there so
            # plain usernames do not lose their first letter.
            if username.startswith("@"):
                username = username[1:]

            if username and questions:
                user_counts[username.lower()] += int(questions)

        return [
            {"source": "include", "username": username, "count": count}
            for username, count in user_counts.items()
        ]
    except Exception as e:
        print(f"Error loading include data: {e}")
        return []
297
+
298
+
299
def get_estereotipos_data():
    """Get estereotipos data from CSV.

    Reads ``stereotypes.csv`` from the data directory and sums the "count"
    column per lower-cased "token_id" value, which is then resolved through
    get_discord_username().

    Returns:
        List of ``{"source": "estereotipos", "username": ..., "count": ...}``
        dicts. The list is empty when the file or either column is missing,
        or when an error occurs while processing the file.
    """
    csv_path = os.path.join(DATA_DIR, "stereotypes.csv")
    if not os.path.exists(csv_path):
        return []

    try:
        df = pd.read_csv(csv_path)
        if "token_id" not in df.columns or "count" not in df.columns:
            return []

        user_counts = defaultdict(int)
        for _, row in df.iterrows():
            mail = row.get("token_id", "")
            count = row.get("count", 0)
            if pd.notna(mail) and pd.notna(count):
                # str() so a non-string identifier cannot abort the whole file.
                user_counts[str(mail).lower()] += int(count)

        return [
            {
                # Must be "estereotipos": calculate_scores() dispatches on
                # this label, and the former "include" value credited these
                # counts to the INCLUDE column, leaving Estereotipos at 0.
                "source": "estereotipos",
                "username": get_discord_username(mail),
                "count": count,
            }
            for mail, count in user_counts.items()
        ]
    except Exception as e:
        print(f"Error loading estereotipos data: {e}")
        return []
328
+
329
+
330
def get_arena_data():
    """Count arena conversations per user from ``arena.json``.

    The JSON document is iterated as a mapping whose values are sequences of
    conversation objects; each conversation with a non-empty "username"
    adds one to that (lower-cased) user's tally.

    Returns:
        List of ``{"source": "arena", "username": ..., "count": ...}`` dicts
        with usernames resolved through get_discord_username(). The list is
        empty when the file is missing or an error occurs.
    """
    json_path = os.path.join(DATA_DIR, "arena.json")
    if not os.path.exists(json_path):
        return []

    try:
        with open(json_path, "r", encoding="utf-8") as handle:
            arena_data = json.load(handle)

        tally = defaultdict(int)
        for conversation_group in arena_data.values():
            for conversation in conversation_group:
                name = conversation.get("username")
                if not name:
                    continue
                tally[name.lower()] += 1

        entries = []
        for name, total in tally.items():
            entries.append(
                {
                    "source": "arena",
                    "username": get_discord_username(name),
                    "count": total,
                }
            )
        return entries
    except Exception as err:
        print(f"Error loading arena data: {err}")
        return []
353
+
354
+
355
def calculate_scores():
    """Merge every data source into the personal and team leaderboards.

    Side effects:
        Writes the full personal leaderboard (including contact columns) to
        LEADERBOARD_PERSONAL_CSV and the team leaderboard to
        LEADERBOARD_EQUIPOS_CSV; each file is only written when its frame is
        non-empty.

    Returns:
        The public DataFrame (Username plus the four score columns) sorted
        by "Arena" descending; it may be empty.
    """
    # Gather the raw (source, username, count) entries, in the same order
    # the sources have always been queried.
    entries = []
    for loader in (
        get_blend_es_data,
        get_include_data,
        get_estereotipos_data,
        get_arena_data,
    ):
        entries.extend(loader())

    participant_info = get_participant_info()

    def _blank_entry():
        return {
            "username": "",
            "gmail": "",
            "discord_username": "",
            "hf_username": "",
            "email": "",
            "blend_es": 0,
            "include": 0,
            "estereotipos": 0,
            "arena": 0,
        }

    user_contributions = defaultdict(_blank_entry)

    # Source label carried by each entry -> counter it feeds. Entries with
    # any other label are ignored.
    counter_for_source = {
        "blend-es": "blend_es",
        "include": "include",
        "estereotipos": "estereotipos",
        "arena": "arena",
    }

    for item in entries:
        source = item["source"]
        username = item["username"]
        count = item["count"]
        user_key = username.lower()
        entry = user_contributions[user_key]

        # First sighting of this user: record the display name and, when the
        # user is a registered participant, the contact details.
        if not entry["username"]:
            entry["username"] = username
            if user_key in participant_info:
                info = participant_info[user_key]
                entry["gmail"] = info["gmail"]
                entry["discord_username"] = info["discord_username"]
                entry["hf_username"] = info["hf_username"]
                entry["email"] = info["email"]

        counter = counter_for_source.get(source)
        if counter is not None:
            entry[counter] += count

    # Two views of the same rows: the full one goes to disk, the public one
    # (without contact details) is what the UI receives.
    full_rows = []
    display_rows = []
    for data in user_contributions.values():
        full_rows.append(
            {
                "Username": data["username"],
                "Gmail": data["gmail"],
                "Discord_Username": data["discord_username"],
                "HF_Username": data["hf_username"],
                "Email": data["email"],
                "Arena": data["arena"],
                "Blend-ES": data["blend_es"],
                "Estereotipos": data["estereotipos"],
                "INCLUDE": data["include"],
            }
        )
        display_rows.append(
            {
                "Username": data["username"],
                "Arena": data["arena"],
                "Blend-ES": data["blend_es"],
                "Estereotipos": data["estereotipos"],
                "INCLUDE": data["include"],
            }
        )

    full_df = pd.DataFrame(full_rows)
    if not full_df.empty:
        full_df = full_df.sort_values("Arena", ascending=False)
        full_df.to_csv(LEADERBOARD_PERSONAL_CSV, index=False, encoding="utf-8")

    team_df = get_team_leaderboard(full_df)
    if not team_df.empty:
        team_df.to_csv(LEADERBOARD_EQUIPOS_CSV, index=False, encoding="utf-8")

    display_df = pd.DataFrame(display_rows)
    if not display_df.empty:
        display_df = display_df.sort_values("Arena", ascending=False)

    return display_df
459
+
460
+
461
# Script entry point: the returned DataFrame is discarded here, so the visible
# effect of running this module directly is whatever calculate_scores() writes
# to disk.
if __name__ == "__main__":
    calculate_scores()