mariagrandury committed on
Commit
1534ae5
·
1 Parent(s): dff615a

add hf to discord mapping

Browse files
Files changed (1) hide show
  1. app.py +47 -16
app.py CHANGED
@@ -86,7 +86,8 @@ def get_blend_es_data():
86
  user_mapping[user_id] = f"User-{user_id[:8]}"
87
 
88
  for user_id, count in dataset_contributions.items():
89
- username = user_mapping.get(user_id, f"User-{user_id[:8]}")
 
90
  data.append(
91
  {"source": "blend-es", "username": username, "count": count}
92
  )
@@ -126,26 +127,63 @@ def get_include_data():
126
  return data
127
 
128
 
129
- def get_mail_to_username_mapping():
130
- mail_to_discord = {}
 
 
 
 
 
 
131
  try:
132
  if os.path.exists(PARTICIPANTS_CSV):
133
  mapping_df = pd.read_csv(PARTICIPANTS_CSV)
 
 
134
  if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
135
  for _, row in mapping_df.iterrows():
136
  mail = row["gmail"]
137
  discord = row["discord"]
138
- if pd.notna(mail) and pd.notna(discord):
139
- mail_to_discord[mail.lower()] = discord.lower()
 
 
 
 
 
 
 
 
 
140
  except Exception as e:
141
  print(f"Error loading {PARTICIPANTS_CSV}: {e}")
142
 
143
- return mail_to_discord
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
 
146
  def get_estereotipos_data():
147
  data = []
148
- mail_to_discord = get_mail_to_username_mapping()
149
 
150
  try:
151
  if os.path.exists(STEREOTYPES_CSV):
@@ -159,10 +197,7 @@ def get_estereotipos_data():
159
  mail_counts[mail.lower()] += int(count)
160
 
161
  for mail, count in mail_counts.items():
162
- username = mail_to_discord.get(mail.lower(), "")
163
- if not username:
164
- username = mail.split("@")[0] if "@" in mail else mail
165
-
166
  data.append(
167
  {"source": "estereotipos", "username": username, "count": count}
168
  )
@@ -174,7 +209,6 @@ def get_estereotipos_data():
174
 
175
  def get_arena_data():
176
  data = []
177
- mail_to_discord = get_mail_to_username_mapping()
178
 
179
  try:
180
  if os.path.exists(ARENA_JSON):
@@ -193,10 +227,7 @@ def get_arena_data():
193
  mail_counts[mail.lower()] += 1
194
 
195
  for mail, count in mail_counts.items():
196
- username = mail_to_discord.get(mail.lower(), "")
197
- if not username:
198
- username = mail.split("@")[0] if "@" in mail else mail
199
-
200
  data.append({"source": "arena", "username": username, "count": count})
201
  except Exception as e:
202
  print(f"Error loading {ARENA_JSON}: {e}")
 
86
  user_mapping[user_id] = f"User-{user_id[:8]}"
87
 
88
  for user_id, count in dataset_contributions.items():
89
+ hf_username = user_mapping.get(user_id, f"User-{user_id[:8]}")
90
+ username = get_discord_username(hf_username)
91
  data.append(
92
  {"source": "blend-es", "username": username, "count": count}
93
  )
 
127
  return data
128
 
129
 
130
+ def get_user_mapping():
131
+ """
132
+ Get cached mapping of emails and hf_usernames to discord usernames.
133
+ Returns a tuple of (email_to_discord, hf_username_to_discord) mappings.
134
+ """
135
+ email_to_discord = {}
136
+ hf_username_to_discord = {}
137
+
138
  try:
139
  if os.path.exists(PARTICIPANTS_CSV):
140
  mapping_df = pd.read_csv(PARTICIPANTS_CSV)
141
+
142
+ # Map emails to discord usernames
143
  if "gmail" in mapping_df.columns and "discord" in mapping_df.columns:
144
  for _, row in mapping_df.iterrows():
145
  mail = row["gmail"]
146
  discord = row["discord"]
147
+ if pd.notna(mail) and pd.notna(discord) and discord != "NA":
148
+ email_to_discord[mail.lower()] = discord.lower()
149
+
150
+ # Map hf_usernames to discord usernames
151
+ if "hf_username" in mapping_df.columns and "discord" in mapping_df.columns:
152
+ for _, row in mapping_df.iterrows():
153
+ hf_username = row["hf_username"]
154
+ discord = row["discord"]
155
+ if pd.notna(hf_username) and pd.notna(discord) and discord != "NA":
156
+ hf_username_to_discord[hf_username.lower()] = discord.lower()
157
+
158
  except Exception as e:
159
  print(f"Error loading {PARTICIPANTS_CSV}: {e}")
160
 
161
+ return email_to_discord, hf_username_to_discord
162
+
163
+
164
+ def get_discord_username(identifier):
165
+ """
166
+ Get discord username from either email or hf_username. Returns the discord username if found, otherwise returns the identifier.
167
+ """
168
+ email_to_discord, hf_username_to_discord = get_user_mapping()
169
+
170
+ # Try to find discord username by email first
171
+ if "@" in identifier:
172
+ discord_username = email_to_discord.get(identifier.lower())
173
+ if discord_username:
174
+ return discord_username
175
+
176
+ # Try to find discord username by hf_username
177
+ discord_username = hf_username_to_discord.get(identifier.lower())
178
+ if discord_username:
179
+ return discord_username
180
+
181
+ # Fallback: use identifier as username
182
+ return identifier.split("@")[0] if "@" in identifier else identifier
183
 
184
 
185
  def get_estereotipos_data():
186
  data = []
 
187
 
188
  try:
189
  if os.path.exists(STEREOTYPES_CSV):
 
197
  mail_counts[mail.lower()] += int(count)
198
 
199
  for mail, count in mail_counts.items():
200
+ username = get_discord_username(mail)
 
 
 
201
  data.append(
202
  {"source": "estereotipos", "username": username, "count": count}
203
  )
 
209
 
210
  def get_arena_data():
211
  data = []
 
212
 
213
  try:
214
  if os.path.exists(ARENA_JSON):
 
227
  mail_counts[mail.lower()] += 1
228
 
229
  for mail, count in mail_counts.items():
230
+ username = get_discord_username(mail)
 
 
 
231
  data.append({"source": "arena", "username": username, "count": count})
232
  except Exception as e:
233
  print(f"Error loading {ARENA_JSON}: {e}")