Sampler-Arena / elo.py
rwitz's picture
Update elo.py
b74ba04 verified
raw
history blame
2.13 kB
def update_elo_ratings(ratings_dataset, winner, loser):
    """Record one game result and return the updated ratings dataset.

    Players that are not yet present are added with an Elo rating of 1200
    and zero games played. Both players' ``games_played`` counters are then
    incremented, and their ratings are recomputed with ``elo`` using a
    per-player K-factor.

    Note that the K-factor is chosen from the games-played count *after* it
    has been incremented for the current game.

    Args:
        ratings_dataset: Ratings table convertible with ``pd.DataFrame``;
            rows carry the keys ``bot_name``, ``elo_rating`` and
            ``games_played``. May contain no rows.
        winner: ``bot_name`` of the winning player.
        loser: ``bot_name`` of the losing player.

    Returns:
        A new dataset built with ``Dataset.from_pandas`` from the updated
        ratings table.
    """
    # Convert the Hugging Face dataset to a pandas DataFrame
    ratings_df = pd.DataFrame(ratings_dataset)

    # An input without rows can convert to a frame without any columns, in
    # which case there is no 'bot_name' column to search yet.
    has_no_schema = ratings_df.shape == (0, 0)

    # Check and add new players if they don't exist in the dataset
    new_rows = []
    for player in (winner, loser):
        already_rated = (
            not has_no_schema and player in ratings_df['bot_name'].values
        )
        # Guards against adding the same name twice when winner == loser.
        already_queued = any(row['bot_name'] == player for row in new_rows)
        if not (already_rated or already_queued):
            new_rows.append(
                {'bot_name': player, 'elo_rating': 1200, 'games_played': 0}
            )
    if new_rows:
        additions = pd.DataFrame(new_rows)
        if has_no_schema:
            # Nothing to concatenate with: the new rows are the whole table.
            ratings_df = additions
        else:
            ratings_df = pd.concat([ratings_df, additions], ignore_index=True)

    # Function to determine the K-factor based on games played
    def determine_k_factor(games_played):
        # Define K-factor based on number of games played.
        # Adjust these thresholds as needed.
        if games_played < 30:
            return 40
        elif games_played < 100:
            return 20
        else:
            return 10

    # Row selectors, computed once and reused for every read/write below.
    winner_mask = ratings_df['bot_name'] == winner
    loser_mask = ratings_df['bot_name'] == loser

    # Update games played
    ratings_df.loc[winner_mask, 'games_played'] += 1
    ratings_df.loc[loser_mask, 'games_played'] += 1

    # Extract old ratings and games played
    winner_old_rating = ratings_df.loc[winner_mask, 'elo_rating'].iloc[0]
    loser_old_rating = ratings_df.loc[loser_mask, 'elo_rating'].iloc[0]
    winner_games_played = ratings_df.loc[winner_mask, 'games_played'].iloc[0]
    loser_games_played = ratings_df.loc[loser_mask, 'games_played'].iloc[0]

    # Determine K-factors
    winner_k_factor = determine_k_factor(winner_games_played)
    loser_k_factor = determine_k_factor(loser_games_played)

    # Calculate new ratings
    winner_new_rating, loser_new_rating = elo(
        winner_old_rating,
        loser_old_rating,
        k_factor_winner=winner_k_factor,
        k_factor_loser=loser_k_factor,
    )

    # Update the DataFrame. The rating column may be integer-typed (new
    # players start at the int 1200); writing a float into it in place relies
    # on silent upcasting, which recent pandas versions deprecate. Write into
    # an object-typed copy instead and let pandas re-infer the column dtype.
    updated_ratings = ratings_df['elo_rating'].astype(object)
    updated_ratings.loc[winner_mask] = winner_new_rating
    updated_ratings.loc[loser_mask] = loser_new_rating
    ratings_df['elo_rating'] = updated_ratings.infer_objects()

    # Convert the DataFrame back to a Hugging Face dataset
    updated_ratings_dataset = Dataset.from_pandas(ratings_df)
    return updated_ratings_dataset