Spaces:
Running
on
Zero
Running
on
Zero
Commit
·
44c2a20
1
Parent(s):
ce4dda5
ready hyperlinks for leaderboard
Browse files- utils/arena_df_leaderboard.csv +9 -0
- utils/leaderboard.py +31 -5
- utils/models.py +2 -2
utils/arena_df_leaderboard.csv
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model,wins,losses,ties
|
2 |
+
Model Alpha,0,0,0
|
3 |
+
Model Beta,0,0,0
|
4 |
+
Model Delta (Refusal Specialist),0,0,0
|
5 |
+
Model Gamma,0,0,0
|
6 |
+
Qwen2.5-1.5b-Instruct,1,1,0
|
7 |
+
Llama-3.2-1b-Instruct,0,1,0
|
8 |
+
Qwen2.5-3b-Instruct,1,0,0
|
9 |
+
Llama-3.2-3b-Instruct,0,0,0
|
utils/leaderboard.py
CHANGED
@@ -2,6 +2,7 @@ import os
|
|
2 |
import pandas as pd
|
3 |
import math
|
4 |
from datetime import datetime
|
|
|
5 |
|
6 |
# Default K-factor (determines how much a single match affects ratings)
|
7 |
DEFAULT_K_FACTOR = 32
|
@@ -9,12 +10,37 @@ DEFAULT_K_FACTOR = 32
|
|
9 |
# Default starting Elo
|
10 |
DEFAULT_ELO = 1500
|
11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
# Mapping of model names to their Hugging Face URLs
|
13 |
-
model_to_hf = {
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
}
|
|
|
|
|
18 |
|
19 |
def calculate_elo_changes(winner_rating, loser_rating, k_factor=DEFAULT_K_FACTOR, draw=False):
|
20 |
"""
|
|
|
2 |
import pandas as pd
|
3 |
import math
|
4 |
from datetime import datetime
|
5 |
+
from .models import models
|
6 |
|
7 |
# Default K-factor (determines how much a single match affects ratings)
|
8 |
DEFAULT_K_FACTOR = 32
|
|
|
10 |
# Default starting Elo
|
11 |
DEFAULT_ELO = 1500
|
12 |
|
13 |
+
def prepare_url(model_dict: dict) -> dict:
    """
    Build the Hugging Face URL for every model in the mapping.

    Parameters:
    - model_dict: Dictionary mapping each model name to its Hugging Face
      repository path (e.g. "qwen/qwen2.5-1.5b-instruct")

    Returns:
    - Dictionary mapping each model name to its full Hugging Face URL
      ("https://huggingface.co/<repository path>"); empty if model_dict
      is empty
    """
    # The stored value is only the path part of the URL, so prefix the
    # Hugging Face host to turn it into a clickable link.
    return {
        name: f"https://huggingface.co/{repo_path}"
        for name, repo_path in model_dict.items()
    }
|
34 |
+
|
35 |
+
|
36 |
# Mapping of model names to their Hugging Face URLs
|
37 |
+
# model_to_hf = {
|
38 |
+
# "Qwen2.5-1.5b-Instruct": "https://huggingface.co/qwen/qwen2.5-1.5b-instruct",
|
39 |
+
# "Qwen2.5-3b-Instruct": "https://huggingface.co/qwen/qwen2.5-3b-instruct",
|
40 |
+
# # Add more models and their HF links here
|
41 |
+
# }
|
42 |
+
|
43 |
+
model_to_hf = prepare_url(models)
|
44 |
|
45 |
def calculate_elo_changes(winner_rating, loser_rating, k_factor=DEFAULT_K_FACTOR, draw=False):
|
46 |
"""
|
utils/models.py
CHANGED
@@ -13,8 +13,8 @@ from .prompts import format_rag_prompt
|
|
13 |
|
14 |
models = {
|
15 |
"Qwen2.5-1.5b-Instruct": "qwen/qwen2.5-1.5b-instruct",
|
16 |
-
"Qwen2.5-3b-Instruct": "qwen/qwen2.5-3b-instruct", # remove gated for now
|
17 |
-
"Llama-3.2-3b-Instruct": "meta-llama/llama-3.2-3b-instruct",
|
18 |
"Llama-3.2-1b-Instruct": "meta-llama/llama-3.2-1b-instruct",
|
19 |
"Gemma-3-1b-it" : "google/gemma-3-1b-it",
|
20 |
#"Bitnet-b1.58-2B-4T": "microsoft/bitnet-b1.58-2B-4T",
|
|
|
13 |
|
14 |
models = {
|
15 |
"Qwen2.5-1.5b-Instruct": "qwen/qwen2.5-1.5b-instruct",
|
16 |
+
#"Qwen2.5-3b-Instruct": "qwen/qwen2.5-3b-instruct", # remove gated for now
|
17 |
+
#"Llama-3.2-3b-Instruct": "meta-llama/llama-3.2-3b-instruct",
|
18 |
"Llama-3.2-1b-Instruct": "meta-llama/llama-3.2-1b-instruct",
|
19 |
"Gemma-3-1b-it" : "google/gemma-3-1b-it",
|
20 |
#"Bitnet-b1.58-2B-4T": "microsoft/bitnet-b1.58-2B-4T",
|