Upload folder using huggingface_hub
Browse files
checkpoint-200/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 249323242
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c6e09a21dcb26a17d03fe73b4a58156f53f84d3abb5fb3dc9877b9cb348a8dc
|
3 |
size 249323242
|
checkpoint-225/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 249323242
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85afa3559924d8e598e87b57f11617f919bec98cfca85e983d0b7184ed264e80
|
3 |
size 249323242
|
choice_distribution.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"gpt-4o": 0.0,
|
3 |
-
"DeepSeek-V3-0324": 0.
|
4 |
"Llama-4-maverick-17b-128e-instruct-fp8": 0.0,
|
5 |
-
"qwen25-coder-32b-instruct": 0.
|
6 |
-
"gpt-4.1-mini": 0.
|
7 |
}
|
|
|
1 |
{
|
2 |
"gpt-4o": 0.0,
|
3 |
+
"DeepSeek-V3-0324": 0.05,
|
4 |
"Llama-4-maverick-17b-128e-instruct-fp8": 0.0,
|
5 |
+
"qwen25-coder-32b-instruct": 0.15,
|
6 |
+
"gpt-4.1-mini": 0.8
|
7 |
}
|
logs/events.out.tfevents.1754579928.209-20-159-47.44655.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ac1ad2e67b689a5d930e272625f2c782899025a1fa6640256a8a435b8572e2e9
|
3 |
+
size 7091
|
logs/events.out.tfevents.1754579928.209-20-159-47.44655.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7557e3aefe35343339020f4d613dced2aec601871122b7ec4b8de505093bf924
|
3 |
+
size 7091
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 249323242
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4c6e09a21dcb26a17d03fe73b4a58156f53f84d3abb5fb3dc9877b9cb348a8dc
|
3 |
size 249323242
|
per_sample_predictions.csv
CHANGED
@@ -1,83 +1,84 @@
|
|
1 |
logit_gpt-4o,logit_DeepSeek-V3-0324,logit_Llama-4-maverick-17b-128e-instruct-fp8,logit_qwen25-coder-32b-instruct,logit_gpt-4.1-mini,chosen_executor,true_gpt-4o,true_DeepSeek-V3-0324,true_Llama-4-maverick-17b-128e-instruct-fp8,true_qwen25-coder-32b-instruct,true_gpt-4.1-mini
|
2 |
-
-0.
|
3 |
-
-0.
|
4 |
-
-0.
|
5 |
-
-0.
|
6 |
-
-0.
|
7 |
-
-0.
|
8 |
-
-0.
|
9 |
-
-0.
|
10 |
-
-0.
|
11 |
-
-0.
|
12 |
-
-
|
13 |
-
-0.
|
14 |
-
-0.9609375,-0.
|
15 |
-
-0.
|
16 |
-
-1.0078125,-0.
|
17 |
-
-0.
|
18 |
-
-0
|
19 |
-
-0.
|
20 |
-
-0
|
21 |
-
-
|
22 |
-
-0.
|
23 |
-
-0.
|
24 |
-
-0
|
25 |
-
-0.
|
26 |
-
-0.
|
27 |
-
-0.
|
28 |
-
-0.
|
29 |
-
-0.
|
30 |
-
-
|
31 |
-
-0.
|
32 |
-
-0.
|
33 |
-
-0.
|
34 |
-
-0.
|
35 |
-
-0.
|
36 |
-
-
|
37 |
-
-0.
|
38 |
-
-0.
|
39 |
-
-0.
|
40 |
-
-0.
|
41 |
-
-0.
|
42 |
-
-0.
|
43 |
-
-0
|
44 |
-
-
|
45 |
-
-0.
|
46 |
-
-
|
47 |
-
-
|
48 |
-
-0.
|
49 |
-
-0.
|
50 |
-
-1.
|
51 |
-
-0.
|
52 |
-
-0.
|
53 |
-
-0.
|
54 |
-
-0.
|
55 |
-
-0.
|
56 |
-
-0.984375,-0.
|
57 |
-
-0.
|
58 |
-
-
|
59 |
-
-0.
|
60 |
-
-0.
|
61 |
-
-
|
62 |
-
-0.
|
63 |
-
-
|
64 |
-
-0.
|
65 |
-
-0.
|
66 |
-
-0.
|
67 |
-
-0.
|
68 |
-
-0.
|
69 |
-
-0.
|
70 |
-
-0.
|
71 |
-
-0.
|
72 |
-
-0.
|
73 |
-
-0.
|
74 |
-
-0.96875,-0.
|
75 |
-
-0.
|
76 |
-
-0.
|
77 |
-
-0
|
78 |
-
-0.
|
79 |
-
-0.
|
80 |
-
-
|
81 |
-
-0.
|
82 |
|
83 |
-
predicted_proportions,0.0000,0.
|
|
|
|
1 |
logit_gpt-4o,logit_DeepSeek-V3-0324,logit_Llama-4-maverick-17b-128e-instruct-fp8,logit_qwen25-coder-32b-instruct,logit_gpt-4.1-mini,chosen_executor,true_gpt-4o,true_DeepSeek-V3-0324,true_Llama-4-maverick-17b-128e-instruct-fp8,true_qwen25-coder-32b-instruct,true_gpt-4.1-mini
|
2 |
+
-0.84765625,-0.69140625,-0.76171875,-0.7265625,-0.68359375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
3 |
+
-0.9296875,-0.8125,-0.84375,-0.80078125,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
4 |
+
-0.9609375,-0.8515625,-0.8671875,-0.7890625,-0.765625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
5 |
+
-0.953125,-0.8046875,-0.83984375,-0.8125,-0.80859375,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
6 |
+
-0.953125,-0.8125,-0.8359375,-0.8046875,-0.79296875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
7 |
+
-0.95703125,-0.8046875,-0.83984375,-0.8125,-0.78515625,gpt-4.1-mini,0.0,0.0,1.0,0.0,1.0
|
8 |
+
-0.88671875,-0.7578125,-0.83984375,-0.77734375,-0.74609375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
9 |
+
-0.9453125,-0.796875,-0.828125,-0.7890625,-0.7578125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
10 |
+
-0.8671875,-0.78515625,-0.72265625,-0.69140625,-0.75,qwen25-coder-32b-instruct,0.0,1.0,1.0,1.0,1.0
|
11 |
+
-0.93359375,-0.79296875,-0.80859375,-0.8046875,-0.74609375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
12 |
+
-0.97265625,-0.85546875,-0.89453125,-0.82421875,-0.79296875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
13 |
+
-0.94140625,-0.80078125,-0.8203125,-0.80078125,-0.78125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
14 |
+
-0.9609375,-0.8515625,-0.84765625,-0.81640625,-0.8125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
15 |
+
-0.96875,-0.7421875,-0.8671875,-0.78515625,-0.69921875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
16 |
+
-1.0078125,-0.890625,-0.88671875,-0.8359375,-0.8203125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
17 |
+
-0.94921875,-0.80078125,-0.8125,-0.7890625,-0.75,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
18 |
+
-1.0,-0.8203125,-0.91015625,-0.80078125,-0.765625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
19 |
+
-0.91796875,-0.8125,-0.85546875,-0.78515625,-0.75,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
20 |
+
-1.0,-0.90625,-0.88671875,-0.83203125,-0.81640625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
21 |
+
-0.96875,-0.90625,-0.890625,-0.8125,-0.8125,qwen25-coder-32b-instruct,1.0,1.0,0.0,1.0,1.0
|
22 |
+
-0.9296875,-0.83203125,-0.82421875,-0.78515625,-0.796875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
23 |
+
-0.90625,-0.73046875,-0.81640625,-0.8046875,-0.71484375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
24 |
+
-1.0,-0.875,-0.95703125,-0.83203125,-0.81640625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
25 |
+
-0.9609375,-0.859375,-0.84765625,-0.8125,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
26 |
+
-0.9609375,-0.86328125,-0.86328125,-0.828125,-0.828125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
27 |
+
-0.85546875,-0.71875,-0.81640625,-0.76171875,-0.62109375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
28 |
+
-0.96484375,-0.8125,-0.84765625,-0.7890625,-0.80078125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
29 |
+
-0.98046875,-0.83984375,-0.87109375,-0.828125,-0.8125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
30 |
+
-0.984375,-0.91015625,-0.88671875,-0.78125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
31 |
+
-0.9296875,-0.74609375,-0.7890625,-0.75,-0.703125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
32 |
+
-0.91796875,-0.7890625,-0.8125,-0.8046875,-0.79296875,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
33 |
+
-0.92578125,-0.8046875,-0.84375,-0.796875,-0.78125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
34 |
+
-0.97265625,-0.76953125,-0.85546875,-0.7734375,-0.73046875,gpt-4.1-mini,1.0,0.0,1.0,1.0,1.0
|
35 |
+
-0.9609375,-0.78515625,-0.8515625,-0.78515625,-0.7578125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
36 |
+
-0.98046875,-0.87109375,-0.921875,-0.8125,-0.765625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
37 |
+
-0.953125,-0.84375,-0.8359375,-0.8125,-0.7890625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
38 |
+
-0.9609375,-0.81640625,-0.85546875,-0.83203125,-0.80078125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
39 |
+
-0.96484375,-0.84765625,-0.84375,-0.80859375,-0.80859375,qwen25-coder-32b-instruct,1.0,1.0,0.0,1.0,1.0
|
40 |
+
-0.95703125,-0.81640625,-0.84375,-0.80859375,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
41 |
+
-0.98046875,-0.8125,-0.87109375,-0.81640625,-0.7890625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
42 |
+
-0.921875,-0.8359375,-0.828125,-0.8046875,-0.78125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
43 |
+
-1.0,-0.83984375,-0.83984375,-0.79296875,-0.77734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
44 |
+
-0.97265625,-0.890625,-0.875,-0.80859375,-0.7890625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
45 |
+
-0.984375,-0.875,-0.8515625,-0.8203125,-0.82421875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
46 |
+
-0.97265625,-0.85546875,-0.859375,-0.828125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
47 |
+
-0.9296875,-0.79296875,-0.79296875,-0.7734375,-0.7734375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
48 |
+
-0.94140625,-0.78515625,-0.80859375,-0.78125,-0.74609375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
49 |
+
-0.97265625,-0.90234375,-0.91015625,-0.8125,-0.8046875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
50 |
+
-1.0078125,-0.91015625,-0.94140625,-0.82421875,-0.8125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
51 |
+
-0.92578125,-0.82421875,-0.828125,-0.81640625,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
52 |
+
-0.95703125,-0.765625,-0.85546875,-0.8046875,-0.75,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
53 |
+
-0.99609375,-0.77734375,-0.87890625,-0.765625,-0.73046875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
54 |
+
-0.96875,-0.84375,-0.8203125,-0.7890625,-0.765625,gpt-4.1-mini,1.0,1.0,1.0,0.0,1.0
|
55 |
+
-0.85546875,-0.6640625,-0.7109375,-0.7421875,-0.63671875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
56 |
+
-0.984375,-0.87890625,-0.8828125,-0.8125,-0.796875,gpt-4.1-mini,1.0,0.0,0.0,0.0,0.0
|
57 |
+
-0.984375,-0.8515625,-0.87890625,-0.84375,-0.828125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
58 |
+
-0.98046875,-0.90625,-0.85546875,-0.79296875,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
59 |
+
-0.9609375,-0.79296875,-0.84765625,-0.765625,-0.703125,gpt-4.1-mini,0.0,1.0,0.0,0.0,0.0
|
60 |
+
-0.9296875,-0.78125,-0.8515625,-0.8125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
61 |
+
-0.9609375,-0.89453125,-0.87109375,-0.8046875,-0.78125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
62 |
+
-0.9765625,-0.84765625,-0.890625,-0.8203125,-0.796875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
63 |
+
-0.98828125,-0.90234375,-0.87890625,-0.81640625,-0.78515625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
64 |
+
-0.98828125,-0.890625,-0.86328125,-0.8359375,-0.82421875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
65 |
+
-0.9453125,-0.82421875,-0.828125,-0.828125,-0.796875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
66 |
+
-0.96484375,-0.796875,-0.828125,-0.8203125,-0.76953125,gpt-4.1-mini,1.0,1.0,1.0,1.0,0.0
|
67 |
+
-0.91796875,-0.734375,-0.796875,-0.78515625,-0.7421875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
68 |
+
-0.9296875,-0.76171875,-0.83203125,-0.79296875,-0.7421875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
69 |
+
-0.96875,-0.82421875,-0.8671875,-0.76953125,-0.75390625,gpt-4.1-mini,1.0,0.0,0.0,1.0,1.0
|
70 |
+
-0.87109375,-0.71484375,-0.79296875,-0.78125,-0.72265625,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
71 |
+
-0.9609375,-0.90234375,-0.87109375,-0.8046875,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
72 |
+
-0.953125,-0.8359375,-0.859375,-0.80078125,-0.8125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
73 |
+
-0.96875,-0.8046875,-0.84765625,-0.78515625,-0.73828125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
74 |
+
-0.96875,-0.82421875,-0.8515625,-0.80078125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
75 |
+
-0.94921875,-0.8515625,-0.88671875,-0.859375,-0.83203125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
76 |
+
-0.9765625,-0.84375,-0.90625,-0.796875,-0.79296875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
77 |
+
-1.0,-0.828125,-0.87109375,-0.82421875,-0.7734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
78 |
+
-0.921875,-0.828125,-0.84375,-0.796875,-0.77734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
79 |
+
-0.97265625,-0.83984375,-0.85546875,-0.83203125,-0.80859375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
80 |
+
-1.015625,-0.8828125,-0.94140625,-0.859375,-0.828125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
81 |
+
-0.94140625,-0.8203125,-0.8515625,-0.82421875,-0.796875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
82 |
|
83 |
+
predicted_proportions,0.0000,0.0500,0.0000,0.1500,0.8000
|
84 |
+
true_proportions,0.0848,0.0806,0.0713,0.0723,0.0785
|