Upload folder using huggingface_hub
Browse files
checkpoint-200/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 249323242
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7c9f6209c224a23b708f315f04453b7cc4803a7764fe291f14e159899ba995f
|
3 |
size 249323242
|
checkpoint-225/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 249323242
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b35188d753a2de13f875f831c22747d631bd20d4c77f0f268580cbfadfb5acf
|
3 |
size 249323242
|
choice_distribution.json
CHANGED
@@ -1,7 +1,7 @@
|
|
1 |
{
|
2 |
"gpt-4o": 0.0,
|
3 |
-
"DeepSeek-V3-0324": 0.
|
4 |
"Llama-4-maverick-17b-128e-instruct-fp8": 0.0,
|
5 |
-
"qwen25-coder-32b-instruct": 0.
|
6 |
-
"gpt-4.1-mini": 0.
|
7 |
}
|
|
|
1 |
{
|
2 |
"gpt-4o": 0.0,
|
3 |
+
"DeepSeek-V3-0324": 0.325,
|
4 |
"Llama-4-maverick-17b-128e-instruct-fp8": 0.0,
|
5 |
+
"qwen25-coder-32b-instruct": 0.55,
|
6 |
+
"gpt-4.1-mini": 0.125
|
7 |
}
|
logs/events.out.tfevents.1754579550.209-20-159-47.43998.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71eb240364939c43e03aeb2ee1e2e35a30862d5f1efe81f4e7685311d8321d43
|
3 |
+
size 7091
|
logs/events.out.tfevents.1754579550.209-20-159-47.43998.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2ddee3604959657ecb44aec848f678c3c0019e16150326096c9f34635fa3d6e
|
3 |
+
size 7091
|
model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 249323242
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a7c9f6209c224a23b708f315f04453b7cc4803a7764fe291f14e159899ba995f
|
3 |
size 249323242
|
per_sample_predictions.csv
CHANGED
@@ -1,81 +1,84 @@
|
|
1 |
logit_gpt-4o,logit_DeepSeek-V3-0324,logit_Llama-4-maverick-17b-128e-instruct-fp8,logit_qwen25-coder-32b-instruct,logit_gpt-4.1-mini,chosen_executor,true_gpt-4o,true_DeepSeek-V3-0324,true_Llama-4-maverick-17b-128e-instruct-fp8,true_qwen25-coder-32b-instruct,true_gpt-4.1-mini
|
2 |
-
-0.
|
3 |
-
-0.
|
4 |
-
-0.
|
5 |
-
-0.
|
6 |
-
-0.
|
7 |
-
-0.
|
8 |
-
-0.91015625,-0.
|
9 |
-
-0.
|
10 |
-
-0.
|
11 |
-
-0.
|
12 |
-
-0.
|
13 |
-
-0.
|
14 |
-
-0.
|
15 |
-
-0.
|
16 |
-
-0.
|
17 |
-
-0.
|
18 |
-
-0.
|
19 |
-
-0.
|
20 |
-
-0.
|
21 |
-
-
|
22 |
-
-0.
|
23 |
-
-0.
|
24 |
-
-0.
|
25 |
-
-0.90625,-0.
|
26 |
-
-0.
|
27 |
-
-0.
|
28 |
-
-0.
|
29 |
-
-0.
|
30 |
-
-0.
|
31 |
-
-0.
|
32 |
-
-0.
|
33 |
-
-0.
|
34 |
-
-0.
|
35 |
-
-0.
|
36 |
-
-
|
37 |
-
-0.
|
38 |
-
-0.
|
39 |
-
-0.
|
40 |
-
-0.
|
41 |
-
-0.
|
42 |
-
-0.
|
43 |
-
-0.
|
44 |
-
-
|
45 |
-
-0.
|
46 |
-
-0.
|
47 |
-
-0.
|
48 |
-
-0.
|
49 |
-
-0.
|
50 |
-
-
|
51 |
-
-0.
|
52 |
-
-0.
|
53 |
-
-0.
|
54 |
-
-0.
|
55 |
-
-0.
|
56 |
-
-0.
|
57 |
-
-0.97265625,-0.
|
58 |
-
-
|
59 |
-
-0.
|
60 |
-
-0.
|
61 |
-
-0.
|
62 |
-
-0.
|
63 |
-
-0.
|
64 |
-
-
|
65 |
-
-0.
|
66 |
-
-0.
|
67 |
-
-0.
|
68 |
-
-0.89453125,-0.
|
69 |
-
-0.
|
70 |
-
-0.
|
71 |
-
-0.
|
72 |
-
-0.921875,-0.
|
73 |
-
-0.
|
74 |
-
-0.
|
75 |
-
-0.
|
76 |
-
-0.
|
77 |
-
-0.
|
78 |
-
-0.
|
79 |
-
-0.
|
80 |
-
-0.
|
81 |
-
-0.
|
|
|
|
|
|
|
|
1 |
logit_gpt-4o,logit_DeepSeek-V3-0324,logit_Llama-4-maverick-17b-128e-instruct-fp8,logit_qwen25-coder-32b-instruct,logit_gpt-4.1-mini,chosen_executor,true_gpt-4o,true_DeepSeek-V3-0324,true_Llama-4-maverick-17b-128e-instruct-fp8,true_qwen25-coder-32b-instruct,true_gpt-4.1-mini
|
2 |
+
-0.8203125,-0.73046875,-0.734375,-0.6015625,-0.66796875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
3 |
+
-0.8671875,-0.7890625,-0.7890625,-0.72265625,-0.8125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
4 |
+
-0.9375,-0.7734375,-0.83203125,-0.7578125,-0.7890625,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
5 |
+
-0.8984375,-0.7734375,-0.81640625,-0.75390625,-0.796875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
6 |
+
-0.8984375,-0.7734375,-0.796875,-0.7109375,-0.78125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
7 |
+
-0.89453125,-0.7734375,-0.84765625,-0.78515625,-0.8125,DeepSeek-V3-0324,0.0,0.0,1.0,0.0,1.0
|
8 |
+
-0.91015625,-0.74609375,-0.8203125,-0.71875,-0.72265625,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
9 |
+
-0.90625,-0.80078125,-0.78125,-0.7109375,-0.78515625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
10 |
+
-0.8359375,-0.73828125,-0.7421875,-0.5703125,-0.6328125,qwen25-coder-32b-instruct,0.0,1.0,1.0,1.0,1.0
|
11 |
+
-0.859375,-0.71875,-0.78125,-0.70703125,-0.75,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
12 |
+
-0.953125,-0.78515625,-0.83984375,-0.8203125,-0.796875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
13 |
+
-0.89453125,-0.7421875,-0.8046875,-0.74609375,-0.76171875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
14 |
+
-0.91796875,-0.79296875,-0.8125,-0.75,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
15 |
+
-0.8828125,-0.7578125,-0.76953125,-0.73046875,-0.7421875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
16 |
+
-0.953125,-0.7421875,-0.859375,-0.80078125,-0.79296875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
17 |
+
-0.91796875,-0.78125,-0.81640625,-0.74609375,-0.83203125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
18 |
+
-0.94140625,-0.796875,-0.796875,-0.80859375,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
19 |
+
-0.90234375,-0.79296875,-0.79296875,-0.78125,-0.79296875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
20 |
+
-0.9375,-0.75,-0.8046875,-0.77734375,-0.80078125,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
21 |
+
-0.88671875,-0.72265625,-0.81640625,-0.7578125,-0.76171875,DeepSeek-V3-0324,1.0,1.0,0.0,1.0,1.0
|
22 |
+
-0.90234375,-0.75390625,-0.84375,-0.734375,-0.8125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
23 |
+
-0.8515625,-0.66796875,-0.6875,-0.65234375,-0.64453125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
24 |
+
-0.92578125,-0.7890625,-0.8203125,-0.81640625,-0.75,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
25 |
+
-0.90625,-0.78125,-0.8046875,-0.7578125,-0.8046875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
26 |
+
-0.94140625,-0.82421875,-0.88671875,-0.80859375,-0.85546875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
27 |
+
-0.80078125,-0.7109375,-0.67578125,-0.6953125,-0.61328125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
28 |
+
-0.90234375,-0.78515625,-0.84765625,-0.70703125,-0.8046875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
29 |
+
-0.94140625,-0.73046875,-0.83984375,-0.76171875,-0.78125,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
30 |
+
-0.90234375,-0.73046875,-0.78515625,-0.73828125,-0.74609375,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
31 |
+
-0.83203125,-0.71484375,-0.734375,-0.66796875,-0.69140625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
32 |
+
-0.90234375,-0.7578125,-0.80859375,-0.73046875,-0.79296875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
33 |
+
-0.90234375,-0.80078125,-0.80078125,-0.72265625,-0.79296875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
34 |
+
-0.859375,-0.65234375,-0.78515625,-0.703125,-0.71875,DeepSeek-V3-0324,1.0,0.0,1.0,1.0,1.0
|
35 |
+
-0.875,-0.7578125,-0.796875,-0.7109375,-0.79296875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
36 |
+
-0.9375,-0.8046875,-0.8203125,-0.80078125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
37 |
+
-0.9375,-0.78125,-0.828125,-0.79296875,-0.81640625,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
38 |
+
-0.921875,-0.7421875,-0.81640625,-0.72265625,-0.7578125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
39 |
+
-0.9375,-0.75390625,-0.84375,-0.765625,-0.796875,DeepSeek-V3-0324,1.0,1.0,0.0,1.0,1.0
|
40 |
+
-0.93359375,-0.78125,-0.78125,-0.76171875,-0.7890625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
41 |
+
-0.890625,-0.75390625,-0.78515625,-0.765625,-0.74609375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
42 |
+
-0.91015625,-0.79296875,-0.87109375,-0.7578125,-0.80078125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
43 |
+
-0.921875,-0.75390625,-0.8125,-0.765625,-0.78515625,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
44 |
+
-0.94921875,-0.7734375,-0.90234375,-0.7578125,-0.82421875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
45 |
+
-0.921875,-0.796875,-0.8125,-0.76953125,-0.80859375,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
46 |
+
-0.91796875,-0.7890625,-0.85546875,-0.78125,-0.7890625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
47 |
+
-0.90234375,-0.80078125,-0.82421875,-0.7265625,-0.78515625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
48 |
+
-0.921875,-0.75,-0.7890625,-0.7265625,-0.76953125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
49 |
+
-0.91796875,-0.7578125,-0.8203125,-0.8125,-0.80859375,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
50 |
+
-0.95703125,-0.76953125,-0.83984375,-0.8203125,-0.78515625,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
51 |
+
-0.9453125,-0.76953125,-0.76953125,-0.68359375,-0.70703125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
52 |
+
-0.88671875,-0.7890625,-0.8046875,-0.76953125,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
53 |
+
-0.875,-0.68359375,-0.81640625,-0.78515625,-0.7421875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
54 |
+
-0.93359375,-0.7890625,-0.7890625,-0.78125,-0.78515625,qwen25-coder-32b-instruct,1.0,1.0,1.0,0.0,1.0
|
55 |
+
-0.82421875,-0.7265625,-0.71875,-0.65234375,-0.64453125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
|
56 |
+
-0.92578125,-0.75390625,-0.828125,-0.80859375,-0.8046875,DeepSeek-V3-0324,1.0,0.0,0.0,0.0,0.0
|
57 |
+
-0.97265625,-0.734375,-0.8515625,-0.859375,-0.7890625,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
58 |
+
-0.9453125,-0.765625,-0.86328125,-0.78125,-0.80078125,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
59 |
+
-0.8359375,-0.7109375,-0.73046875,-0.70703125,-0.6640625,gpt-4.1-mini,0.0,1.0,0.0,0.0,0.0
|
60 |
+
-0.9140625,-0.71484375,-0.83984375,-0.68359375,-0.78515625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
61 |
+
-0.9296875,-0.7734375,-0.8359375,-0.78125,-0.79296875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
62 |
+
-0.96484375,-0.75390625,-0.8203125,-0.76171875,-0.734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
|
63 |
+
-0.87109375,-0.71875,-0.83984375,-0.734375,-0.7890625,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
64 |
+
-0.94140625,-0.765625,-0.859375,-0.7578125,-0.80859375,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
65 |
+
-0.90234375,-0.74609375,-0.83203125,-0.72265625,-0.80078125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
66 |
+
-0.9453125,-0.72265625,-0.83203125,-0.71484375,-0.78125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,0.0
|
67 |
+
-0.86328125,-0.71875,-0.75,-0.6796875,-0.7265625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
68 |
+
-0.89453125,-0.7421875,-0.78515625,-0.7421875,-0.76953125,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
69 |
+
-0.90625,-0.73046875,-0.78125,-0.734375,-0.74609375,DeepSeek-V3-0324,1.0,0.0,0.0,1.0,1.0
|
70 |
+
-0.88671875,-0.76953125,-0.82421875,-0.7109375,-0.765625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
71 |
+
-0.8828125,-0.765625,-0.8046875,-0.75,-0.75,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
72 |
+
-0.921875,-0.7734375,-0.8046875,-0.78515625,-0.79296875,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
73 |
+
-0.89453125,-0.7109375,-0.8203125,-0.7265625,-0.71875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
|
74 |
+
-0.91015625,-0.74609375,-0.78125,-0.71484375,-0.7734375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
75 |
+
-0.8828125,-0.734375,-0.796875,-0.71875,-0.7734375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
76 |
+
-0.9609375,-0.765625,-0.85546875,-0.84375,-0.8046875,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
77 |
+
-0.96875,-0.796875,-0.86328125,-0.79296875,-0.80859375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
|
78 |
+
-0.85546875,-0.70703125,-0.78515625,-0.6875,-0.734375,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
79 |
+
-0.95703125,-0.76953125,-0.87109375,-0.71875,-0.828125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
80 |
+
-0.99609375,-0.73046875,-0.8515625,-0.80078125,-0.7734375,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
|
81 |
+
-0.90625,-0.78125,-0.8671875,-0.76171875,-0.85546875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
|
82 |
+
|
83 |
+
predicted_proportions,0.0000,0.3250,0.0000,0.5500,0.1250
|
84 |
+
true_proportions,0.9625,0.0250,0.0125,0.0000,0.0000
|