wlg1 commited on
Commit
f577cfa
·
verified ·
1 Parent(s): fa768db

Upload folder using huggingface_hub

Browse files
checkpoint-200/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71904c7fca051d32bef8fe85431df4407a1c679d1d572f4a930d636405d67b80
3
  size 249323242
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c9f6209c224a23b708f315f04453b7cc4803a7764fe291f14e159899ba995f
3
  size 249323242
checkpoint-225/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:35bfcb8b439e96197ce2f220957adfda5afbb5dbf814d8915b771748b8c3c207
3
  size 249323242
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b35188d753a2de13f875f831c22747d631bd20d4c77f0f268580cbfadfb5acf
3
  size 249323242
choice_distribution.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "gpt-4o": 0.0,
3
- "DeepSeek-V3-0324": 0.15,
4
  "Llama-4-maverick-17b-128e-instruct-fp8": 0.0,
5
- "qwen25-coder-32b-instruct": 0.0625,
6
- "gpt-4.1-mini": 0.7875
7
  }
 
1
  {
2
  "gpt-4o": 0.0,
3
+ "DeepSeek-V3-0324": 0.325,
4
  "Llama-4-maverick-17b-128e-instruct-fp8": 0.0,
5
+ "qwen25-coder-32b-instruct": 0.55,
6
+ "gpt-4.1-mini": 0.125
7
  }
logs/events.out.tfevents.1754579550.209-20-159-47.43998.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71eb240364939c43e03aeb2ee1e2e35a30862d5f1efe81f4e7685311d8321d43
3
+ size 7091
logs/events.out.tfevents.1754579550.209-20-159-47.43998.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2ddee3604959657ecb44aec848f678c3c0019e16150326096c9f34635fa3d6e
3
+ size 7091
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71904c7fca051d32bef8fe85431df4407a1c679d1d572f4a930d636405d67b80
3
  size 249323242
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7c9f6209c224a23b708f315f04453b7cc4803a7764fe291f14e159899ba995f
3
  size 249323242
per_sample_predictions.csv CHANGED
@@ -1,81 +1,84 @@
1
  logit_gpt-4o,logit_DeepSeek-V3-0324,logit_Llama-4-maverick-17b-128e-instruct-fp8,logit_qwen25-coder-32b-instruct,logit_gpt-4.1-mini,chosen_executor,true_gpt-4o,true_DeepSeek-V3-0324,true_Llama-4-maverick-17b-128e-instruct-fp8,true_qwen25-coder-32b-instruct,true_gpt-4.1-mini
2
- -0.828125,-0.7109375,-0.7578125,-0.66015625,-0.60546875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
3
- -0.95703125,-0.7890625,-0.890625,-0.80859375,-0.7890625,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
4
- -0.96875,-0.78515625,-0.8828125,-0.86328125,-0.7890625,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
5
- -0.890625,-0.78515625,-0.8203125,-0.78515625,-0.71875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
6
- -0.93359375,-0.78515625,-0.8828125,-0.796875,-0.7734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
7
- -0.9609375,-0.8125,-0.85546875,-0.81640625,-0.77734375,gpt-4.1-mini,0.0,0.0,1.0,0.0,1.0
8
- -0.91015625,-0.80078125,-0.859375,-0.765625,-0.7109375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
9
- -0.94921875,-0.8046875,-0.87890625,-0.796875,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
10
- -0.84375,-0.71484375,-0.76953125,-0.6328125,-0.64453125,qwen25-coder-32b-instruct,0.0,1.0,1.0,1.0,1.0
11
- -0.92578125,-0.76171875,-0.83984375,-0.7734375,-0.76953125,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
12
- -0.98046875,-0.85546875,-0.8671875,-0.81640625,-0.8046875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
13
- -0.890625,-0.8046875,-0.875,-0.79296875,-0.73046875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
14
- -0.9296875,-0.80078125,-0.87109375,-0.8125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
15
- -0.9140625,-0.75390625,-0.83984375,-0.765625,-0.74609375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
16
- -0.984375,-0.828125,-0.9375,-0.828125,-0.74609375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
17
- -0.875,-0.74609375,-0.83203125,-0.76953125,-0.73046875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
18
- -0.95703125,-0.8046875,-0.828125,-0.796875,-0.78515625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
19
- -0.97265625,-0.83984375,-0.859375,-0.7890625,-0.7421875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
20
- -0.96875,-0.84375,-0.859375,-0.8046875,-0.8046875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
21
- -1.0,-0.82421875,-0.82421875,-0.83203125,-0.796875,gpt-4.1-mini,1.0,1.0,0.0,1.0,1.0
22
- -0.98828125,-0.84375,-0.86328125,-0.84375,-0.80078125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
23
- -0.92578125,-0.765625,-0.80859375,-0.78125,-0.79296875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
24
- -0.9765625,-0.8046875,-0.81640625,-0.8203125,-0.79296875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
25
- -0.90625,-0.80078125,-0.85546875,-0.8359375,-0.80078125,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
26
- -0.9375,-0.84765625,-0.84765625,-0.828125,-0.80078125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
27
- -0.8671875,-0.73046875,-0.78125,-0.65625,-0.64453125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
28
- -0.953125,-0.83203125,-0.89453125,-0.828125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
29
- -0.90234375,-0.82421875,-0.828125,-0.79296875,-0.76953125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
30
- -0.9609375,-0.79296875,-0.80078125,-0.84765625,-0.7734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
31
- -0.921875,-0.78125,-0.80859375,-0.796875,-0.75,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
32
- -0.8984375,-0.8125,-0.84375,-0.81640625,-0.75,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
33
- -0.890625,-0.80078125,-0.83203125,-0.8046875,-0.765625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
34
- -0.8671875,-0.7265625,-0.83984375,-0.7578125,-0.69921875,gpt-4.1-mini,1.0,0.0,1.0,1.0,1.0
35
- -0.96875,-0.81640625,-0.85546875,-0.8359375,-0.7890625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
36
- -1.0,-0.828125,-0.83203125,-0.80859375,-0.7890625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
37
- -0.94140625,-0.81640625,-0.85546875,-0.77734375,-0.73828125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
38
- -0.94921875,-0.81640625,-0.86328125,-0.80078125,-0.734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
39
- -0.91015625,-0.8203125,-0.8671875,-0.84765625,-0.73828125,gpt-4.1-mini,1.0,1.0,0.0,1.0,1.0
40
- -0.921875,-0.7890625,-0.859375,-0.78515625,-0.73828125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
41
- -0.94140625,-0.78515625,-0.8515625,-0.77734375,-0.7421875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
42
- -0.94140625,-0.8203125,-0.87109375,-0.83203125,-0.79296875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
43
- -0.9296875,-0.78125,-0.8671875,-0.78125,-0.78125,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
44
- -1.0234375,-0.828125,-0.9140625,-0.84765625,-0.76953125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
45
- -0.9375,-0.8046875,-0.84765625,-0.81640625,-0.82421875,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
46
- -0.97265625,-0.8515625,-0.8515625,-0.83984375,-0.81640625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
47
- -0.94140625,-0.82421875,-0.890625,-0.8359375,-0.78125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
48
- -0.93359375,-0.78515625,-0.8515625,-0.77734375,-0.76171875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
49
- -0.98046875,-0.8125,-0.91015625,-0.79296875,-0.796875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
50
- -1.0,-0.84765625,-0.8125,-0.828125,-0.7734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
51
- -0.97265625,-0.76171875,-0.85546875,-0.76171875,-0.76171875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
52
- -0.91015625,-0.8046875,-0.81640625,-0.80078125,-0.78125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
53
- -0.890625,-0.7578125,-0.8203125,-0.7890625,-0.7109375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
54
- -0.984375,-0.81640625,-0.9296875,-0.8515625,-0.82421875,DeepSeek-V3-0324,1.0,1.0,1.0,0.0,1.0
55
- -0.79296875,-0.6640625,-0.74609375,-0.65625,-0.58984375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
56
- -0.97265625,-0.7890625,-0.8828125,-0.78515625,-0.80078125,qwen25-coder-32b-instruct,1.0,0.0,0.0,0.0,0.0
57
- -0.97265625,-0.83984375,-0.8671875,-0.80078125,-0.73046875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
58
- -1.015625,-0.82421875,-0.84375,-0.83984375,-0.78515625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
59
- -0.87890625,-0.734375,-0.7578125,-0.73046875,-0.6875,gpt-4.1-mini,0.0,1.0,0.0,0.0,0.0
60
- -0.921875,-0.79296875,-0.859375,-0.78515625,-0.7265625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
61
- -0.98046875,-0.82421875,-0.83984375,-0.82421875,-0.82421875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
62
- -0.99609375,-0.83984375,-0.859375,-0.80078125,-0.7421875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
63
- -0.93359375,-0.80859375,-0.890625,-0.81640625,-0.7734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
64
- -1.0078125,-0.83984375,-0.875,-0.8359375,-0.80078125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
65
- -0.94140625,-0.77734375,-0.875,-0.7890625,-0.73828125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
66
- -0.90625,-0.78125,-0.84765625,-0.7734375,-0.7265625,gpt-4.1-mini,1.0,1.0,1.0,1.0,0.0
67
- -0.87109375,-0.75390625,-0.859375,-0.73828125,-0.71484375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
68
- -0.89453125,-0.78125,-0.7734375,-0.7890625,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
69
- -0.9296875,-0.76171875,-0.8125,-0.76953125,-0.76171875,DeepSeek-V3-0324,1.0,0.0,0.0,1.0,1.0
70
- -0.85546875,-0.765625,-0.83984375,-0.76171875,-0.6953125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
71
- -0.96484375,-0.828125,-0.81640625,-0.80078125,-0.7890625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
72
- -0.921875,-0.828125,-0.85546875,-0.8359375,-0.76953125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
73
- -0.94921875,-0.76953125,-0.859375,-0.8125,-0.7421875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
74
- -0.94140625,-0.7890625,-0.8359375,-0.8125,-0.74609375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
75
- -0.9296875,-0.7890625,-0.85546875,-0.7734375,-0.74609375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
76
- -0.95703125,-0.7890625,-0.82421875,-0.78515625,-0.734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
77
- -0.9453125,-0.796875,-0.83203125,-0.796875,-0.76171875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
78
- -0.91796875,-0.79296875,-0.84375,-0.81640625,-0.765625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
79
- -0.97265625,-0.796875,-0.8984375,-0.84375,-0.80859375,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
80
- -0.94921875,-0.82421875,-0.8515625,-0.8203125,-0.765625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
81
- -0.91015625,-0.8125,-0.86328125,-0.82421875,-0.76953125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
 
 
 
 
1
  logit_gpt-4o,logit_DeepSeek-V3-0324,logit_Llama-4-maverick-17b-128e-instruct-fp8,logit_qwen25-coder-32b-instruct,logit_gpt-4.1-mini,chosen_executor,true_gpt-4o,true_DeepSeek-V3-0324,true_Llama-4-maverick-17b-128e-instruct-fp8,true_qwen25-coder-32b-instruct,true_gpt-4.1-mini
2
+ -0.8203125,-0.73046875,-0.734375,-0.6015625,-0.66796875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
3
+ -0.8671875,-0.7890625,-0.7890625,-0.72265625,-0.8125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
4
+ -0.9375,-0.7734375,-0.83203125,-0.7578125,-0.7890625,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
5
+ -0.8984375,-0.7734375,-0.81640625,-0.75390625,-0.796875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
6
+ -0.8984375,-0.7734375,-0.796875,-0.7109375,-0.78125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
7
+ -0.89453125,-0.7734375,-0.84765625,-0.78515625,-0.8125,DeepSeek-V3-0324,0.0,0.0,1.0,0.0,1.0
8
+ -0.91015625,-0.74609375,-0.8203125,-0.71875,-0.72265625,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
9
+ -0.90625,-0.80078125,-0.78125,-0.7109375,-0.78515625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
10
+ -0.8359375,-0.73828125,-0.7421875,-0.5703125,-0.6328125,qwen25-coder-32b-instruct,0.0,1.0,1.0,1.0,1.0
11
+ -0.859375,-0.71875,-0.78125,-0.70703125,-0.75,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
12
+ -0.953125,-0.78515625,-0.83984375,-0.8203125,-0.796875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
13
+ -0.89453125,-0.7421875,-0.8046875,-0.74609375,-0.76171875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
14
+ -0.91796875,-0.79296875,-0.8125,-0.75,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
15
+ -0.8828125,-0.7578125,-0.76953125,-0.73046875,-0.7421875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
16
+ -0.953125,-0.7421875,-0.859375,-0.80078125,-0.79296875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
17
+ -0.91796875,-0.78125,-0.81640625,-0.74609375,-0.83203125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
18
+ -0.94140625,-0.796875,-0.796875,-0.80859375,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
19
+ -0.90234375,-0.79296875,-0.79296875,-0.78125,-0.79296875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
20
+ -0.9375,-0.75,-0.8046875,-0.77734375,-0.80078125,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
21
+ -0.88671875,-0.72265625,-0.81640625,-0.7578125,-0.76171875,DeepSeek-V3-0324,1.0,1.0,0.0,1.0,1.0
22
+ -0.90234375,-0.75390625,-0.84375,-0.734375,-0.8125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
23
+ -0.8515625,-0.66796875,-0.6875,-0.65234375,-0.64453125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
24
+ -0.92578125,-0.7890625,-0.8203125,-0.81640625,-0.75,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
25
+ -0.90625,-0.78125,-0.8046875,-0.7578125,-0.8046875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
26
+ -0.94140625,-0.82421875,-0.88671875,-0.80859375,-0.85546875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
27
+ -0.80078125,-0.7109375,-0.67578125,-0.6953125,-0.61328125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
28
+ -0.90234375,-0.78515625,-0.84765625,-0.70703125,-0.8046875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
29
+ -0.94140625,-0.73046875,-0.83984375,-0.76171875,-0.78125,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
30
+ -0.90234375,-0.73046875,-0.78515625,-0.73828125,-0.74609375,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
31
+ -0.83203125,-0.71484375,-0.734375,-0.66796875,-0.69140625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
32
+ -0.90234375,-0.7578125,-0.80859375,-0.73046875,-0.79296875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
33
+ -0.90234375,-0.80078125,-0.80078125,-0.72265625,-0.79296875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
34
+ -0.859375,-0.65234375,-0.78515625,-0.703125,-0.71875,DeepSeek-V3-0324,1.0,0.0,1.0,1.0,1.0
35
+ -0.875,-0.7578125,-0.796875,-0.7109375,-0.79296875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
36
+ -0.9375,-0.8046875,-0.8203125,-0.80078125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
37
+ -0.9375,-0.78125,-0.828125,-0.79296875,-0.81640625,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
38
+ -0.921875,-0.7421875,-0.81640625,-0.72265625,-0.7578125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
39
+ -0.9375,-0.75390625,-0.84375,-0.765625,-0.796875,DeepSeek-V3-0324,1.0,1.0,0.0,1.0,1.0
40
+ -0.93359375,-0.78125,-0.78125,-0.76171875,-0.7890625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
41
+ -0.890625,-0.75390625,-0.78515625,-0.765625,-0.74609375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
42
+ -0.91015625,-0.79296875,-0.87109375,-0.7578125,-0.80078125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
43
+ -0.921875,-0.75390625,-0.8125,-0.765625,-0.78515625,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
44
+ -0.94921875,-0.7734375,-0.90234375,-0.7578125,-0.82421875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
45
+ -0.921875,-0.796875,-0.8125,-0.76953125,-0.80859375,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
46
+ -0.91796875,-0.7890625,-0.85546875,-0.78125,-0.7890625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
47
+ -0.90234375,-0.80078125,-0.82421875,-0.7265625,-0.78515625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
48
+ -0.921875,-0.75,-0.7890625,-0.7265625,-0.76953125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
49
+ -0.91796875,-0.7578125,-0.8203125,-0.8125,-0.80859375,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
50
+ -0.95703125,-0.76953125,-0.83984375,-0.8203125,-0.78515625,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
51
+ -0.9453125,-0.76953125,-0.76953125,-0.68359375,-0.70703125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
52
+ -0.88671875,-0.7890625,-0.8046875,-0.76953125,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
53
+ -0.875,-0.68359375,-0.81640625,-0.78515625,-0.7421875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
54
+ -0.93359375,-0.7890625,-0.7890625,-0.78125,-0.78515625,qwen25-coder-32b-instruct,1.0,1.0,1.0,0.0,1.0
55
+ -0.82421875,-0.7265625,-0.71875,-0.65234375,-0.64453125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
56
+ -0.92578125,-0.75390625,-0.828125,-0.80859375,-0.8046875,DeepSeek-V3-0324,1.0,0.0,0.0,0.0,0.0
57
+ -0.97265625,-0.734375,-0.8515625,-0.859375,-0.7890625,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
58
+ -0.9453125,-0.765625,-0.86328125,-0.78125,-0.80078125,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
59
+ -0.8359375,-0.7109375,-0.73046875,-0.70703125,-0.6640625,gpt-4.1-mini,0.0,1.0,0.0,0.0,0.0
60
+ -0.9140625,-0.71484375,-0.83984375,-0.68359375,-0.78515625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
61
+ -0.9296875,-0.7734375,-0.8359375,-0.78125,-0.79296875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
62
+ -0.96484375,-0.75390625,-0.8203125,-0.76171875,-0.734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
63
+ -0.87109375,-0.71875,-0.83984375,-0.734375,-0.7890625,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
64
+ -0.94140625,-0.765625,-0.859375,-0.7578125,-0.80859375,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
65
+ -0.90234375,-0.74609375,-0.83203125,-0.72265625,-0.80078125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
66
+ -0.9453125,-0.72265625,-0.83203125,-0.71484375,-0.78125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,0.0
67
+ -0.86328125,-0.71875,-0.75,-0.6796875,-0.7265625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
68
+ -0.89453125,-0.7421875,-0.78515625,-0.7421875,-0.76953125,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
69
+ -0.90625,-0.73046875,-0.78125,-0.734375,-0.74609375,DeepSeek-V3-0324,1.0,0.0,0.0,1.0,1.0
70
+ -0.88671875,-0.76953125,-0.82421875,-0.7109375,-0.765625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
71
+ -0.8828125,-0.765625,-0.8046875,-0.75,-0.75,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
72
+ -0.921875,-0.7734375,-0.8046875,-0.78515625,-0.79296875,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
73
+ -0.89453125,-0.7109375,-0.8203125,-0.7265625,-0.71875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
74
+ -0.91015625,-0.74609375,-0.78125,-0.71484375,-0.7734375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
75
+ -0.8828125,-0.734375,-0.796875,-0.71875,-0.7734375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
76
+ -0.9609375,-0.765625,-0.85546875,-0.84375,-0.8046875,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
77
+ -0.96875,-0.796875,-0.86328125,-0.79296875,-0.80859375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
78
+ -0.85546875,-0.70703125,-0.78515625,-0.6875,-0.734375,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
79
+ -0.95703125,-0.76953125,-0.87109375,-0.71875,-0.828125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
80
+ -0.99609375,-0.73046875,-0.8515625,-0.80078125,-0.7734375,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
81
+ -0.90625,-0.78125,-0.8671875,-0.76171875,-0.85546875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
82
+
83
+ predicted_proportions,0.0000,0.3250,0.0000,0.5500,0.1250
84
+ true_proportions,0.9625,0.0250,0.0125,0.0000,0.0000