wlg1 commited on
Commit
399d4e9
·
verified ·
1 Parent(s): 92f80cc

Upload folder using huggingface_hub

Browse files
checkpoint-200/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ef64c5712241e8f09a484116849e9f3572cb1f147e896aa5e053d6167b9d24b
3
  size 249323242
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c6e09a21dcb26a17d03fe73b4a58156f53f84d3abb5fb3dc9877b9cb348a8dc
3
  size 249323242
checkpoint-225/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b836fdbf507a3a22890ea1189616e23e54f4023c13561deffc3db5fec6f6d11d
3
  size 249323242
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85afa3559924d8e598e87b57f11617f919bec98cfca85e983d0b7184ed264e80
3
  size 249323242
choice_distribution.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "gpt-4o": 0.0,
3
- "DeepSeek-V3-0324": 0.025,
4
  "Llama-4-maverick-17b-128e-instruct-fp8": 0.0,
5
- "qwen25-coder-32b-instruct": 0.3875,
6
- "gpt-4.1-mini": 0.5875
7
  }
 
1
  {
2
  "gpt-4o": 0.0,
3
+ "DeepSeek-V3-0324": 0.05,
4
  "Llama-4-maverick-17b-128e-instruct-fp8": 0.0,
5
+ "qwen25-coder-32b-instruct": 0.15,
6
+ "gpt-4.1-mini": 0.8
7
  }
logs/events.out.tfevents.1754579928.209-20-159-47.44655.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac1ad2e67b689a5d930e272625f2c782899025a1fa6640256a8a435b8572e2e9
3
+ size 7091
logs/events.out.tfevents.1754579928.209-20-159-47.44655.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7557e3aefe35343339020f4d613dced2aec601871122b7ec4b8de505093bf924
3
+ size 7091
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ef64c5712241e8f09a484116849e9f3572cb1f147e896aa5e053d6167b9d24b
3
  size 249323242
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c6e09a21dcb26a17d03fe73b4a58156f53f84d3abb5fb3dc9877b9cb348a8dc
3
  size 249323242
per_sample_predictions.csv CHANGED
@@ -1,83 +1,84 @@
1
  logit_gpt-4o,logit_DeepSeek-V3-0324,logit_Llama-4-maverick-17b-128e-instruct-fp8,logit_qwen25-coder-32b-instruct,logit_gpt-4.1-mini,chosen_executor,true_gpt-4o,true_DeepSeek-V3-0324,true_Llama-4-maverick-17b-128e-instruct-fp8,true_qwen25-coder-32b-instruct,true_gpt-4.1-mini
2
- -0.859375,-0.65625,-0.73046875,-0.65234375,-0.67578125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
3
- -0.96484375,-0.8125,-0.8671875,-0.7890625,-0.7109375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
4
- -0.98046875,-0.81640625,-0.8828125,-0.7578125,-0.74609375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
5
- -0.92578125,-0.76953125,-0.828125,-0.7578125,-0.76171875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
6
- -0.98828125,-0.8125,-0.890625,-0.734375,-0.7578125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
7
- -0.98046875,-0.78125,-0.83984375,-0.74609375,-0.75,qwen25-coder-32b-instruct,0.0,0.0,1.0,0.0,1.0
8
- -0.93359375,-0.75390625,-0.79296875,-0.73828125,-0.73046875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
9
- -0.96484375,-0.8203125,-0.91796875,-0.765625,-0.7578125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
10
- -0.875,-0.59375,-0.72265625,-0.6015625,-0.6953125,DeepSeek-V3-0324,0.0,1.0,1.0,1.0,1.0
11
- -0.9375,-0.78125,-0.88671875,-0.76171875,-0.7421875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
12
- -1.0234375,-0.828125,-0.83984375,-0.78125,-0.75,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
13
- -0.921875,-0.79296875,-0.87890625,-0.734375,-0.74609375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
14
- -0.9609375,-0.8203125,-0.87890625,-0.734375,-0.7578125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
15
- -0.9453125,-0.74609375,-0.81640625,-0.671875,-0.734375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
16
- -1.0078125,-0.7890625,-0.80859375,-0.75390625,-0.8046875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
17
- -0.91796875,-0.80078125,-0.84375,-0.703125,-0.73046875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
18
- -0.98046875,-0.83984375,-0.87109375,-0.75,-0.734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
19
- -0.9609375,-0.83984375,-0.85546875,-0.7421875,-0.71875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
20
- -0.9921875,-0.83203125,-0.90234375,-0.7890625,-0.76953125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
21
- -1.0,-0.83203125,-0.90234375,-0.78515625,-0.76171875,gpt-4.1-mini,1.0,1.0,0.0,1.0,1.0
22
- -0.98828125,-0.84765625,-0.9140625,-0.78515625,-0.7265625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
23
- -0.9140625,-0.75390625,-0.91015625,-0.76953125,-0.7421875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
24
- -0.99609375,-0.8046875,-0.8671875,-0.75390625,-0.79296875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
25
- -0.95703125,-0.84765625,-0.9140625,-0.7734375,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
26
- -0.984375,-0.82421875,-0.8671875,-0.765625,-0.75,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
27
- -0.91015625,-0.6015625,-0.69921875,-0.59375,-0.66796875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
28
- -0.99609375,-0.80078125,-0.89453125,-0.7421875,-0.7421875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
29
- -0.9375,-0.78515625,-0.84375,-0.76953125,-0.734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
30
- -1.0078125,-0.796875,-0.859375,-0.75,-0.75390625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
31
- -0.96484375,-0.78125,-0.8515625,-0.75390625,-0.6953125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
32
- -0.9375,-0.8203125,-0.875,-0.75,-0.75,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
33
- -0.93359375,-0.796875,-0.8515625,-0.7421875,-0.7265625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
34
- -0.94140625,-0.73828125,-0.8515625,-0.7109375,-0.6953125,gpt-4.1-mini,1.0,0.0,1.0,1.0,1.0
35
- -0.96875,-0.80859375,-0.8984375,-0.78125,-0.72265625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
36
- -1.015625,-0.83984375,-0.80078125,-0.7578125,-0.76171875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
37
- -0.98046875,-0.8125,-0.84765625,-0.76171875,-0.73828125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
38
- -0.93359375,-0.78125,-0.85546875,-0.77734375,-0.77734375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
39
- -0.95703125,-0.80859375,-0.828125,-0.78125,-0.76171875,gpt-4.1-mini,1.0,1.0,0.0,1.0,1.0
40
- -0.8984375,-0.78515625,-0.890625,-0.75390625,-0.7578125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
41
- -0.9296875,-0.78515625,-0.8515625,-0.79296875,-0.78515625,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
42
- -0.96484375,-0.83984375,-0.83203125,-0.79296875,-0.74609375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
43
- -0.9453125,-0.8203125,-0.86328125,-0.765625,-0.7578125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
44
- -1.03125,-0.84375,-0.8671875,-0.76953125,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
45
- -0.96484375,-0.8359375,-0.8984375,-0.75390625,-0.765625,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
46
- -1.015625,-0.81640625,-0.84375,-0.79296875,-0.76953125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
47
- -1.0078125,-0.81640625,-0.859375,-0.73828125,-0.7265625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
48
- -0.9375,-0.8203125,-0.87890625,-0.78515625,-0.734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
49
- -0.98046875,-0.765625,-0.84375,-0.82421875,-0.7421875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
50
- -1.03125,-0.82421875,-0.8515625,-0.765625,-0.78125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
51
- -0.9609375,-0.7890625,-0.80078125,-0.734375,-0.765625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
52
- -0.96875,-0.7734375,-0.8359375,-0.76953125,-0.73828125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
53
- -0.94140625,-0.796875,-0.87109375,-0.734375,-0.69140625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
54
- -0.9921875,-0.8359375,-0.92578125,-0.76171875,-0.7578125,gpt-4.1-mini,1.0,1.0,1.0,0.0,1.0
55
- -0.83203125,-0.609375,-0.69140625,-0.60546875,-0.65234375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
56
- -0.984375,-0.83203125,-0.86328125,-0.796875,-0.73828125,gpt-4.1-mini,1.0,0.0,0.0,0.0,0.0
57
- -0.93359375,-0.80078125,-0.84375,-0.80859375,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
58
- -1.0078125,-0.8359375,-0.85546875,-0.78515625,-0.76171875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
59
- -0.9140625,-0.7265625,-0.83984375,-0.70703125,-0.6953125,gpt-4.1-mini,0.0,1.0,0.0,0.0,0.0
60
- -0.95703125,-0.78515625,-0.8359375,-0.76171875,-0.71484375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
61
- -1.03125,-0.828125,-0.83203125,-0.8125,-0.76953125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
62
- -0.96875,-0.77734375,-0.8125,-0.78125,-0.76953125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
63
- -1.0,-0.8046875,-0.8828125,-0.7890625,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
64
- -0.98046875,-0.81640625,-0.859375,-0.80078125,-0.7734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
65
- -0.9765625,-0.76953125,-0.84375,-0.72265625,-0.77734375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
66
- -0.94140625,-0.76171875,-0.859375,-0.76171875,-0.7578125,gpt-4.1-mini,1.0,1.0,1.0,1.0,0.0
67
- -0.9296875,-0.74609375,-0.84375,-0.69921875,-0.71875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
68
- -0.953125,-0.76171875,-0.8515625,-0.7578125,-0.72265625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
69
- -0.95703125,-0.81640625,-0.87890625,-0.734375,-0.73828125,qwen25-coder-32b-instruct,1.0,0.0,0.0,1.0,1.0
70
- -0.93359375,-0.76171875,-0.8203125,-0.72265625,-0.72265625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
71
- -0.9453125,-0.80859375,-0.89453125,-0.75,-0.765625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
72
- -0.9453125,-0.7578125,-0.84375,-0.74609375,-0.75390625,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
73
- -0.98046875,-0.78125,-0.8515625,-0.75,-0.75390625,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
74
- -0.96875,-0.8125,-0.89453125,-0.7421875,-0.734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
75
- -0.92578125,-0.7890625,-0.859375,-0.76171875,-0.734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
76
- -0.9609375,-0.8125,-0.875,-0.75390625,-0.75390625,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
77
- -0.96484375,-0.7734375,-0.82421875,-0.79296875,-0.765625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
78
- -0.94921875,-0.80078125,-0.875,-0.76171875,-0.6875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
79
- -0.9609375,-0.80078125,-0.8828125,-0.80078125,-0.78515625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
80
- -0.9921875,-0.828125,-0.921875,-0.7421875,-0.765625,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
81
- -0.9921875,-0.81640625,-0.828125,-0.75390625,-0.77734375,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
82
 
83
- predicted_proportions,0.0000,0.0250,0.0000,0.3875,0.5875
 
 
1
  logit_gpt-4o,logit_DeepSeek-V3-0324,logit_Llama-4-maverick-17b-128e-instruct-fp8,logit_qwen25-coder-32b-instruct,logit_gpt-4.1-mini,chosen_executor,true_gpt-4o,true_DeepSeek-V3-0324,true_Llama-4-maverick-17b-128e-instruct-fp8,true_qwen25-coder-32b-instruct,true_gpt-4.1-mini
2
+ -0.84765625,-0.69140625,-0.76171875,-0.7265625,-0.68359375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
3
+ -0.9296875,-0.8125,-0.84375,-0.80078125,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
4
+ -0.9609375,-0.8515625,-0.8671875,-0.7890625,-0.765625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
5
+ -0.953125,-0.8046875,-0.83984375,-0.8125,-0.80859375,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
6
+ -0.953125,-0.8125,-0.8359375,-0.8046875,-0.79296875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
7
+ -0.95703125,-0.8046875,-0.83984375,-0.8125,-0.78515625,gpt-4.1-mini,0.0,0.0,1.0,0.0,1.0
8
+ -0.88671875,-0.7578125,-0.83984375,-0.77734375,-0.74609375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
9
+ -0.9453125,-0.796875,-0.828125,-0.7890625,-0.7578125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
10
+ -0.8671875,-0.78515625,-0.72265625,-0.69140625,-0.75,qwen25-coder-32b-instruct,0.0,1.0,1.0,1.0,1.0
11
+ -0.93359375,-0.79296875,-0.80859375,-0.8046875,-0.74609375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
12
+ -0.97265625,-0.85546875,-0.89453125,-0.82421875,-0.79296875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
13
+ -0.94140625,-0.80078125,-0.8203125,-0.80078125,-0.78125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
14
+ -0.9609375,-0.8515625,-0.84765625,-0.81640625,-0.8125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
15
+ -0.96875,-0.7421875,-0.8671875,-0.78515625,-0.69921875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
16
+ -1.0078125,-0.890625,-0.88671875,-0.8359375,-0.8203125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
17
+ -0.94921875,-0.80078125,-0.8125,-0.7890625,-0.75,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
18
+ -1.0,-0.8203125,-0.91015625,-0.80078125,-0.765625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
19
+ -0.91796875,-0.8125,-0.85546875,-0.78515625,-0.75,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
20
+ -1.0,-0.90625,-0.88671875,-0.83203125,-0.81640625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
21
+ -0.96875,-0.90625,-0.890625,-0.8125,-0.8125,qwen25-coder-32b-instruct,1.0,1.0,0.0,1.0,1.0
22
+ -0.9296875,-0.83203125,-0.82421875,-0.78515625,-0.796875,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
23
+ -0.90625,-0.73046875,-0.81640625,-0.8046875,-0.71484375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
24
+ -1.0,-0.875,-0.95703125,-0.83203125,-0.81640625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
25
+ -0.9609375,-0.859375,-0.84765625,-0.8125,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
26
+ -0.9609375,-0.86328125,-0.86328125,-0.828125,-0.828125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
27
+ -0.85546875,-0.71875,-0.81640625,-0.76171875,-0.62109375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
28
+ -0.96484375,-0.8125,-0.84765625,-0.7890625,-0.80078125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
29
+ -0.98046875,-0.83984375,-0.87109375,-0.828125,-0.8125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
30
+ -0.984375,-0.91015625,-0.88671875,-0.78125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
31
+ -0.9296875,-0.74609375,-0.7890625,-0.75,-0.703125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
32
+ -0.91796875,-0.7890625,-0.8125,-0.8046875,-0.79296875,DeepSeek-V3-0324,1.0,1.0,1.0,1.0,1.0
33
+ -0.92578125,-0.8046875,-0.84375,-0.796875,-0.78125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
34
+ -0.97265625,-0.76953125,-0.85546875,-0.7734375,-0.73046875,gpt-4.1-mini,1.0,0.0,1.0,1.0,1.0
35
+ -0.9609375,-0.78515625,-0.8515625,-0.78515625,-0.7578125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
36
+ -0.98046875,-0.87109375,-0.921875,-0.8125,-0.765625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
37
+ -0.953125,-0.84375,-0.8359375,-0.8125,-0.7890625,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
38
+ -0.9609375,-0.81640625,-0.85546875,-0.83203125,-0.80078125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
39
+ -0.96484375,-0.84765625,-0.84375,-0.80859375,-0.80859375,qwen25-coder-32b-instruct,1.0,1.0,0.0,1.0,1.0
40
+ -0.95703125,-0.81640625,-0.84375,-0.80859375,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
41
+ -0.98046875,-0.8125,-0.87109375,-0.81640625,-0.7890625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
42
+ -0.921875,-0.8359375,-0.828125,-0.8046875,-0.78125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
43
+ -1.0,-0.83984375,-0.83984375,-0.79296875,-0.77734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
44
+ -0.97265625,-0.890625,-0.875,-0.80859375,-0.7890625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
45
+ -0.984375,-0.875,-0.8515625,-0.8203125,-0.82421875,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
46
+ -0.97265625,-0.85546875,-0.859375,-0.828125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
47
+ -0.9296875,-0.79296875,-0.79296875,-0.7734375,-0.7734375,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
48
+ -0.94140625,-0.78515625,-0.80859375,-0.78125,-0.74609375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
49
+ -0.97265625,-0.90234375,-0.91015625,-0.8125,-0.8046875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
50
+ -1.0078125,-0.91015625,-0.94140625,-0.82421875,-0.8125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
51
+ -0.92578125,-0.82421875,-0.828125,-0.81640625,-0.75390625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
52
+ -0.95703125,-0.765625,-0.85546875,-0.8046875,-0.75,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
53
+ -0.99609375,-0.77734375,-0.87890625,-0.765625,-0.73046875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
54
+ -0.96875,-0.84375,-0.8203125,-0.7890625,-0.765625,gpt-4.1-mini,1.0,1.0,1.0,0.0,1.0
55
+ -0.85546875,-0.6640625,-0.7109375,-0.7421875,-0.63671875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
56
+ -0.984375,-0.87890625,-0.8828125,-0.8125,-0.796875,gpt-4.1-mini,1.0,0.0,0.0,0.0,0.0
57
+ -0.984375,-0.8515625,-0.87890625,-0.84375,-0.828125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
58
+ -0.98046875,-0.90625,-0.85546875,-0.79296875,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
59
+ -0.9609375,-0.79296875,-0.84765625,-0.765625,-0.703125,gpt-4.1-mini,0.0,1.0,0.0,0.0,0.0
60
+ -0.9296875,-0.78125,-0.8515625,-0.8125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
61
+ -0.9609375,-0.89453125,-0.87109375,-0.8046875,-0.78125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
62
+ -0.9765625,-0.84765625,-0.890625,-0.8203125,-0.796875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
63
+ -0.98828125,-0.90234375,-0.87890625,-0.81640625,-0.78515625,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
64
+ -0.98828125,-0.890625,-0.86328125,-0.8359375,-0.82421875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
65
+ -0.9453125,-0.82421875,-0.828125,-0.828125,-0.796875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
66
+ -0.96484375,-0.796875,-0.828125,-0.8203125,-0.76953125,gpt-4.1-mini,1.0,1.0,1.0,1.0,0.0
67
+ -0.91796875,-0.734375,-0.796875,-0.78515625,-0.7421875,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
68
+ -0.9296875,-0.76171875,-0.83203125,-0.79296875,-0.7421875,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
69
+ -0.96875,-0.82421875,-0.8671875,-0.76953125,-0.75390625,gpt-4.1-mini,1.0,0.0,0.0,1.0,1.0
70
+ -0.87109375,-0.71484375,-0.79296875,-0.78125,-0.72265625,DeepSeek-V3-0324,0.0,0.0,0.0,0.0,0.0
71
+ -0.9609375,-0.90234375,-0.87109375,-0.8046875,-0.8203125,qwen25-coder-32b-instruct,0.0,0.0,0.0,0.0,0.0
72
+ -0.953125,-0.8359375,-0.859375,-0.80078125,-0.8125,qwen25-coder-32b-instruct,1.0,1.0,1.0,1.0,1.0
73
+ -0.96875,-0.8046875,-0.84765625,-0.78515625,-0.73828125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
74
+ -0.96875,-0.82421875,-0.8515625,-0.80078125,-0.77734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
75
+ -0.94921875,-0.8515625,-0.88671875,-0.859375,-0.83203125,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
76
+ -0.9765625,-0.84375,-0.90625,-0.796875,-0.79296875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
77
+ -1.0,-0.828125,-0.87109375,-0.82421875,-0.7734375,gpt-4.1-mini,0.0,0.0,0.0,0.0,0.0
78
+ -0.921875,-0.828125,-0.84375,-0.796875,-0.77734375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
79
+ -0.97265625,-0.83984375,-0.85546875,-0.83203125,-0.80859375,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
80
+ -1.015625,-0.8828125,-0.94140625,-0.859375,-0.828125,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
81
+ -0.94140625,-0.8203125,-0.8515625,-0.82421875,-0.796875,gpt-4.1-mini,1.0,1.0,1.0,1.0,1.0
82
 
83
+ predicted_proportions,0.0000,0.0500,0.0000,0.1500,0.8000
84
+ true_proportions,0.0848,0.0806,0.0713,0.0723,0.0785