Commit
·
fffbe5d
1
Parent(s):
d7502bf
- src/lib/benchmarks/index.ts +3 -1
- src/lib/benchmarks/qwen.ts +84 -0
src/lib/benchmarks/index.ts
CHANGED
@@ -4,11 +4,13 @@ import { googleBenchmarks } from "./google";
|
|
4 |
import { anthropicBenchmarks } from "./anthropic";
|
5 |
import { openaiBenchmarks } from "./openai";
|
6 |
import { deepseekBenchmarks } from "./deepseek";
|
|
|
7 |
|
8 |
export const benchmarkData: Benchmark[] = [
|
9 |
...xaiBenchmarks,
|
10 |
...googleBenchmarks,
|
11 |
...anthropicBenchmarks,
|
12 |
...openaiBenchmarks,
|
13 |
-
...deepseekBenchmarks
|
|
|
14 |
];
|
|
|
4 |
import { anthropicBenchmarks } from "./anthropic";
|
5 |
import { openaiBenchmarks } from "./openai";
|
6 |
import { deepseekBenchmarks } from "./deepseek";
|
7 |
+
import { qwenBenchmarks } from "./qwen";
|
8 |
|
9 |
export const benchmarkData: Benchmark[] = [
|
10 |
...xaiBenchmarks,
|
11 |
...googleBenchmarks,
|
12 |
...anthropicBenchmarks,
|
13 |
...openaiBenchmarks,
|
14 |
+
...deepseekBenchmarks,
|
15 |
+
...qwenBenchmarks
|
16 |
];
|
src/lib/benchmarks/qwen.ts
ADDED
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { Benchmark } from "./types";
|
2 |
+
|
3 |
+
export const qwenBenchmarks: Benchmark[] = [
|
4 |
+
{
|
5 |
+
model: "Qwen3-235B-A22B",
|
6 |
+
provider: "Qwen",
|
7 |
+
inputPrice: 0.0,
|
8 |
+
outputPrice: 0.0,
|
9 |
+
benchmark: {
|
10 |
+
aime_24: 85.7,
|
11 |
+
aime_2025: 81.5,
|
12 |
+
gpqa_diamond: 44.06,
|
13 |
+
// livecodebench_v6: 70.7,
|
14 |
+
mmlu_pro: 68.18,
|
15 |
+
mmlu: 87.81,
|
16 |
+
mmmu: 71.84,
|
17 |
+
// gsm8k: 47.47,
|
18 |
+
// math: 71.84,
|
19 |
+
// bigbench_extra_hard: 59.54,
|
20 |
+
// global_mmlu_lite: 87.40,
|
21 |
+
// evalplus: 77.60,
|
22 |
+
// humaneval: 79.00,
|
23 |
+
// mbpp: 81.40,
|
24 |
+
// cruxeval_c: 79.00,
|
25 |
+
simpleqa: 85.8,
|
26 |
+
egoschema: 81.1,
|
27 |
+
},
|
28 |
+
source: "https://qwenlm.github.io/blog/qwen3/",
|
29 |
+
},
|
30 |
+
{
|
31 |
+
model: "Qwen3-32B",
|
32 |
+
provider: "Qwen",
|
33 |
+
inputPrice: 0.0,
|
34 |
+
outputPrice: 0.0,
|
35 |
+
benchmark: {
|
36 |
+
aime_24: 81.4,
|
37 |
+
aime_2025: 72.9,
|
38 |
+
// livecodebench_v6: 65.7,
|
39 |
+
//: 1977,
|
40 |
+
//aider_polyglot: 50.2,
|
41 |
+
// livebench: 74.9,
|
42 |
+
// bfcl: 70.3,
|
43 |
+
// multillm: 73.0,
|
44 |
+
},
|
45 |
+
source: "https://qwenlm.github.io/blog/qwe,n3/ (image table)",
|
46 |
+
},
|
47 |
+
|
48 |
+
{
|
49 |
+
model: "Qwen3-30B-A3B",
|
50 |
+
provider: "Qwen",
|
51 |
+
inputPrice: 0.0,
|
52 |
+
outputPrice: 0.0,
|
53 |
+
benchmark: {
|
54 |
+
aime_24: 80.4,
|
55 |
+
aime_2025: 70.9,
|
56 |
+
//livecodebench_v6: 62.6,
|
57 |
+
//codeforces: 1974,
|
58 |
+
gpqa: 65.8,
|
59 |
+
// livebench: 74.3,
|
60 |
+
// bfcl: 69.1,
|
61 |
+
// multillm: 72.2,
|
62 |
+
},
|
63 |
+
source: "https://qwenlm.github.io/blog/qwen3/ (image table)",
|
64 |
+
},
|
65 |
+
{
|
66 |
+
model: "Qwen3-4B",
|
67 |
+
provider: "Qwen",
|
68 |
+
inputPrice: 0.0,
|
69 |
+
outputPrice: 0.0,
|
70 |
+
benchmark: {
|
71 |
+
aime_24: 73.8,
|
72 |
+
aime_2025: 65.6,
|
73 |
+
//livecodebench_v6: 54.2,
|
74 |
+
// codeforces: 1671,
|
75 |
+
gpqa: 55.9,
|
76 |
+
|
77 |
+
// bfcl: 65.9,
|
78 |
+
// math: 62.12,
|
79 |
+
//multillm: 66.3,
|
80 |
+
},
|
81 |
+
source: "https://qwenlm.github.io/blog/qwen3/ (image table)",
|
82 |
+
}
|
83 |
+
|
84 |
+
];
|