Presidentlin committed on
Commit
fffbe5d
·
1 Parent(s): d7502bf
src/lib/benchmarks/ index.ts CHANGED
@@ -4,11 +4,13 @@ import { googleBenchmarks } from "./google";
4
  import { anthropicBenchmarks } from "./anthropic";
5
  import { openaiBenchmarks } from "./openai";
6
  import { deepseekBenchmarks } from "./deepseek";
 
7
 
8
  export const benchmarkData: Benchmark[] = [
9
  ...xaiBenchmarks,
10
  ...googleBenchmarks,
11
  ...anthropicBenchmarks,
12
  ...openaiBenchmarks,
13
- ...deepseekBenchmarks
 
14
  ];
 
4
  import { anthropicBenchmarks } from "./anthropic";
5
  import { openaiBenchmarks } from "./openai";
6
  import { deepseekBenchmarks } from "./deepseek";
7
+ import { qwenBenchmarks } from "./qwen";
8
 
9
  export const benchmarkData: Benchmark[] = [
10
  ...xaiBenchmarks,
11
  ...googleBenchmarks,
12
  ...anthropicBenchmarks,
13
  ...openaiBenchmarks,
14
+ ...deepseekBenchmarks,
15
+ ...qwenBenchmarks
16
  ];
src/lib/benchmarks/qwen.ts ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { Benchmark } from "./types";
2
+
3
+ export const qwenBenchmarks: Benchmark[] = [
4
+ {
5
+ model: "Qwen3-235B-A22B",
6
+ provider: "Qwen",
7
+ inputPrice: 0.0,
8
+ outputPrice: 0.0,
9
+ benchmark: {
10
+ aime_24: 85.7,
11
+ aime_2025: 81.5,
12
+ gpqa_diamond: 44.06,
13
+ // livecodebench_v6: 70.7,
14
+ mmlu_pro: 68.18,
15
+ mmlu: 87.81,
16
+ mmmu: 71.84,
17
+ // gsm8k: 47.47,
18
+ // math: 71.84,
19
+ // bigbench_extra_hard: 59.54,
20
+ // global_mmlu_lite: 87.40,
21
+ // evalplus: 77.60,
22
+ // humaneval: 79.00,
23
+ // mbpp: 81.40,
24
+ // cruxeval_c: 79.00,
25
+ simpleqa: 85.8,
26
+ egoschema: 81.1,
27
+ },
28
+ source: "https://qwenlm.github.io/blog/qwen3/",
29
+ },
30
+ {
31
+ model: "Qwen3-32B",
32
+ provider: "Qwen",
33
+ inputPrice: 0.0,
34
+ outputPrice: 0.0,
35
+ benchmark: {
36
+ aime_24: 81.4,
37
+ aime_2025: 72.9,
38
+ // livecodebench_v6: 65.7,
39
+ // codeforces: 1977,
40
+ //aider_polyglot: 50.2,
41
+ // livebench: 74.9,
42
+ // bfcl: 70.3,
43
+ // multillm: 73.0,
44
+ },
45
+ source: "https://qwenlm.github.io/blog/qwen3/ (image table)",
46
+ },
47
+
48
+ {
49
+ model: "Qwen3-30B-A3B",
50
+ provider: "Qwen",
51
+ inputPrice: 0.0,
52
+ outputPrice: 0.0,
53
+ benchmark: {
54
+ aime_24: 80.4,
55
+ aime_2025: 70.9,
56
+ //livecodebench_v6: 62.6,
57
+ //codeforces: 1974,
58
+ gpqa: 65.8,
59
+ // livebench: 74.3,
60
+ // bfcl: 69.1,
61
+ // multillm: 72.2,
62
+ },
63
+ source: "https://qwenlm.github.io/blog/qwen3/ (image table)",
64
+ },
65
+ {
66
+ model: "Qwen3-4B",
67
+ provider: "Qwen",
68
+ inputPrice: 0.0,
69
+ outputPrice: 0.0,
70
+ benchmark: {
71
+ aime_24: 73.8,
72
+ aime_2025: 65.6,
73
+ //livecodebench_v6: 54.2,
74
+ // codeforces: 1671,
75
+ gpqa: 55.9,
76
+
77
+ // bfcl: 65.9,
78
+ // math: 62.12,
79
+ //multillm: 66.3,
80
+ },
81
+ source: "https://qwenlm.github.io/blog/qwen3/ (image table)",
82
+ }
83
+
84
+ ];