import { Benchmark } from "./types";

/**
 * Benchmark scores and pricing for DeepSeek models.
 *
 * One entry per model release. Each entry's `source` is the model card the
 * scores were taken from. Metrics that do not (yet) exist in the project's
 * benchmark schema are kept as commented-out lines so the values are not lost
 * but the literal still type-checks.
 *
 * NOTE(review): `inputPrice` / `outputPrice` are presumably USD per million
 * tokens — confirm against the `Benchmark` type in `./types`.
 */
export const deepseekBenchmarks: Benchmark[] = [
  {
    model: "DeepSeek-R1-0528",
    provider: "DeepSeek",
    inputPrice: 0.55, // Placeholder, update if pricing becomes available
    outputPrice: 2.19,
    benchmark: {
      aime_24: 91.4,
      aime_2025: 87.5,
      gpqa_diamond: 81.0,
      gpqa: 81.0, // Compatibility alias of gpqa_diamond; remove once consumers only read gpqa_diamond
      mmlu_pro: 85.0,
      mmlu: 93.4, // MMLU-Redux score, stored under "mmlu"
      simpleqa: 27.8,
      lcb: 73.3, // LiveCodeBench
      aider_polyglot: 71.6,
      swe_bench_verified: 57.6,
      // Optional or less frequent benchmarks:
      humanitys_last_exam: 17.7,
      // Not in BenchmarkMetric, but useful (commented out for type safety):
      // codeforces_div1: 1930,
      // frames: 83.0,
      // NOTE(review): the tau_bench_* keys are active here although they sit
      // among the "not in BenchmarkMetric" metrics — confirm they exist in the schema.
      tau_bench_airline: 53.5,
      tau_bench_retail: 63.9,
      // bfcl_v3_multiturn: 37.0,
      // cnmo_2024: 86.9,
      // hmmt_2025: 79.4,
    },
    source: "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
  },
  {
    model: "DeepSeek-V3-0324",
    provider: "DeepSeek",
    inputPrice: 0.27, // Placeholder — adjust if actual pricing becomes available
    outputPrice: 1.10,
    benchmark: {
      mmlu: 87.1, // From original DeepSeek-V3
      mmlu_pro: 81.2, // Updated in V3-0324
      gpqa: 68.4, // Updated in V3-0324
      // Kept equal to `gpqa`: as in the R1-0528 entry, `gpqa` is a
      // compatibility alias of `gpqa_diamond`. The previous 59.1 was the
      // older DeepSeek-V3 score, not the 0324 one.
      gpqa_diamond: 68.4,
      aime_24: 59.4, // Updated in V3-0324
      lcb: 49.2, // Updated LiveCodeBench
      simpleqa: 24.9, // From V3
      aider_polyglot: 49.6, // From V3
      swe_bench_verified: 42.0, // From V3
    },
    source: "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324",
  },
  {
    model: "DeepSeek-V3",
    provider: "DeepSeek",
    inputPrice: 0.27, // Placeholder — update if real pricing is known
    outputPrice: 1.10,
    benchmark: {
      mmlu: 87.1,
      // NOTE(review): 64.4 is far below the 81.2 listed for V3-0324; verify
      // this is the chat-model score and not a base-model number.
      mmlu_pro: 64.4,
      // mmlu_redux: 86.2, // Commented out: not in BenchmarkMetric
      gpqa_diamond: 59.1,
      simpleqa: 24.9,
      aime_24: 39.2,
      lcb: 37.6, // LiveCodeBench (Pass@1)
      aider_polyglot: 49.6,
      swe_bench_verified: 42.0,
      // Optional or not yet in the schema:
      // humanitys_last_exam: undefined,
      // codeforces: 51.6,
      // drop: 89.0,
      // gsm8k: 89.3,
      // math_em: 61.6,
      // mgsm: 79.8,
      // cmath: 90.7,
      // cruxeval_i: 67.3,
      // cruxeval_o: 69.8,
      // triviaqa: 82.9,
      // naturalquestions: 40.0,
      // agieval: 79.6,
      // hellaSwag: 88.9,
      // piqa: 84.7,
      // winogrande: 84.9,
    },
    source: "https://huggingface.co/deepseek-ai/DeepSeek-V3",
  },
  {
    model: "DeepSeek-R1",
    provider: "DeepSeek",
    inputPrice: 0.55, // Placeholder, update if pricing becomes available
    outputPrice: 2.19,
    benchmark: {
      mmlu: 90.8,
      mmlu_pro: 84.0,
      gpqa_diamond: 71.5,
      simpleqa: 30.1,
      lcb: 65.9, // LiveCodeBench (Pass@1-CoT)
      swe_bench_verified: 49.2,
      aider_polyglot: 53.3,
      aime_24: 79.8,
      // aime_2025: undefined, // not provided
      // gpqa: undefined, // use gpqa_diamond
      // egoschema: undefined,
      // mmmu: undefined,
      // loft: undefined,
      // humanitys_last_exam: undefined, // optional
    },
    source: "https://huggingface.co/deepseek-ai/DeepSeek-R1",
  },
];