Commit
·
713e157
1
Parent(s):
b29bfe7
- src/lib/benchmarks/index.ts +2 -0
- src/lib/benchmarks/deepseek.ts +111 -0
src/lib/benchmarks/index.ts
CHANGED
@@ -3,10 +3,12 @@ import { xaiBenchmarks } from "./xai";
|
|
3 |
import { googleBenchmarks } from "./google";
|
4 |
import { anthropicBenchmarks } from "./anthropic";
|
5 |
import { openaiBenchmarks } from "./openai";
|
|
|
6 |
|
7 |
export const benchmarkData: Benchmark[] = [
|
8 |
...xaiBenchmarks,
|
9 |
...googleBenchmarks,
|
10 |
...anthropicBenchmarks,
|
11 |
...openaiBenchmarks,
|
|
|
12 |
];
|
|
|
3 |
import { googleBenchmarks } from "./google";
|
4 |
import { anthropicBenchmarks } from "./anthropic";
|
5 |
import { openaiBenchmarks } from "./openai";
|
6 |
+
import { deepseekBenchmarks } from "./deepseek";
|
7 |
|
8 |
export const benchmarkData: Benchmark[] = [
|
9 |
...xaiBenchmarks,
|
10 |
...googleBenchmarks,
|
11 |
...anthropicBenchmarks,
|
12 |
...openaiBenchmarks,
|
13 |
+
...deepseekBenchmarks
|
14 |
];
|
src/lib/benchmarks/deepseek.ts
ADDED
@@ -0,0 +1,111 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import { Benchmark } from "./types";
|
2 |
+
|
3 |
+
export const deepseekBenchmarks: Benchmark[] = [
|
4 |
+
{
|
5 |
+
model: "DeepSeek-R1-0528",
|
6 |
+
provider: "DeepSeek",
|
7 |
+
inputPrice: 0.55, // Placeholder, update if pricing becomes available
|
8 |
+
outputPrice: 2.19,
|
9 |
+
benchmark: {
|
10 |
+
aime_24: 91.4,
|
11 |
+
aime_2025: 87.5,
|
12 |
+
gpqa_diamond: 81.0,
|
13 |
+
gpqa: 81.0, // For compatibility; can remove if you want to only use gpqa_diamond
|
14 |
+
mmlu_pro: 85.0,
|
15 |
+
mmlu: 93.4, // MMLU-Redux assumed to be "mmlu"
|
16 |
+
simpleqa: 27.8,
|
17 |
+
lcb: 73.3, // LiveCodeBench
|
18 |
+
aider_polyglot: 71.6,
|
19 |
+
swe_bench_verified: 57.6,
|
20 |
+
// Optional or less frequent benchmarks:
|
21 |
+
humanitys_last_exam: 17.7,
|
22 |
+
// Not in BenchmarkMetric, but useful (commented for type safety):
|
23 |
+
// codeforces_div1: 1930,
|
24 |
+
// frames: 83.0,
|
25 |
+
// tau_bench_airline: 53.5,
|
26 |
+
// tau_bench_retail: 63.9,
|
27 |
+
// bfcl_v3_multiturn: 37.0,
|
28 |
+
// cnmo_2024: 86.9,
|
29 |
+
// hmmt_2025: 79.4,
|
30 |
+
},
|
31 |
+
source: "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
|
32 |
+
},
|
33 |
+
|
34 |
+
{
|
35 |
+
model: "DeepSeek-V3-0324",
|
36 |
+
provider: "DeepSeek",
|
37 |
+
inputPrice: 0.27, // Placeholder — adjust if actual pricing becomes available
|
38 |
+
outputPrice: 1.10,
|
39 |
+
benchmark: {
|
40 |
+
mmlu: 87.1, // From original DeepSeek-V3
|
41 |
+
mmlu_pro: 81.2, // Updated in V3-0324
|
42 |
+
gpqa: 68.4, // Updated in V3-0324
|
43 |
+
gpqa_diamond: 59.1, // From V3
|
44 |
+
aime_24: 59.4, // Updated in V3-0324
|
45 |
+
lcb: 49.2, // Updated LiveCodeBench
|
46 |
+
simpleqa: 24.9, // From V3
|
47 |
+
aider_polyglot: 49.6, // From V3
|
48 |
+
swe_bench_verified: 42.0 // From V3
|
49 |
+
},
|
50 |
+
source: "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324",
|
51 |
+
},
|
52 |
+
{
|
53 |
+
model: "DeepSeek-V3",
|
54 |
+
provider: "DeepSeek",
|
55 |
+
inputPrice: 0.27, // Placeholder — update if real pricing is known
|
56 |
+
outputPrice: 1.10,
|
57 |
+
benchmark: {
|
58 |
+
mmlu: 87.1,
|
59 |
+
mmlu_pro: 64.4,
|
60 |
+
// mmlu_redux: 86.2, // Commented: not in BenchmarkMetric
|
61 |
+
gpqa_diamond: 59.1,
|
62 |
+
simpleqa: 24.9,
|
63 |
+
aime_24: 39.2,
|
64 |
+
lcb: 37.6, // LiveCodeBench (Pass@1)
|
65 |
+
aider_polyglot: 49.6,
|
66 |
+
swe_bench_verified: 42.0,
|
67 |
+
|
68 |
+
// Optional or not yet in your schema:
|
69 |
+
// humanitys_last_exam: undefined,
|
70 |
+
// codeforces: 51.6,
|
71 |
+
// drop: 89.0,
|
72 |
+
// gsm8k: 89.3,
|
73 |
+
// math_em: 61.6,
|
74 |
+
// mgsm: 79.8,
|
75 |
+
// cmath: 90.7,
|
76 |
+
// cruxeval_i: 67.3,
|
77 |
+
// cruxeval_o: 69.8,
|
78 |
+
// triviaqa: 82.9,
|
79 |
+
// naturalquestions: 40.0,
|
80 |
+
// agieval: 79.6,
|
81 |
+
// hellaSwag: 88.9,
|
82 |
+
// piqa: 84.7,
|
83 |
+
// winogrande: 84.9,
|
84 |
+
},
|
85 |
+
source: "https://huggingface.co/deepseek-ai/DeepSeek-V3",
|
86 |
+
},
|
87 |
+
{
|
88 |
+
model: "DeepSeek-R1",
|
89 |
+
provider: "DeepSeek",
|
90 |
+
inputPrice: 0.60, // Placeholder — update if actual pricing is available
|
91 |
+
outputPrice: 1.20,
|
92 |
+
benchmark: {
|
93 |
+
mmlu: 90.8,
|
94 |
+
mmlu_pro: 84.0,
|
95 |
+
gpqa_diamond: 71.5,
|
96 |
+
simpleqa: 30.1,
|
97 |
+
lcb: 65.9, // LiveCodeBench (Pass@1-CoT)
|
98 |
+
swe_bench_verified: 49.2,
|
99 |
+
aider_polyglot: 53.3,
|
100 |
+
aime_24: 79.8,
|
101 |
+
// aime_2025: undefined, // not provided
|
102 |
+
// gpqa: undefined, // use gpqa_diamond
|
103 |
+
// egoschema: undefined,
|
104 |
+
// mmmu: undefined,
|
105 |
+
// loft: undefined,
|
106 |
+
// humanitys_last_exam: undefined, // optional
|
107 |
+
},
|
108 |
+
source: "https://huggingface.co/deepseek-ai/DeepSeek-R1",
|
109 |
+
},
|
110 |
+
];
|
111 |
+
|