llm-pricing-calculator

Running

App Files Community

Presidentlin committed on Jun 21

Commit

a4caafd

1 Parent(s): 829c0ca

x

Browse files

Files changed (2) hide show

src/lib/benchmarks/google.ts +127 -104
src/lib/benchmarks/types.ts +2 -0

src/lib/benchmarks/google.ts CHANGED Viewed

@@ -1,153 +1,176 @@
 import { Benchmark } from "./types";
 export const googleBenchmarks: Benchmark[] = [
     {
-        model: "Gemini Diffusion",
-        provider: "Google",
-        inputPrice: 0,
-        outputPrice: 0,
-        benchmark: {
-            livecodebench_v6: 30.9,
-            bigcodebench: 45.4,
-            lbpp_v2: 56.8,
-            swe_bench_verified: 22.9,
-            humaneval: 89.6,
-            mbpp: 76.0,
-            gpqa_diamond: 40.4,
-            aime_2025: 23.3,
-            bigbench_extra_hard: 15.0,
-            global_mmlu_lite: 69.1,
-        },
-        source: "https://deepmind.google/models/gemini-diffusion/",
-    },
-    {
-        model: "Gemini 2.0 Flash-Lite",
         provider: "Google",
-        inputPrice: 0.10,
-        outputPrice: 0.40,
         benchmark: {
-            livecodebench_v6: 28.5,
-            bigcodebench: 45.8,
-            lbpp_v2: 56.0,
-            swe_bench_verified: 28.5,
-            humaneval: 90.2,
-            mbpp: 75.8,
-            gpqa_diamond: 56.5,
-            aime_2025: 20.0,
-            bigbench_extra_hard: 21.0,
-            global_mmlu_lite: 79.0,
         },
-        source: "https://deepmind.google/models/gemini-diffusion/",
     },
     {
-        model: "Gemini 2.5 Flash Preview (05-20)",
         provider: "Google",
         inputPrice: 0.15,
         outputPrice: 3.5,
-        source: "https://ai.google.dev/gemini-api/docs/thinking",
         benchmark: {
-            aime_2025: 72.0,
             gpqa_diamond: 82.8,
             simpleqa: 26.9,
             global_mmlu_lite: 88.4,
-            swe_bench_verified: 60.4,
-            livecodebench_v6: 63.9,
             mmmu: 79.7,
-            lbpp_v2: 61.9,
-            bigcodebench: 56.7,
-            facts_grounding: 85.3,
-            humanitys_last_exam: 11.0,
-            mrcr_v2_avg_128k: 74.0,
-            mrcr_v2_pointwise_1m: 32.0,
         },
     },
     {
-        model: "Gemini 2.5 Flash Preview (04-17) Thinking",
         provider: "Google",
-        inputPrice: 0.15,
-        outputPrice: 3.5,
-        source: "https://ai.google.dev/gemini-api/docs/thinking",
         benchmark: {
-            aime_2025: 78.0,
             gpqa_diamond: 78.3,
-            simpleqa: 29.7,
-            global_mmlu_lite: 88.4,
-            livecodebench_v6: 63.5,
-            lbpp_v2: 51.1,
-            bigcodebench: 44.2,
-            mmmu: 76.7,
-            humanitys_last_exam: 12.1
         },
     },
     {
         model: "Gemini 2.0 Flash",
         provider: "Google",
         inputPrice: 0.1,
         outputPrice: 0.4,
-        source: "https://ai.google.dev/gemini-api/docs/thinking",
         benchmark: {
-            aime_2025: 27.5,
-            gpqa_diamond: 60.1,
             simpleqa: 29.9,
             global_mmlu_lite: 83.4,
-            livecodebench_v6: 34.5,
-            lbpp_v2: 22.2,
-            mmmu: 71.7,
             facts_grounding: 84.6,
             humanitys_last_exam: 5.1,
-            mrcr_v2_avg_128k: 36.0,
-            mrcr_v2_pointwise_1m: 6.0,
         },
     },
     {
-        model: "Gemini 2.5 Pro Preview (05-06)",
         provider: "Google",
-        inputPrice: 2.5,
-        outputPrice: 15.0,
-        source: "https://blog.google/products/gemini/gemini-2-5-pro-updates/",
         benchmark: {
-            humanitys_last_exam: 17.8,
-            gpqa_diamond: 83.0,
-            aime_2025: 83.0,
-            livecodebench_v6: 75.6,
-            lbpp_v2: 76.5,
-            bigcodebench: 72.7,
-            swe_bench_verified: 63.2,
-            simpleqa: 50.8,
-            mmmu: 79.6,
-            video_mme: 84.8,
-            mrcr_v2_avg_128k: 93.0,
-            mrcr_v2_pointwise_1m: 82.9,
-            global_mmlu_lite: 88.6,
         },
     },
     {
-        model: "Gemini 2.5 Pro Experimental (03-25)",
         provider: "Google",
-        inputPrice: 2.5,
-        outputPrice: 15.0,
-        source: "https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/",
         benchmark: {
-            humanitys_last_exam: 18.8,
-            gpqa_diamond: 84.0,
-            aime_2025: 86.7,
-            livecodebench_v6: 70.4,
-            lbpp_v2: 74.0,
-            bigcodebench: 68.6,
-            swe_bench_verified: 63.8,
-            simpleqa: 52.9,
-            mmmu: 81.7,
-            mrcr_v2_avg_128k: 94.5,
-            mrcr_v2_pointwise_1m: 83.1,
-            global_mmlu_lite: 89.8,
         },
     },
 ];

 import { Benchmark } from "./types";
 export const googleBenchmarks: Benchmark[] = [
     {
+        model: "Gemini 2.5 Pro (Thinking-enabled, default)",
         provider: "Google",
+        inputPrice: 2.5,
+        outputPrice: 15.0,
+        source: "https://storage.googleapis.com/deepmind-media/gemini/gemini_v2_5_report.pdf",
         benchmark: {
+            livecodebench_v6: 69.0,
+            aider_polyglot: 82.2,
+            swe_bench_verified: 67.2,
+            gpqa_diamond: 86.4,
+            aime_2025: 88.0,
+            humanitys_last_exam: 21.6,
+            simpleqa: 54.0,
+            facts_grounding: 87.8,
+            global_mmlu_lite: 89.2,
+            mrcr_v2_avg_128k: 58.0,
+            mrcr_v2_pointwise_1m: 16.4,
+            mmmu: 82.0,
         },
     },
     {
+        model: "Gemini 2.5 Flash (Thinking-enabled, default)",
         provider: "Google",
         inputPrice: 0.15,
         outputPrice: 3.5,
+        source: "https://storage.googleapis.com/deepmind-media/gemini/gemini_v2_5_report.pdf",
         benchmark: {
+            livecodebench_v6: 55.4,
+            aider_polyglot: 56.7,
+            swe_bench_verified: 60.3,
             gpqa_diamond: 82.8,
+            aime_2025: 72.0,
+            humanitys_last_exam: 11.0,
             simpleqa: 26.9,
+            facts_grounding: 85.3,
             global_mmlu_lite: 88.4,
+            mrcr_v2_avg_128k: 54.3,
+            mrcr_v2_pointwise_1m: 21.0,
             mmmu: 79.7,
         },
     },
     {
+        model: "Gemini 2.5 Flash (Non-Thinking)",
         provider: "Google",
+        inputPrice: 0.30,
+        outputPrice: 2.50,
+        source: "https://blog.google/products/gemini/gemini-2-5-model-family-expands/",
         benchmark: {
+            humanitys_last_exam: 8.4,
             gpqa_diamond: 78.3,
+            aime_2025: 61.6,
+            livecodebench_v6: 41.1,
+            aider_polyglot: 44.0,
+            swe_bench_verified: 50.0,
+            simpleqa: 25.8,
+            facts_grounding: 83.4,
+            mmmu: 76.9,
+            //vibe_eval: 66.2,
+            mrcr_v2_avg_128k: 34.1,
+            mrcr_v2_pointwise_1m: 16.8,
+            global_mmlu_lite: 85.8,
+        },
+    },
+    {
+        model: "Gemini 2.5 Flash-Lite (Non-Thinking)",
+        provider: "Google",
+        inputPrice: 0.10,
+        outputPrice: 0.40,
+        source: "https://blog.google/products/gemini/gemini-2-5-model-family-expands/",
+        benchmark: {
+            humanitys_last_exam: 5.1,
+            gpqa_diamond: 64.6,
+            aime_2025: 49.8,
+            livecodebench_v6: 33.7,
+            aider_polyglot: 26.7,
+            swe_bench_verified: 42.6,
+            simpleqa: 10.7,
+            facts_grounding: 84.1,
+            mmmu: 72.9,
+            // vibe_eval: 51.3,
+            mrcr_v2_avg_128k: 16.6,
+            mrcr_v2_pointwise_1m: 4.1,
+            global_mmlu_lite: 81.1,
         },
     },
+    {
+        model: "Gemini 2.5 Flash-Lite (Thinking)",
+        provider: "Google",
+        inputPrice: 0.10,
+        outputPrice: 0.40,
+        source: "https://blog.google/products/gemini/gemini-2-5-model-family-expands/",
+        benchmark: {
+            humanitys_last_exam: 6.9,
+            gpqa_diamond: 66.7,
+            aime_2025: 63.1,
+            livecodebench_v6: 34.3,
+            aider_polyglot: 27.1,
+            swe_bench_verified: 44.9,
+            simpleqa: 13.0,
+            facts_grounding: 86.8,
+            mmmu: 72.9,
+            //vibe_eval: 57.5,
+            mrcr_v2_avg_128k: 30.6,
+            mrcr_v2_pointwise_1m: 5.4,
+            global_mmlu_lite: 84.5,
+        },
+    },
     {
         model: "Gemini 2.0 Flash",
         provider: "Google",
         inputPrice: 0.1,
         outputPrice: 0.4,
+        source: "https://storage.googleapis.com/deepmind-media/gemini/gemini_v2_5_report.pdf",
         benchmark: {
+            aime_2025: 29.7,
+            gpqa_diamond: 65.2,
             simpleqa: 29.9,
             global_mmlu_lite: 83.4,
+            livecodebench_v6: 29.1,
+            mmmu: 69.3,
             facts_grounding: 84.6,
             humanitys_last_exam: 5.1,
+            mrcr_v2_avg_128k: 19.0,
+            mrcr_v2_pointwise_1m: 5.3,
         },
     },
     {
+        model: "Gemini 1.5 Pro",
         provider: "Google",
+        inputPrice: 0.015,
+        outputPrice: 0.075,
+        source: "https://storage.googleapis.com/deepmind-media/gemini/gemini_v2_5_report.pdf",
         benchmark: {
+            livecodebench_v6: 29.7,
+            aider_polyglot: 16.9,
+            swe_bench_verified: 34.2,
+            gpqa_diamond: 58.1,
+            aime_2025: 17.5,
+            humanitys_last_exam: 4.6,
+            simpleqa: 24.9,
+            facts_grounding: 80.0,
+            global_mmlu_lite: 80.8,
+            mrcr_v2_avg_128k: 26.2,
+            mrcr_v2_pointwise_1m: 12.1,
+            mmmu: 67.7,
         },
     },
     {
+        model: "Gemini 1.5 Flash",
         provider: "Google",
+        inputPrice: 0.0025,
+        outputPrice: 0.0075,
+        source: "https://storage.googleapis.com/deepmind-media/gemini/gemini_v2_5_report.pdf",
         benchmark: {
+            livecodebench_v6: 30.3,
+            aider_polyglot: 2.8,
+            swe_bench_verified: 19.7,
+            gpqa_diamond: 50.0,
+            aime_2025: 14.7,
+            simpleqa: 8.6,
+            facts_grounding: 82.9,
+            global_mmlu_lite: 72.5,
+            mrcr_v2_avg_128k: 18.4,
+            mrcr_v2_pointwise_1m: 10.2,
+            mmmu: 58.3,
         },
     },
 ];

src/lib/benchmarks/types.ts CHANGED Viewed

@@ -13,6 +13,7 @@ export type BenchmarkMetric =
   // Code benchmarks (frequent)
   | "humaneval"
   | "mbpp"
   | "bigcodebench"
   | "livecodebench_v6"
@@ -54,6 +55,7 @@ export const benchmarkMetricOrder: BenchmarkMetric[] = [
   "aime_24",
   "aime_2025",
   "gpqa_diamond",
   // // Code benchmarks (frequent)
   // "humaneval",

   // Code benchmarks (frequent)
   | "humaneval"
+  | "aider_polyglot"
   | "mbpp"
   | "bigcodebench"
   | "livecodebench_v6"
   "aime_24",
   "aime_2025",
   "gpqa_diamond",
+  "aider_polyglot",
   // // Code benchmarks (frequent)
   // "humaneval",