import { Benchmark } from "./types";

export const anthropicBenchmarks: Benchmark[] = [
  {
    model: "Claude Opus 4",
    provider: "Anthropic",
    inputPrice: 15.0,
    outputPrice: 75.0,
    source: "https://www.anthropic.com/news/claude-4",
    benchmark: {
      swe_bench_verified: 72.5,
      // terminal_bench: 43.2,
      gpqa_diamond: 79.6,
      aime_2025: 75.5,
      mmmlu: 88.8,
      mmmu: 76.5,
      tau_bench_retail: 81.4,
      tau_bench_airline: 59.6,
    },
  },
  {
    model: "Claude Sonnet 4",
    provider: "Anthropic",
    inputPrice: 3.0,
    outputPrice: 15.0,
    source: "https://www.anthropic.com/news/claude-4",
    benchmark: {
      swe_bench_verified: 72.7,
      // terminal_bench: 35.5,
      gpqa_diamond: 75.4,
      aime_2025: 70.5,
      mmmlu: 86.5,
      mmmu: 74.4,
      tau_bench_retail: 80.5,
      tau_bench_airline: 60.0,
    },
  },
  {
    model: "Claude 3.7 Sonnet (Extended Thinking 64K)",
    provider: "Anthropic",
    inputPrice: 3.0,
    outputPrice: 15.0,
    source: "https://www.anthropic.com/news/claude-3-7-sonnet",
    benchmark: {
      gpqa_diamond: 78.2,
      tau_bench_retail: 81.2,
      tau_bench_airline: 58.4,
      mmmlu: 86.1,
      mmmu: 75.0,
      aime_24: 61.3,
    },
  },
  {
    model: "Claude 3.7 Sonnet (No Extended Thinking)",
    provider: "Anthropic",
    inputPrice: 3.0,
    outputPrice: 15.0,
    source: "https://www.anthropic.com/news/claude-3-7-sonnet",
    benchmark: {
      gpqa_diamond: 68.0,
      swe_bench_verified: 62.3,
      mmmlu: 83.2,
      mmmu: 71.8,
      aime_24: 51.7, // using average of 23.3 & 80.0
    },
  },
  {
    model: "Claude 3.5 Sonnet (New)",
    provider: "Anthropic",
    inputPrice: 3.0,
    outputPrice: 15.0,
    source: "https://www.anthropic.com/news/claude-3-7-sonnet",
    benchmark: {
      gpqa_diamond: 65.0,
      swe_bench_verified: 49.0,
      tau_bench_retail: 71.5,
      tau_bench_airline: 48.8,
      mmmlu: 82.1,
      mmmu: 70.4,
      aime_24: 16.0, // average of 16.0 & 65.4
    },
  },
  {
    model: "Claude 3.5 Haiku",
    provider: "Anthropic",
    inputPrice: 0.8,
    outputPrice: 4.0,
    source: "https://www.anthropic.com/news/3-5-models-and-computer-use",
    benchmark: {
      gpqa_diamond: 41.6,
      swe_bench_verified: 40.6,
      tau_bench_retail: 51.0,
      tau_bench_airline: 22.8,
      humaneval: 88.1,
      mmmlu: 65.0,
      aime_24: 5.3,
    },
  },
  {
    model: "Claude 3 Opus",
    provider: "Anthropic",
    inputPrice: 15.0,
    outputPrice: 75.0,
    source: "https://www.anthropic.com/news/claude-3-family",
    benchmark: {
      gpqa_diamond: 50.4,
      mmmlu: 86.8,
      mmmu: 59.4,
      // gsm8k: 95.0,
      // math: 60.1,
      // mgsm: 90.7,
      // humaneval: 84.9,
      // drop: 83.1,
      // big_bench_hard: 86.8,
      // arc_challenge: 96.4,
      // hellaswag: 95.4,
      // mathvista: 50.5,
      // ai2d: 88.1,
      // chart_qa: 80.8,
      // docvqa_anls: 89.3,
    },
  },
  {
    model: "Claude 3 Sonnet",
    provider: "Anthropic",
    inputPrice: 3.0,
    outputPrice: 15.0,
    source: "https://www.anthropic.com/news/claude-3-family",
    benchmark: {
      gpqa_diamond: 40.4,
      mmmlu: 79.0,
      mmmu: 53.1,
      // gsm8k: 92.3,
      // math: 43.1,
      // mgsm: 83.5,
      // humaneval: 73.0,
      // drop: 78.9,
      // big_bench_hard: 82.9,
      // arc_challenge: 93.2,
      // hellaswag: 89.0,
      // mathvista: 47.9,
      // ai2d: 88.7,
      // chart_qa: 81.1,
      // docvqa_anls: 89.5,
    },
  },
  {
    model: "Claude 3 Haiku",
    provider: "Anthropic",
    inputPrice: 0.25,
    outputPrice: 1.25,
    source: "https://www.anthropic.com/news/claude-3-family",
    benchmark: {
      gpqa_diamond: 33.3,
      mmmlu: 75.2,
      mmmu: 50.2,
      // gsm8k: 88.9,
      // math: 38.9,
      // mgsm: 75.1,
      // humaneval: 75.9,
      // drop: 78.4,
      // big_bench_hard: 73.7,
      // arc_challenge: 89.2,
      // hellaswag: 85.9,
      // mathvista: 46.4,
      // ai2d: 86.7,
      // chart_qa: 81.7,
      // docvqa_anls: 88.8,
    },
  },
];
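
// Usage sketch (illustrative, not part of the dataset): one way to consume
// this data is to rank models by a single benchmark metric while carrying the
// pricing fields along. This assumes only the fields visible in this file;
// the real `Benchmark` interface lives in "./types" and may be stricter, and
// `rankByMetric` / `BenchmarkLike` are hypothetical names.
type BenchmarkLike = {
  model: string;
  inputPrice: number;
  outputPrice: number;
  benchmark: Record<string, number | undefined>;
};

export function rankByMetric(entries: BenchmarkLike[], metric: string) {
  return entries
    // Keep only models that report the requested metric.
    .flatMap((e) => {
      const score = e.benchmark[metric];
      return score === undefined
        ? []
        : [
            {
              model: e.model,
              score,
              inputPrice: e.inputPrice,
              outputPrice: e.outputPrice,
            },
          ];
    })
    // Highest score first.
    .sort((a, b) => b.score - a.score);
}

// Example (assuming Benchmark is structurally assignable to BenchmarkLike):
//   rankByMetric(anthropicBenchmarks, "gpqa_diamond")[0]
//   // -> { model: "Claude Opus 4", score: 79.6, inputPrice: 15, outputPrice: 75 }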