File size: 4,202 Bytes
b3d493a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0f1951
 
b3d493a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e17557b
 
 
 
 
 
 
b3d493a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0f1951
b3d493a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e17557b
b3d493a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0f1951
 
 
 
1f689a2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
import React, { useState, useEffect } from 'react';
import _ from 'lodash';

const ScoreBar = ({ score }) => {
  const percentage = score <= 1 ? score * 100 : score;
  const hue = Math.min(percentage * 1.2, 120); // 0 = red, 120 = green
  const backgroundColor = `hsl(${hue}, 70%, 45%)`;
  
  return (
    <div className="relative h-8 bg-gray-100 rounded w-full">
      <div 
        className="absolute top-0 left-0 h-full rounded transition-all duration-200"
        style={{ 
          width: `${percentage}%`,
          backgroundColor
        }}
      />
      <div className="absolute inset-0 flex items-center justify-end px-3">
        <span className="text-sm font-medium text-white mix-blend-difference">
          {percentage.toFixed(2)}%
        </span>
      </div>
    </div>
  );
};

function App() {
  const [data, setData] = useState([]);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState(null);
  const [sortConfig, setSortConfig] = useState({ key: 'GAIA', direction: 'desc' });

  useEffect(() => {
    const fetchData = async () => {
      try {
        setLoading(true);
        const response = await fetch('/api/results');
        if (!response.ok) {
          throw new Error('Failed to fetch data');
        }
        const jsonData = await response.json();
        setData(jsonData);
      } catch (err) {
        console.error('Error fetching data:', err);
        setError(err.message);
      } finally {
        setLoading(false);
      }
    };

    fetchData();
  }, []);

  const handleSort = (key) => {
    const direction = sortConfig.key === key && sortConfig.direction === 'desc' ? 'asc' : 'desc';
    setSortConfig({ key, direction });
  };

  const sortedData = _.orderBy(
    data,
    [item => item.scores[sortConfig.key] || 0],
    [sortConfig.direction]
  );

  const getSortIcon = (key) => {
    if (sortConfig.key === key) {
      return sortConfig.direction === 'desc' ? ' ↓' : ' ↑';
    }
    return ' ↕';
  };

  if (loading) {
    return (
      <div className="flex items-center justify-center min-h-screen">
        <div className="text-lg">Loading benchmark results...</div>
      </div>
    );
  }

  if (error) {
    return (
      <div className="flex items-center justify-center min-h-screen text-red-600">
        Error: {error}
      </div>
    );
  }

  return (
    <div className="p-6">
      <div className="mb-6">
        <h1 className="text-2xl font-bold mb-2">Model Benchmark Results</h1>
        <p className="text-gray-600">Comparing model performance across different benchmarks</p>
      </div>

      <div className="overflow-x-auto">
        <table className="w-full border-collapse">
          <thead>
            <tr className="border-b border-gray-200">
              <th className="py-3 text-left font-medium text-gray-700 w-1/4">Model</th>
              {["GAIA", "MATH", "SimpleQA"].map(benchmark => (
                <th 
                  key={benchmark}
                  onClick={() => handleSort(benchmark)}
                  className="py-3 px-4 text-left font-medium text-gray-700 cursor-pointer hover:text-blue-600"
                >
                  <div className="flex items-center gap-1">
                    {benchmark}
                    <span className="text-gray-500">{getSortIcon(benchmark)}</span>
                  </div>
                </th>
              ))}
            </tr>
          </thead>
          <tbody>
            {sortedData.map((item, index) => (
              <tr key={index} className="border-b border-gray-100">
                <td className="py-3 pr-4 font-medium truncate" title={item.model_id}>
                  {item.model_id}
                </td>
                <td className="py-3 px-4">
                  <ScoreBar score={item.scores.GAIA} />
                </td>
                <td className="py-3 px-4">
                  <ScoreBar score={item.scores.MATH} />
                </td>
                <td className="py-3 px-4">
                  <ScoreBar score={item.scores.SimpleQA} />
                </td>
              </tr>
            ))}
          </tbody>
        </table>
      </div>
    </div>
  );
}

export default App;