Update src/App.js

src/App.js  +60 -150  CHANGED
@@ -9,164 +9,60 @@ const UrologyLeaderboard = () => {
   const [loading, setLoading] = useState(true);
   const [refreshing, setRefreshing] = useState(false);
   const [lastUpdated, setLastUpdated] = useState(null);
-
-  const mockData = [
-    {
-      model: "openai/gpt-4o-mini",
-      baseModel: "",
-      accuracy: 0.18543046357615803,
-      totalQuestions: 151,
-      correctAnswers: 28,
-      license: "API Service",
-      submitType: "openrouter",
-      submittedTime: "2025-05-22T16:53:10Z",
-      params: 0,
-      precision: "float16",
-      status: "FINISHED"
-    },
-    {
-      model: "anthropic/claude-3-sonnet",
-      baseModel: "claude-3-sonnet",
-      accuracy: 0.32450331125827815,
-      totalQuestions: 151,
-      correctAnswers: 49,
-      license: "API Service",
-      submitType: "openrouter",
-      submittedTime: "2025-05-22T14:30:25Z",
-      params: 0,
-      precision: "float16",
-      status: "FINISHED"
-    },
-    {
-      model: "meta-llama/llama-3.1-70b",
-      baseModel: "llama-3.1-70b",
-      accuracy: 0.27814569536423844,
-      totalQuestions: 151,
-      correctAnswers: 42,
-      license: "Apache 2.0",
-      submitType: "huggingface",
-      submittedTime: "2025-05-22T12:15:30Z",
-      params: 70000000000,
-      precision: "bfloat16",
-      status: "FINISHED"
-    },
-    {
-      model: "mistralai/mixtral-8x7b",
-      baseModel: "mixtral-8x7b",
-      accuracy: 0.23841059602649006,
-      totalQuestions: 151,
-      correctAnswers: 36,
-      license: "Apache 2.0",
-      submitType: "huggingface",
-      submittedTime: "2025-05-22T10:45:15Z",
-      params: 46700000000,
-      precision: "bfloat16",
-      status: "FINISHED"
-    },
-    {
-      model: "google/gemini-pro",
-      baseModel: "gemini-pro",
-      accuracy: 0.29801324503311255,
-      totalQuestions: 151,
-      correctAnswers: 45,
-      license: "API Service",
-      submitType: "google",
-      submittedTime: "2025-05-22T08:20:40Z",
-      params: 0,
-      precision: "float16",
-      status: "FINISHED"
-    },
-    {
-      model: "cohere/command-r-plus",
-      baseModel: "command-r-plus",
-      accuracy: 0.35761589403973510,
-      totalQuestions: 151,
-      correctAnswers: 54,
-      license: "API Service",
-      submitType: "cohere",
-      submittedTime: "2025-05-21T18:12:30Z",
-      params: 0,
-      precision: "float16",
-      status: "FINISHED"
-    },
-    {
-      model: "openai/gpt-4-turbo",
-      baseModel: "gpt-4-turbo",
-      accuracy: 0.41721854304635763,
-      totalQuestions: 151,
-      correctAnswers: 63,
-      license: "API Service",
-      submitType: "openai",
-      submittedTime: "2025-05-21T16:45:10Z",
-      params: 0,
-      precision: "float16",
-      status: "FINISHED"
-    },
-    {
-      model: "microsoft/phi-3-medium",
-      baseModel: "phi-3-medium",
-      accuracy: 0.21854304635761590,
-      totalQuestions: 151,
-      correctAnswers: 33,
-      license: "MIT",
-      submitType: "azure",
-      submittedTime: "2025-05-21T14:22:45Z",
-      params: 14000000000,
-      precision: "float16",
-      status: "FINISHED"
-    }
-  ];

   const loadData = async () => {
     try {
-      // Try to load real data from HuggingFace
       const response = await fetch('https://datasets-server.huggingface.co/rows?dataset=SASLeaderboard/results&config=default&split=train');

-      if (response.ok) {
-        const data = await response.json();
-        console.log('Raw HuggingFace data:', data);
-
-        // Process the real data
-        const processedData = data.rows.map(row => {
-          const config = row.row.config;
-          const results = row.row.results;
-
-          return {
-            model: config.model || 'Unknown Model',
-            baseModel: config.base_model || '',
-            accuracy: results.overall?.accuracy || 0,
-            totalQuestions: results.overall?.total_questions || 151,
-            correctAnswers: Math.round((results.overall?.accuracy || 0) * (results.overall?.total_questions || 151)),
-            license: config.license || 'Unknown',
-            submitType: config.submit_type || 'unknown',
-            submittedTime: config.submitted_time || new Date().toISOString(),
-            params: config.params || 0,
-            precision: config.precision || 'unknown',
-            status: config.status || 'UNKNOWN'
-          };
-        });
-
-        console.log('Processed data:', processedData);
-        return processedData;
-      } else {
-        console.log('HuggingFace API failed, using mock data');
-        return mockData;
       }
     } catch (error) {
-      console.
-
     }
   };

   const refreshData = async () => {
     setRefreshing(true);
     try {
       const newData = await loadData();
-
-      setData(shuffledData);
       setLastUpdated(new Date());
     } catch (error) {
-
     } finally {
       setRefreshing(false);
     }
@@ -174,10 +70,16 @@ const UrologyLeaderboard = () => {

   useEffect(() => {
     const initializeData = async () => {
-
-
-
-
     };
     initializeData();
   }, []);
@@ -666,15 +568,23 @@ const UrologyLeaderboard = () => {
       React.createElement('div', { style: styles.infoCard },
         React.createElement('h3', { style: { fontSize: '20px', fontWeight: '600', color: 'white', marginBottom: '12px' } }, '📊 About This Evaluation'),
         React.createElement('p', { style: { color: '#d1d5db', marginBottom: '12px' } }, 'This leaderboard evaluates natural language models on their ability to answer urology questions. Models must respond to multiple-choice questions about urological knowledge, demonstrating their understanding and mastery of this medical specialty.'),
-        React.createElement('p', { style: { color: '#d1d5db', marginBottom: '16px' } },
-
         React.createElement('span', null, 'Dataset: ', React.createElement('a', {
           href: 'https://huggingface.co/datasets/SASLeaderboard/results',
           style: { color: '#60a5fa', textDecoration: 'none' },
           target: '_blank',
           rel: 'noopener noreferrer'
-        }, 'SASLeaderboard/results'))
-        lastUpdated && React.createElement('span', { style: { fontSize: '14px' } }, `Last updated: ${lastUpdated.toLocaleString('en-US')}`)
       )
     ),
     React.createElement('div', { style: styles.academicCard },
After the change, the same hunks read:

   const [loading, setLoading] = useState(true);
   const [refreshing, setRefreshing] = useState(false);
   const [lastUpdated, setLastUpdated] = useState(null);
+  const [error, setError] = useState(null);

   const loadData = async () => {
     try {
       const response = await fetch('https://datasets-server.huggingface.co/rows?dataset=SASLeaderboard/results&config=default&split=train');

+      if (!response.ok) {
+        throw new Error(`HTTP ${response.status}: ${response.statusText}`);
       }
+
+      const data = await response.json();
+      console.log('Raw HuggingFace data:', data);
+
+      if (!data.rows || data.rows.length === 0) {
+        throw new Error('No data found in the dataset');
+      }
+
+      const processedData = data.rows.map(row => {
+        const config = row.row.config;
+        const results = row.row.results;
+
+        return {
+          model: config.model || 'Unknown Model',
+          baseModel: config.base_model || '',
+          accuracy: results.overall?.accuracy || 0,
+          totalQuestions: results.overall?.total_questions || 151,
+          correctAnswers: Math.round((results.overall?.accuracy || 0) * (results.overall?.total_questions || 151)),
+          license: config.license || 'Unknown',
+          submitType: config.submit_type || 'unknown',
+          submittedTime: config.submitted_time || new Date().toISOString(),
+          params: config.params || 0,
+          precision: config.precision || 'unknown',
+          status: config.status || 'UNKNOWN'
+        };
+      });
+
+      console.log('Processed data:', processedData);
+      return processedData;
     } catch (error) {
+      console.error('Error loading data from HuggingFace:', error);
+      throw error;
     }
   };
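The mapping above assumes the datasets-server /rows response wraps each record in a rows array whose items carry the record under row, with the submission metadata in row.config and the scores in row.results. As a rough sketch of the shape the map expects — the envelope fields follow the public datasets-server /rows format, while the config/results contents are only what the code reads and the concrete values are made up for illustration:

// Illustrative only: not taken from the real SASLeaderboard/results dataset.
const sampleResponse = {
  rows: [
    {
      row_idx: 0,
      row: {
        config: {
          model: 'org/example-model',          // hypothetical entry
          base_model: 'example-model',
          license: 'Apache 2.0',
          submit_type: 'huggingface',
          submitted_time: '2025-05-22T00:00:00Z',
          params: 7000000000,
          precision: 'bfloat16',
          status: 'FINISHED'
        },
        results: {
          overall: { accuracy: 0.25, total_questions: 151 }
        }
      },
      truncated_cells: []
    }
  ]
};
// Running this through the same map as loadData would yield one leaderboard
// entry with correctAnswers = Math.round(0.25 * 151) = 38.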

   const refreshData = async () => {
     setRefreshing(true);
+    setError(null);
     try {
       const newData = await loadData();
+      setData(newData);
       setLastUpdated(new Date());
     } catch (error) {
+      setError(`Failed to load data: ${error.message}`);
+      setData([]);
     } finally {
       setRefreshing(false);
     }

   useEffect(() => {
     const initializeData = async () => {
+      try {
+        const initialData = await loadData();
+        setData(initialData);
+        setLastUpdated(new Date());
+      } catch (error) {
+        setError(`Failed to load data: ${error.message}`);
+        setData([]);
+      } finally {
+        setLoading(false);
+      }
     };
     initializeData();
   }, []);
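The commit introduces the error state, but none of the hunks shown here render it. One possible way to surface it, sketched in the component's own React.createElement style — styles.errorCard and the retry wiring are assumptions, not part of this commit:

// Hypothetical sketch only. It assumes a styles.errorCard entry exists and
// that this expression is placed inside the component's render tree.
const errorBanner = error && React.createElement('div', { style: styles.errorCard },
  React.createElement('p', { style: { color: '#fca5a5' } }, error),
  React.createElement('button', { onClick: refreshData, disabled: refreshing }, 'Retry')
);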

     React.createElement('div', { style: styles.infoCard },
       React.createElement('h3', { style: { fontSize: '20px', fontWeight: '600', color: 'white', marginBottom: '12px' } }, '📊 About This Evaluation'),
       React.createElement('p', { style: { color: '#d1d5db', marginBottom: '12px' } }, 'This leaderboard evaluates natural language models on their ability to answer urology questions. Models must respond to multiple-choice questions about urological knowledge, demonstrating their understanding and mastery of this medical specialty.'),
+      React.createElement('p', { style: { color: '#d1d5db', marginBottom: '16px' } },
+        'Questions are from the SAS (Servicio Andaluz de Salud) for the ',
+        React.createElement('a', {
+          href: 'https://www.sspa.juntadeandalucia.es/servicioandaluzdesalud/profesionales/ofertas-de-empleo/oferta-de-empleo-publico-puestos-base/oep-extraordinaria-decreto-ley-122022-centros-sas/cuadro-de-evolucion-concurso-oposicion-centros-sas/fea-urologia',
+          target: '_blank',
+          rel: 'noopener noreferrer',
+          style: { color: '#60a5fa', textDecoration: 'none', fontWeight: '600' }
+        }, React.createElement('strong', null, 'Convocatoria Concurso Oposición')),
+        ' - specialized medical examination for urology residents.'
+      ),
+      React.createElement('div', { style: { display: 'flex', justifyContent: 'center', color: '#d1d5db' } },
         React.createElement('span', null, 'Dataset: ', React.createElement('a', {
           href: 'https://huggingface.co/datasets/SASLeaderboard/results',
           style: { color: '#60a5fa', textDecoration: 'none' },
           target: '_blank',
           rel: 'noopener noreferrer'
+        }, 'SASLeaderboard/results'))
       )
     ),
     React.createElement('div', { style: styles.academicCard },
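For readers who find the nested React.createElement calls hard to scan, the added paragraph is roughly equivalent to the following JSX. This is illustrative only; the component is written without JSX, so this block is not part of the commit:

// JSX equivalent of the added info-card paragraph (illustration, not project code).
<p style={{ color: '#d1d5db', marginBottom: '16px' }}>
  {'Questions are from the SAS (Servicio Andaluz de Salud) for the '}
  <a
    href="https://www.sspa.juntadeandalucia.es/servicioandaluzdesalud/profesionales/ofertas-de-empleo/oferta-de-empleo-publico-puestos-base/oep-extraordinaria-decreto-ley-122022-centros-sas/cuadro-de-evolucion-concurso-oposicion-centros-sas/fea-urologia"
    target="_blank"
    rel="noopener noreferrer"
    style={{ color: '#60a5fa', textDecoration: 'none', fontWeight: '600' }}
  >
    <strong>Convocatoria Concurso Oposición</strong>
  </a>
  {' - specialized medical examination for urology residents.'}
</p>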