David Pomerenke
committed on
Commit
·
ecf4195
1
Parent(s):
59a65af
Model table using React
Browse files- evals.py +28 -5
- frontend/package-lock.json +75 -0
- frontend/package.json +1 -0
- frontend/public/results.json +44 -0
- frontend/src/App.css +26 -25
- frontend/src/App.js +41 -13
- frontend/src/components/Header.js +29 -0
- frontend/src/components/Medal.js +59 -0
- frontend/src/components/ModelTable.js +48 -0
- results.json +0 -0
evals.py
CHANGED
|
@@ -481,6 +481,10 @@ def mean(lst):
|
|
| 481 |
return sum(lst) / len(lst) if lst else None
|
| 482 |
|
| 483 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 484 |
async def main():
|
| 485 |
print("running evaluations")
|
| 486 |
results = [
|
|
@@ -517,13 +521,32 @@ async def main():
|
|
| 517 |
.reset_index()
|
| 518 |
)
|
| 519 |
all_results = {
|
| 520 |
-
"tasks": task_results.replace({np.nan:None}).to_dict(orient="records"),
|
| 521 |
-
"models": model_results.replace({np.nan:None}).to_dict(orient="records"),
|
| 522 |
-
"languages": lang_results.replace({np.nan:None}).to_dict(orient="records"),
|
| 523 |
-
"scores": results.replace({np.nan:None}).to_dict(orient="records"),
|
| 524 |
}
|
| 525 |
-
with open("
|
| 526 |
json.dump(all_results, f, indent=2, ensure_ascii=False)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 527 |
|
| 528 |
|
| 529 |
if __name__ == "__main__":
|
|
|
|
| 481 |
return sum(lst) / len(lst) if lst else None
|
| 482 |
|
| 483 |
|
| 484 |
+
def fmt_name(s):
    """Turn a hyphen-separated slug into a human-readable display name.

    Each hyphen-separated word is capitalized (first character upper-cased,
    the rest lower-cased) and the words are joined with single spaces. Two
    brand spellings are then restored: "Gpt" becomes "GPT", and a lowercase
    "ai" that ends a word becomes "AI" (so "openai" -> "OpenAI").

    Only a word-final "ai" is upper-cased. Replacing every "ai" substring
    would corrupt ordinary words, e.g. "haiku" -> "HAIku".

    Args:
        s: Slug such as "gpt-4o-mini" or "meta-llama".

    Returns:
        The formatted name, e.g. "GPT 4o Mini" or "Meta Llama".
    """
    words = []
    for word in s.split("-"):
        word = word.capitalize()
        # After capitalize() a bare "ai" is "Ai", so this only matches a
        # suffix of a longer word such as "Openai" or "Mistralai".
        if word.endswith("ai"):
            word = word[:-2] + "AI"
        words.append(word)
    return " ".join(words).replace("Gpt", "GPT")
|
| 486 |
+
|
| 487 |
+
|
| 488 |
async def main():
|
| 489 |
print("running evaluations")
|
| 490 |
results = [
|
|
|
|
| 521 |
.reset_index()
|
| 522 |
)
|
| 523 |
all_results = {
|
| 524 |
+
"tasks": task_results.replace({np.nan: None}).to_dict(orient="records"),
|
| 525 |
+
"models": model_results.replace({np.nan: None}).to_dict(orient="records"),
|
| 526 |
+
"languages": lang_results.replace({np.nan: None}).to_dict(orient="records"),
|
| 527 |
+
"scores": results.replace({np.nan: None}).to_dict(orient="records"),
|
| 528 |
}
|
| 529 |
+
with open("results.json", "w") as f:
|
| 530 |
json.dump(all_results, f, indent=2, ensure_ascii=False)
|
| 531 |
+
model_results["task_metric"] = model_results["task"] + "_" + model_results["metric"]
|
| 532 |
+
model_results = model_results.drop(columns=["task", "metric"])
|
| 533 |
+
model_table = model_results.pivot(
|
| 534 |
+
index="model", columns="task_metric", values="score"
|
| 535 |
+
).fillna(0)
|
| 536 |
+
model_table["average"] = model_table.mean(axis=1)
|
| 537 |
+
model_table = model_table.sort_values(by="average", ascending=False)
|
| 538 |
+
model_table = model_table.round(2).reset_index()
|
| 539 |
+
model_table["provider"] = model_table["model"].str.split("/").str[0].apply(fmt_name)
|
| 540 |
+
model_table["model"] = model_table["model"].str.split("/").str[1].apply(fmt_name)
|
| 541 |
+
model_table["rank"] = model_table.index + 1
|
| 542 |
+
model_table = model_table[
|
| 543 |
+
["rank", "provider", "model", "average", *model_table.columns[1:-3]]
|
| 544 |
+
]
|
| 545 |
+
all_tables = {
|
| 546 |
+
"model_table": model_table.to_dict(orient="records"),
|
| 547 |
+
}
|
| 548 |
+
with open("frontend/public/results.json", "w") as f:
|
| 549 |
+
json.dump(all_tables, f, indent=2, ensure_ascii=False)
|
| 550 |
|
| 551 |
|
| 552 |
if __name__ == "__main__":
|
frontend/package-lock.json
CHANGED
|
@@ -12,6 +12,7 @@
|
|
| 12 |
"@testing-library/jest-dom": "^6.6.3",
|
| 13 |
"@testing-library/react": "^16.2.0",
|
| 14 |
"@testing-library/user-event": "^13.5.0",
|
|
|
|
| 15 |
"react": "^19.0.0",
|
| 16 |
"react-dom": "^19.0.0",
|
| 17 |
"react-scripts": "5.0.1",
|
|
@@ -3833,6 +3834,25 @@
|
|
| 3833 |
"integrity": "sha512-hKormJbkJqzQGhziax5PItDUTMAM9uE2XXQmM37dyd4hVM+5aVl7oVxMVUiVQn2oCQFN/LKCZdvSM0pFRqbSmQ==",
|
| 3834 |
"license": "MIT"
|
| 3835 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3836 |
"node_modules/@types/resolve": {
|
| 3837 |
"version": "1.17.1",
|
| 3838 |
"resolved": "https://registry.npmjs.org/@types/resolve/-/resolve-1.17.1.tgz",
|
|
@@ -6363,6 +6383,12 @@
|
|
| 6363 |
"integrity": "sha512-b0tGHbfegbhPJpxpiBPU2sCkigAqtM9O121le6bbOlgyV+NyGyCmVfJ6QW9eRjz8CpNfWEOYBIMIGRYkLwsIYg==",
|
| 6364 |
"license": "MIT"
|
| 6365 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6366 |
"node_modules/damerau-levenshtein": {
|
| 6367 |
"version": "1.0.8",
|
| 6368 |
"resolved": "https://registry.npmjs.org/damerau-levenshtein/-/damerau-levenshtein-1.0.8.tgz",
|
|
@@ -6689,6 +6715,16 @@
|
|
| 6689 |
"utila": "~0.4"
|
| 6690 |
}
|
| 6691 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6692 |
"node_modules/dom-serializer": {
|
| 6693 |
"version": "1.4.1",
|
| 6694 |
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.4.1.tgz",
|
|
@@ -13559,6 +13595,29 @@
|
|
| 13559 |
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
|
| 13560 |
}
|
| 13561 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13562 |
"node_modules/process-nextick-args": {
|
| 13563 |
"version": "2.0.1",
|
| 13564 |
"resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
|
|
@@ -13996,6 +14055,22 @@
|
|
| 13996 |
}
|
| 13997 |
}
|
| 13998 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13999 |
"node_modules/read-cache": {
|
| 14000 |
"version": "1.0.0",
|
| 14001 |
"resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz",
|
|
|
|
| 12 |
"@testing-library/jest-dom": "^6.6.3",
|
| 13 |
"@testing-library/react": "^16.2.0",
|
| 14 |
"@testing-library/user-event": "^13.5.0",
|
| 15 |
+
"primereact": "^10.9.3",
|
| 16 |
"react": "^19.0.0",
|
| 17 |
"react-dom": "^19.0.0",
|
| 18 |
"react-scripts": "5.0.1",
|
|
|
|
| 3834 |
"integrity": "sha512-hKormJbkJqzQGhziax5PItDUTMAM9uE2XXQmM37dyd4hVM+5aVl7oVxMVUiVQn2oCQFN/LKCZdvSM0pFRqbSmQ==",
|
| 3835 |
"license": "MIT"
|
| 3836 |
},
|
| 3837 |
+
"node_modules/@types/react": {
|
| 3838 |
+
"version": "19.0.10",
|
| 3839 |
+
"resolved": "https://registry.npmjs.org/@types/react/-/react-19.0.10.tgz",
|
| 3840 |
+
"integrity": "sha512-JuRQ9KXLEjaUNjTWpzuR231Z2WpIwczOkBEIvbHNCzQefFIT0L8IqE6NV6ULLyC1SI/i234JnDoMkfg+RjQj2g==",
|
| 3841 |
+
"license": "MIT",
|
| 3842 |
+
"peer": true,
|
| 3843 |
+
"dependencies": {
|
| 3844 |
+
"csstype": "^3.0.2"
|
| 3845 |
+
}
|
| 3846 |
+
},
|
| 3847 |
+
"node_modules/@types/react-transition-group": {
|
| 3848 |
+
"version": "4.4.12",
|
| 3849 |
+
"resolved": "https://registry.npmjs.org/@types/react-transition-group/-/react-transition-group-4.4.12.tgz",
|
| 3850 |
+
"integrity": "sha512-8TV6R3h2j7a91c+1DXdJi3Syo69zzIZbz7Lg5tORM5LEJG7X/E6a1V3drRyBRZq7/utz7A+c4OgYLiLcYGHG6w==",
|
| 3851 |
+
"license": "MIT",
|
| 3852 |
+
"peerDependencies": {
|
| 3853 |
+
"@types/react": "*"
|
| 3854 |
+
}
|
| 3855 |
+
},
|
| 3856 |
"node_modules/@types/resolve": {
|
| 3857 |
"version": "1.17.1",
|
| 3858 |
"resolved": "https://registry.npmjs.org/@types/resolve/-/resolve-1.17.1.tgz",
|
|
|
|
| 6383 |
"integrity": "sha512-b0tGHbfegbhPJpxpiBPU2sCkigAqtM9O121le6bbOlgyV+NyGyCmVfJ6QW9eRjz8CpNfWEOYBIMIGRYkLwsIYg==",
|
| 6384 |
"license": "MIT"
|
| 6385 |
},
|
| 6386 |
+
"node_modules/csstype": {
|
| 6387 |
+
"version": "3.1.3",
|
| 6388 |
+
"resolved": "https://registry.npmjs.org/csstype/-/csstype-3.1.3.tgz",
|
| 6389 |
+
"integrity": "sha512-M1uQkMl8rQK/szD0LNhtqxIPLpimGm8sOBwU7lLnCpSbTyY3yeU1Vc7l4KT5zT4s/yOxHH5O7tIuuLOCnLADRw==",
|
| 6390 |
+
"license": "MIT"
|
| 6391 |
+
},
|
| 6392 |
"node_modules/damerau-levenshtein": {
|
| 6393 |
"version": "1.0.8",
|
| 6394 |
"resolved": "https://registry.npmjs.org/damerau-levenshtein/-/damerau-levenshtein-1.0.8.tgz",
|
|
|
|
| 6715 |
"utila": "~0.4"
|
| 6716 |
}
|
| 6717 |
},
|
| 6718 |
+
"node_modules/dom-helpers": {
|
| 6719 |
+
"version": "5.2.1",
|
| 6720 |
+
"resolved": "https://registry.npmjs.org/dom-helpers/-/dom-helpers-5.2.1.tgz",
|
| 6721 |
+
"integrity": "sha512-nRCa7CK3VTrM2NmGkIy4cbK7IZlgBE/PYMn55rrXefr5xXDP0LdtfPnblFDoVdcAfslJ7or6iqAUnx0CCGIWQA==",
|
| 6722 |
+
"license": "MIT",
|
| 6723 |
+
"dependencies": {
|
| 6724 |
+
"@babel/runtime": "^7.8.7",
|
| 6725 |
+
"csstype": "^3.0.2"
|
| 6726 |
+
}
|
| 6727 |
+
},
|
| 6728 |
"node_modules/dom-serializer": {
|
| 6729 |
"version": "1.4.1",
|
| 6730 |
"resolved": "https://registry.npmjs.org/dom-serializer/-/dom-serializer-1.4.1.tgz",
|
|
|
|
| 13595 |
"url": "https://github.com/chalk/ansi-styles?sponsor=1"
|
| 13596 |
}
|
| 13597 |
},
|
| 13598 |
+
"node_modules/primereact": {
|
| 13599 |
+
"version": "10.9.3",
|
| 13600 |
+
"resolved": "https://registry.npmjs.org/primereact/-/primereact-10.9.3.tgz",
|
| 13601 |
+
"integrity": "sha512-mvIVw4Ap5HfEviWdnPFY14h9Bf8k4rTMkq7swHsJhNfgyH88osrdbjuDQ66mPwjPgI6DwqN2jMo4zxNNxe7nGQ==",
|
| 13602 |
+
"license": "MIT",
|
| 13603 |
+
"dependencies": {
|
| 13604 |
+
"@types/react-transition-group": "^4.4.1",
|
| 13605 |
+
"react-transition-group": "^4.4.1"
|
| 13606 |
+
},
|
| 13607 |
+
"engines": {
|
| 13608 |
+
"node": ">=14.0.0"
|
| 13609 |
+
},
|
| 13610 |
+
"peerDependencies": {
|
| 13611 |
+
"@types/react": "^17.0.0 || ^18.0.0 || ^19.0.0",
|
| 13612 |
+
"react": "^17.0.0 || ^18.0.0 || ^19.0.0",
|
| 13613 |
+
"react-dom": "^17.0.0 || ^18.0.0 || ^19.0.0"
|
| 13614 |
+
},
|
| 13615 |
+
"peerDependenciesMeta": {
|
| 13616 |
+
"@types/react": {
|
| 13617 |
+
"optional": true
|
| 13618 |
+
}
|
| 13619 |
+
}
|
| 13620 |
+
},
|
| 13621 |
"node_modules/process-nextick-args": {
|
| 13622 |
"version": "2.0.1",
|
| 13623 |
"resolved": "https://registry.npmjs.org/process-nextick-args/-/process-nextick-args-2.0.1.tgz",
|
|
|
|
| 14055 |
}
|
| 14056 |
}
|
| 14057 |
},
|
| 14058 |
+
"node_modules/react-transition-group": {
|
| 14059 |
+
"version": "4.4.5",
|
| 14060 |
+
"resolved": "https://registry.npmjs.org/react-transition-group/-/react-transition-group-4.4.5.tgz",
|
| 14061 |
+
"integrity": "sha512-pZcd1MCJoiKiBR2NRxeCRg13uCXbydPnmB4EOeRrY7480qNWO8IIgQG6zlDkm6uRMsURXPuKq0GWtiM59a5Q6g==",
|
| 14062 |
+
"license": "BSD-3-Clause",
|
| 14063 |
+
"dependencies": {
|
| 14064 |
+
"@babel/runtime": "^7.5.5",
|
| 14065 |
+
"dom-helpers": "^5.0.1",
|
| 14066 |
+
"loose-envify": "^1.4.0",
|
| 14067 |
+
"prop-types": "^15.6.2"
|
| 14068 |
+
},
|
| 14069 |
+
"peerDependencies": {
|
| 14070 |
+
"react": ">=16.6.0",
|
| 14071 |
+
"react-dom": ">=16.6.0"
|
| 14072 |
+
}
|
| 14073 |
+
},
|
| 14074 |
"node_modules/read-cache": {
|
| 14075 |
"version": "1.0.0",
|
| 14076 |
"resolved": "https://registry.npmjs.org/read-cache/-/read-cache-1.0.0.tgz",
|
frontend/package.json
CHANGED
|
@@ -7,6 +7,7 @@
|
|
| 7 |
"@testing-library/jest-dom": "^6.6.3",
|
| 8 |
"@testing-library/react": "^16.2.0",
|
| 9 |
"@testing-library/user-event": "^13.5.0",
|
|
|
|
| 10 |
"react": "^19.0.0",
|
| 11 |
"react-dom": "^19.0.0",
|
| 12 |
"react-scripts": "5.0.1",
|
|
|
|
| 7 |
"@testing-library/jest-dom": "^6.6.3",
|
| 8 |
"@testing-library/react": "^16.2.0",
|
| 9 |
"@testing-library/user-event": "^13.5.0",
|
| 10 |
+
"primereact": "^10.9.3",
|
| 11 |
"react": "^19.0.0",
|
| 12 |
"react-dom": "^19.0.0",
|
| 13 |
"react-scripts": "5.0.1",
|
frontend/public/results.json
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"model_table": [
|
| 3 |
+
{
|
| 4 |
+
"rank": 1,
|
| 5 |
+
"provider": "Google",
|
| 6 |
+
"model": "Gemini 2.0 Flash 001",
|
| 7 |
+
"average": 0.68,
|
| 8 |
+
"classification_accuracy": 0.87,
|
| 9 |
+
"language_modeling_chrf": 0.96,
|
| 10 |
+
"translation_bleu": 0.36,
|
| 11 |
+
"translation_chrf": 0.53
|
| 12 |
+
},
|
| 13 |
+
{
|
| 14 |
+
"rank": 2,
|
| 15 |
+
"provider": "OpenAI",
|
| 16 |
+
"model": "GPT 4o Mini",
|
| 17 |
+
"average": 0.56,
|
| 18 |
+
"classification_accuracy": 0.51,
|
| 19 |
+
"language_modeling_chrf": 0.95,
|
| 20 |
+
"translation_bleu": 0.31,
|
| 21 |
+
"translation_chrf": 0.47
|
| 22 |
+
},
|
| 23 |
+
{
|
| 24 |
+
"rank": 3,
|
| 25 |
+
"provider": "MistralAI",
|
| 26 |
+
"model": "Mistral Small 24b Instruct 2501",
|
| 27 |
+
"average": 0.54,
|
| 28 |
+
"classification_accuracy": 0.57,
|
| 29 |
+
"language_modeling_chrf": 0.9,
|
| 30 |
+
"translation_bleu": 0.26,
|
| 31 |
+
"translation_chrf": 0.42
|
| 32 |
+
},
|
| 33 |
+
{
|
| 34 |
+
"rank": 4,
|
| 35 |
+
"provider": "Meta Llama",
|
| 36 |
+
"model": "Llama 3.3 70b Instruct",
|
| 37 |
+
"average": 0.53,
|
| 38 |
+
"classification_accuracy": 0.51,
|
| 39 |
+
"language_modeling_chrf": 0.94,
|
| 40 |
+
"translation_bleu": 0.25,
|
| 41 |
+
"translation_chrf": 0.43
|
| 42 |
+
}
|
| 43 |
+
]
|
| 44 |
+
}
|
frontend/src/App.css
CHANGED
|
@@ -1,38 +1,39 @@
|
|
| 1 |
.App {
|
| 2 |
text-align: center;
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
pointer-events: none;
|
| 8 |
-
}
|
| 9 |
-
|
| 10 |
-
@media (prefers-reduced-motion: no-preference) {
|
| 11 |
-
.App-logo {
|
| 12 |
-
animation: App-logo-spin infinite 20s linear;
|
| 13 |
-
}
|
| 14 |
}
|
| 15 |
|
| 16 |
.App-header {
|
| 17 |
-
background-color: #
|
| 18 |
-
min-height:
|
| 19 |
display: flex;
|
| 20 |
flex-direction: column;
|
| 21 |
align-items: center;
|
| 22 |
-
justify-content:
|
| 23 |
-
|
| 24 |
-
color:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
}
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
| 29 |
}
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
to {
|
| 36 |
-
transform: rotate(360deg);
|
| 37 |
-
}
|
| 38 |
}
|
|
|
|
| 1 |
.App {
|
| 2 |
text-align: center;
|
| 3 |
+
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Open Sans', 'Helvetica Neue', sans-serif;
|
| 4 |
+
max-width: 1200px;
|
| 5 |
+
margin: 0 auto;
|
| 6 |
+
padding: 0 20px;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
}
|
| 8 |
|
| 9 |
.App-header {
|
| 10 |
+
background-color: #ffffff;
|
| 11 |
+
min-height: 50vh;
|
| 12 |
display: flex;
|
| 13 |
flex-direction: column;
|
| 14 |
align-items: center;
|
| 15 |
+
justify-content: flex-start;
|
| 16 |
+
padding-top: 60px;
|
| 17 |
+
color: #333;
|
| 18 |
+
}
|
| 19 |
+
|
| 20 |
+
.emoji-container {
|
| 21 |
+
margin-bottom: 10px;
|
| 22 |
+
}
|
| 23 |
+
|
| 24 |
+
.header-emoji {
|
| 25 |
+
font-size: 70px;
|
| 26 |
}
|
| 27 |
|
| 28 |
+
h1 {
|
| 29 |
+
font-size: 2.5rem;
|
| 30 |
+
margin-bottom: 15px;
|
| 31 |
+
font-weight: 700;
|
| 32 |
+
color: #222;
|
| 33 |
}
|
| 34 |
|
| 35 |
+
p {
|
| 36 |
+
font-size: 1.15rem;
|
| 37 |
+
color: #555;
|
| 38 |
+
margin-top: 0;
|
|
|
|
|
|
|
|
|
|
| 39 |
}
|
frontend/src/App.js
CHANGED
|
@@ -1,22 +1,50 @@
|
|
| 1 |
-
import logo from './logo.svg';
|
| 2 |
import './App.css';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
function App() {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5 |
return (
|
| 6 |
<div className="App">
|
| 7 |
<header className="App-header">
|
| 8 |
-
<
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
</
|
| 12 |
-
<
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
|
|
|
| 20 |
</header>
|
| 21 |
</div>
|
| 22 |
);
|
|
|
|
|
|
|
| 1 |
import './App.css';
|
| 2 |
+
import { useState, useEffect } from 'react';
|
| 3 |
+
import { PrimeReactProvider } from 'primereact/api';
|
| 4 |
+
import "primereact/resources/themes/lara-light-cyan/theme.css";
|
| 5 |
+
import ModelTable from './components/ModelTable';
|
| 6 |
+
|
| 7 |
+
|
| 8 |
|
| 9 |
function App() {
|
| 10 |
+
const [data, setData] = useState(null);
|
| 11 |
+
const [loading, setLoading] = useState(true);
|
| 12 |
+
const [error, setError] = useState(null);
|
| 13 |
+
|
| 14 |
+
useEffect(() => {
|
| 15 |
+
fetch('/results.json')
|
| 16 |
+
.then(response => {
|
| 17 |
+
if (!response.ok) {
|
| 18 |
+
throw new Error('Network response was not ok');
|
| 19 |
+
}
|
| 20 |
+
return response.json();
|
| 21 |
+
})
|
| 22 |
+
.then(jsonData => {
|
| 23 |
+
setData(jsonData);
|
| 24 |
+
setLoading(false);
|
| 25 |
+
})
|
| 26 |
+
.catch(err => {
|
| 27 |
+
setError(err.message);
|
| 28 |
+
setLoading(false);
|
| 29 |
+
});
|
| 30 |
+
}, []);
|
| 31 |
+
|
| 32 |
return (
|
| 33 |
<div className="App">
|
| 34 |
<header className="App-header">
|
| 35 |
+
<div className="emoji-container">
|
| 36 |
+
<span role="img" aria-label="Hugging Face Emoji" className="header-emoji">🌍</span>
|
| 37 |
+
</div>
|
| 38 |
+
<h1>Language AI Monitor</h1>
|
| 39 |
+
<p>Tracking language proficiency of AI models for every language</p>
|
| 40 |
+
|
| 41 |
+
<div className="data-container">
|
| 42 |
+
<PrimeReactProvider>
|
| 43 |
+
{loading && <p>...</p>}
|
| 44 |
+
{error && <p>Error: {error}</p>}
|
| 45 |
+
{data && <ModelTable data={data} />}
|
| 46 |
+
</PrimeReactProvider>
|
| 47 |
+
</div>
|
| 48 |
</header>
|
| 49 |
</div>
|
| 50 |
);
|
frontend/src/components/Header.js
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import React from 'react';
|
| 2 |
+
import { Box, Typography, Tab, Tabs, IconButton } from '@mui/material';
|
| 3 |
+
import DarkModeOutlinedIcon from '@mui/icons-material/DarkModeOutlined';
|
| 4 |
+
import OpenInNewIcon from '@mui/icons-material/OpenInNew';
|
| 5 |
+
|
| 6 |
+
function Header() {
|
| 7 |
+
const [value, setValue] = React.useState(0);
|
| 8 |
+
|
| 9 |
+
const handleChange = (event, newValue) => {
|
| 10 |
+
setValue(newValue);
|
| 11 |
+
};
|
| 12 |
+
|
| 13 |
+
return (
|
| 14 |
+
<Box sx={{ textAlign: 'center', mb: 4 }}>
|
| 15 |
+
<Box sx={{ display: 'flex', justifyContent: 'center', mb: 2 }}>
|
| 16 |
+
<div style={{ fontSize: '50px' }}>🤗</div>
|
| 17 |
+
</Box>
|
| 18 |
+
<Typography variant="h4" component="h1" gutterBottom>
|
| 19 |
+
Open LLM Leaderboard Archived
|
| 20 |
+
</Typography>
|
| 21 |
+
<Typography variant="subtitle1" gutterBottom sx={{ color: 'text.secondary' }}>
|
| 22 |
+
Comparing Large Language Models in an <strong>open</strong> and <strong>reproducible</strong> way
|
| 23 |
+
</Typography>
|
| 24 |
+
|
| 25 |
+
</Box>
|
| 26 |
+
);
|
| 27 |
+
}
|
| 28 |
+
|
| 29 |
+
export default Header;
|
frontend/src/components/Medal.js
ADDED
|
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import React from 'react';
|
| 2 |
+
|
| 3 |
+
const Medal = ({ rank }) => {
|
| 4 |
+
const baseMedalStyle = {
|
| 5 |
+
margin: '0px',
|
| 6 |
+
fontWeight: '900',
|
| 7 |
+
fontStretch: '150%',
|
| 8 |
+
fontFamily: 'Inter, -apple-system, sans-serif',
|
| 9 |
+
width: '24px',
|
| 10 |
+
height: '24px',
|
| 11 |
+
borderRadius: '50%',
|
| 12 |
+
display: 'flex',
|
| 13 |
+
alignItems: 'center',
|
| 14 |
+
justifyContent: 'center',
|
| 15 |
+
fontSize: '0.95rem',
|
| 16 |
+
lineHeight: '1',
|
| 17 |
+
padding: '0px',
|
| 18 |
+
position: 'relative'
|
| 19 |
+
}
|
| 20 |
+
const medalStyle1 = {
|
| 21 |
+
...baseMedalStyle,
|
| 22 |
+
color: 'rgb(181, 138, 27)',
|
| 23 |
+
background:
|
| 24 |
+
'linear-gradient(135deg, rgb(255, 247, 224) 0%, rgb(255, 215, 0) 100%)',
|
| 25 |
+
border: '1px solid rgba(212, 160, 23, 0.35)',
|
| 26 |
+
boxShadow: 'rgba(212, 160, 23, 0.8) 1px 1px 0px'
|
| 27 |
+
}
|
| 28 |
+
const medalStyle2 = {
|
| 29 |
+
color: 'rgb(102, 115, 128)',
|
| 30 |
+
background:
|
| 31 |
+
'linear-gradient(135deg, rgb(255, 255, 255) 0%, rgb(216, 227, 237) 100%)',
|
| 32 |
+
border: '1px solid rgba(124, 139, 153, 0.35)',
|
| 33 |
+
boxShadow: 'rgba(124, 139, 153, 0.8) 1px 1px 0px'
|
| 34 |
+
}
|
| 35 |
+
const medalStyle3 = {
|
| 36 |
+
color: 'rgb(184, 92, 47)',
|
| 37 |
+
background:
|
| 38 |
+
'linear-gradient(135deg, rgb(253, 240, 233) 0%, rgb(255, 188, 140) 100%)',
|
| 39 |
+
border: '1px solid rgba(204, 108, 61, 0.35)',
|
| 40 |
+
boxShadow: 'rgba(204, 108, 61, 0.8) 1px 1px 0px'
|
| 41 |
+
}
|
| 42 |
+
const medalStyle = {
|
| 43 |
+
...baseMedalStyle,
|
| 44 |
+
...(rank < 4 ? [medalStyle1, medalStyle2, medalStyle3][rank - 1] : {})
|
| 45 |
+
}
|
| 46 |
+
return (
|
| 47 |
+
<div
|
| 48 |
+
style={{
|
| 49 |
+
alignItems: 'center',
|
| 50 |
+
justifyContent: 'center',
|
| 51 |
+
display: 'flex'
|
| 52 |
+
}}
|
| 53 |
+
>
|
| 54 |
+
<div style={medalStyle}>{rank}</div>
|
| 55 |
+
</div>
|
| 56 |
+
)
|
| 57 |
+
}
|
| 58 |
+
|
| 59 |
+
export default Medal;
|
frontend/src/components/ModelTable.js
ADDED
|
@@ -0,0 +1,48 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import { DataTable } from 'primereact/datatable';
|
| 2 |
+
import { Column } from 'primereact/column';
|
| 3 |
+
import { FilterMatchMode } from 'primereact/api';
|
| 4 |
+
import { MultiSelect } from 'primereact/multiselect';
|
| 5 |
+
import { useState } from 'react';
|
| 6 |
+
import Medal from './Medal';
|
| 7 |
+
const ModelTable = ({ data }) => {
|
| 8 |
+
const [filters, setFilters] = useState({
|
| 9 |
+
"provider": { value: null, matchMode: FilterMatchMode.IN },
|
| 10 |
+
"model": { value: null, matchMode: FilterMatchMode.CONTAINS }
|
| 11 |
+
});
|
| 12 |
+
const table = data.model_table;
|
| 13 |
+
const rankBodyTemplate = (rowData) => {
|
| 14 |
+
return <Medal rank={rowData.rank} />;
|
| 15 |
+
};
|
| 16 |
+
|
| 17 |
+
const providers = [...new Set(table.map(item => item.provider))];
|
| 18 |
+
const providerRowFilterTemplate = (options) => {
|
| 19 |
+
return (
|
| 20 |
+
<MultiSelect
|
| 21 |
+
value={options.value}
|
| 22 |
+
options={providers}
|
| 23 |
+
onChange={(e) => {
|
| 24 |
+
options.filterApplyCallback(e.value);
|
| 25 |
+
setFilters(prevFilters => ({
|
| 26 |
+
...prevFilters,
|
| 27 |
+
provider: { value: e.value, matchMode: FilterMatchMode.IN }
|
| 28 |
+
}));
|
| 29 |
+
}}
|
| 30 |
+
placeholder="All providers"
|
| 31 |
+
/>
|
| 32 |
+
);
|
| 33 |
+
};
|
| 34 |
+
|
| 35 |
+
return (
|
| 36 |
+
<DataTable value={table} header={<>AI Models</>} sortField="average" removableSort filters={filters} filterDisplay="menu">
|
| 37 |
+
<Column field="rank" body={rankBodyTemplate} />
|
| 38 |
+
<Column field="provider" header="Provider" filter filterElement={providerRowFilterTemplate} showFilterMatchModes={false} />
|
| 39 |
+
<Column field="model" header="Model" filter showFilterMatchModes={false} />
|
| 40 |
+
<Column field="average" header="Average" sortable />
|
| 41 |
+
<Column field="translation_chrf" header="Translation" sortable />
|
| 42 |
+
<Column field="classification_accuracy" header="Classification" sortable />
|
| 43 |
+
<Column field="language_modeling_chrf" header="Language Modeling" sortable />
|
| 44 |
+
</DataTable>
|
| 45 |
+
);
|
| 46 |
+
};
|
| 47 |
+
|
| 48 |
+
export default ModelTable;
|
results.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|