Commit · 33e7caf
Parent(s): e2369db

External models: Sabia-3 and Llama-3.1-405b
external_models_results.json CHANGED (+45 -0)
@@ -241,5 +241,50 @@
         },
         "result_metrics_average": 0.7777870380406591,
         "result_metrics_npm": 0.6740728488043128
+    },
+    {
+        "model": "llama_405b_instruct",
+        "name": "meta-llama/Meta-Llama-3.1-405B-Instruct (Vertex AI)",
+        "link": "https://cloud.google.com/vertex-ai",
+        "date": "2024-08-20",
+        "status": "full",
+        "main_language": "English",
+        "model_type": "chat",
+        "params": 406.0,
+        "result_metrics": {
+            "enem_challenge": 0.8523442967109867,
+            "bluex": 0.8011126564673157,
+            "oab_exams": 0.7640091116173121,
+            "assin2_sts": 0.7888441732870783,
+            "assin2_rte": 0.6317630318610981,
+            "faquad_nli": 0.825063276593557,
+            "hatebr_offensive": 0.9073940659389119,
+            "portuguese_hate_speech": 0.7191480935512969,
+            "tweetsentbr": 0.7821434639106575
+        },
+        "result_metrics_average": 0.7857580188820238,
+        "result_metrics_npm": 0.6584973442501938
+    },
+    {
+        "model": "sabia-3",
+        "name": "Sabiá-3",
+        "link": "https://www.maritaca.ai/",
+        "date": "2024-08-20",
+        "status": "full",
+        "main_language": "Portuguese",
+        "model_type": "proprietary",
+        "result_metrics": {
+            "enem_challenge": 0.8789363191042687,
+            "bluex": 0.7899860917941586,
+            "oab_exams": 0.8391799544419134,
+            "assin2_sts": 0.8253863689009022,
+            "assin2_rte": 0.9477034821619312,
+            "faquad_nli": 0.8243848812618203,
+            "hatebr_offensive": 0.5519158516393349,
+            "portuguese_hate_speech": 0.48273809523809524,
+            "tweetsentbr": 0.5632959814986498
+        },
+        "result_metrics_average": 0.744836336226786,
+        "result_metrics_npm": 0.5802643096708316
     }
 ]
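
Sanity check: result_metrics_average in each entry appears to be the plain arithmetic mean of the nine per-task scores (for the Llama entry, the nine values above average to 0.7857580188820238, matching the stored field). Below is a minimal Python sketch that recomputes it from the file; the assumptions are that external_models_results.json is a top-level JSON array of model entries, as the diff context suggests, and the 1e-9 tolerance is an arbitrary choice, not taken from the commit.

import json
from statistics import mean

# Recompute result_metrics_average for every entry and compare it
# against the stored value. Assumes the file is a top-level JSON array
# of model entries, as the diff context suggests.
with open("external_models_results.json", encoding="utf-8") as f:
    entries = json.load(f)

for entry in entries:
    stored = entry["result_metrics_average"]
    recomputed = mean(entry["result_metrics"].values())
    flag = "OK" if abs(stored - recomputed) < 1e-9 else "MISMATCH"
    print(f"{entry['model']}: stored={stored:.10f} recomputed={recomputed:.10f} {flag}")

result_metrics_npm is presumably a normalized variant of the same scores, but its formula is not shown in this diff, so it is left out of the check.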