Update assets/results/aragen_v2_results.json
Browse files
assets/results/aragen_v2_results.json
CHANGED
@@ -3030,6 +3030,36 @@
|
|
3030 |
}
|
3031 |
},
|
3032 |
{
|
3033 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3034 |
}
|
3035 |
]
|
|
|
3030 |
}
|
3031 |
},
|
3032 |
{
|
3033 |
+
"claude-3.5-sonnet Scores": {
|
3034 |
+
"3C3H Scores": {
|
3035 |
+
"Correctness": 0.7532,
|
3036 |
+
"Completeness": 0.703,
|
3037 |
+
"Conciseness": 0.483,
|
3038 |
+
"Helpfulness": 0.7089,
|
3039 |
+
"Honesty": 0.7483,
|
3040 |
+
"Harmlessness": 0.7517,
|
3041 |
+
"3C3H Score": 0.6914
|
3042 |
+
},
|
3043 |
+
"Tasks Scores": {
|
3044 |
+
"Question Answering (QA)": 0.5892,
|
3045 |
+
"Orthographic and Grammatical Analysis": 0.575,
|
3046 |
+
"Safety": 0.8156,
|
3047 |
+
"Reasoning": 0.9607
|
3048 |
+
}
|
3049 |
+
},
|
3050 |
+
"Meta": {
|
3051 |
+
"Model Name": "gpt-5-2025-08-07",
|
3052 |
+
"License": "Proprietary",
|
3053 |
+
"Revision": "UNK",
|
3054 |
+
"Precision": "UNK",
|
3055 |
+
"Params": "UNK",
|
3056 |
+
"Total Entries": 340,
|
3057 |
+
"Successful Entries": 339,
|
3058 |
+
"Failed Entries": 1,
|
3059 |
+
"Success Ratio": 0.9971
|
3060 |
+
}
|
3061 |
+
},
|
3062 |
+
{
|
3063 |
+
"_last_sync_timestamp": "2025-08-12T11:24:33.422103"
|
3064 |
}
|
3065 |
]
|