ai-forever committed on
Commit 3cf227f · verified · 1 Parent(s): 8090724

Add/update results for RuadaptQwen2.5-32B-Instruct (version 1.34.1, guid 3ffd4582172e4dd08a6df3a2bc7b0a70)

Files changed (1): results.json (+79, -0)
results.json CHANGED
@@ -241,6 +241,85 @@
           "n_questions": 600,
           "submit_timestamp": ""
         }
+      },
+      "3ffd4582172e4dd08a6df3a2bc7b0a70": {
+        "model_name": "RuadaptQwen2.5-32B-Instruct",
+        "timestamp": "2025-07-03T14:00:09",
+        "config": {
+          "embedding_model": "FRIDA_2",
+          "retriever_type": "mmr",
+          "retrieval_config": {}
+        },
+        "metrics": {
+          "simple": {
+            "retrieval": {
+              "hit_rate": 0.9,
+              "mrr": 0.835867724867725,
+              "precision": 0.128
+            },
+            "generation": {
+              "rouge1": 0.1989593421164771,
+              "rougeL": 0.1989593421164771
+            }
+          },
+          "cond": {
+            "retrieval": {
+              "hit_rate": 0.9066666666666666,
+              "mrr": 0.8335555555555555,
+              "precision": 0.14066666666666666
+            },
+            "generation": {
+              "rouge1": 0.30769111937570354,
+              "rougeL": 0.30769111937570354
+            }
+          },
+          "set": {
+            "retrieval": {
+              "hit_rate": 0.9,
+              "mrr": 0.7952301587301587,
+              "precision": 0.1293333333333333
+            },
+            "generation": {
+              "rouge1": 0.13627692051127355,
+              "rougeL": 0.11183016718646731
+            }
+          },
+          "mh": {
+            "retrieval": {
+              "hit_rate": 0.9466666666666667,
+              "mrr": 0.8368492063492063,
+              "precision": 0.1333333333333333
+            },
+            "generation": {
+              "rouge1": 0.3335992099545257,
+              "rougeL": 0.3335992099545257
+            }
+          },
+          "overall": {
+            "retrieval": {
+              "hit_rate": 0.9133333333333333,
+              "mrr": 0.8253756613756614,
+              "precision": 0.13283333333333333
+            },
+            "generation": {
+              "rouge1": 0.244131647989495,
+              "rougeL": 0.2380199596582934
+            }
+          },
+          "judge": {
+            "judge_completeness_score": 0.7183333333333334,
+            "judge_cons_w_real_world_score": 0.9916666666666667,
+            "judge_correctness_score": 1.32,
+            "judge_factual_accuracy_score": 0.8133333333333334,
+            "judge_fluff_score": 1.18,
+            "judge_pres_details_score": 0.855,
+            "judge_total_score": 0.9797222222222223
+          }
+        },
+        "metadata": {
+          "n_questions": 600,
+          "submit_timestamp": ""
+        }
       }
     }
   },
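
The aggregation behind the added block is visible in the numbers themselves: every value under "overall" is consistent with the unweighted mean of the corresponding values for the four question types (simple, cond, set, mh), and judge_total_score with the mean of the six judge sub-scores. The sketch below is a minimal consumer-side check of that relationship, not part of the evaluation pipeline; it assumes only the nesting shown in the hunk above and locates the entry by searching for its guid, since the keys wrapping the guid map fall outside the hunk.

```python
import json
from statistics import mean

GUID = "3ffd4582172e4dd08a6df3a2bc7b0a70"  # guid from this commit

def find_entry(node, guid):
    """Walk nested dicts and return the submission keyed by `guid`.

    The keys above the guid map are not visible in this hunk, so the
    entry is located by search rather than by a hard-coded path.
    """
    if isinstance(node, dict):
        if guid in node:
            return node[guid]
        for value in node.values():
            found = find_entry(value, guid)
            if found is not None:
                return found
    return None

with open("results.json", encoding="utf-8") as fh:
    entry = find_entry(json.load(fh), GUID)

metrics = entry["metrics"]
question_types = ("simple", "cond", "set", "mh")

# The stored "overall" retrieval/generation numbers match the unweighted
# mean over the four question types.
for section in ("retrieval", "generation"):
    for name, stored in metrics["overall"][section].items():
        recomputed = mean(metrics[qt][section][name] for qt in question_types)
        assert abs(recomputed - stored) < 1e-9, (section, name)

# judge_total_score matches the mean of the six judge sub-scores.
judge = metrics["judge"]
sub_scores = [v for k, v in judge.items() if k != "judge_total_score"]
assert abs(mean(sub_scores) - judge["judge_total_score"]) < 1e-9

print(entry["model_name"], metrics["overall"]["retrieval"]["hit_rate"])
```

On the values shown in this commit the asserts pass: the recomputed means agree with the stored aggregates to within floating-point rounding.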