ai-forever committed
Commit 8090724 · verified · Parent: 2cfe902

Add/update results for Qwen2.5-7B-Instruct (version 1.34.1, guid d6ccf3ae4d874a1e8fd256e07a9189d7)

Files changed (1): results.json (+79, -0)
results.json CHANGED
@@ -162,6 +162,85 @@
          "n_questions": 600,
          "submit_timestamp": ""
        }
+    },
+    "d6ccf3ae4d874a1e8fd256e07a9189d7": {
+      "model_name": "Qwen2.5-7B-Instruct",
+      "timestamp": "2025-07-03T14:00:00",
+      "config": {
+        "embedding_model": "e5-mistral-7b-instruct_2",
+        "retriever_type": "mmr",
+        "retrieval_config": {}
+      },
+      "metrics": {
+        "simple": {
+          "retrieval": {
+            "hit_rate": 0.94,
+            "mrr": 0.8339814814814815,
+            "precision": 0.16666666666666666
+          },
+          "generation": {
+            "rouge1": 0.12809115762301465,
+            "rougeL": 0.12720226873412577
+          }
+        },
+        "cond": {
+          "retrieval": {
+            "hit_rate": 0.9733333333333334,
+            "mrr": 0.8786931216931217,
+            "precision": 0.17733333333333332
+          },
+          "generation": {
+            "rouge1": 0.22438283005922466,
+            "rougeL": 0.22407979975619435
+          }
+        },
+        "set": {
+          "retrieval": {
+            "hit_rate": 0.9133333333333333,
+            "mrr": 0.8075925925925925,
+            "precision": 0.164
+          },
+          "generation": {
+            "rouge1": 0.11787408709850448,
+            "rougeL": 0.09943376362370457
+          }
+        },
+        "mh": {
+          "retrieval": {
+            "hit_rate": 0.9533333333333334,
+            "mrr": 0.8364444444444444,
+            "precision": 0.156
+          },
+          "generation": {
+            "rouge1": 0.17552305949148053,
+            "rougeL": 0.17552305949148053
+          }
+        },
+        "overall": {
+          "retrieval": {
+            "hit_rate": 0.945,
+            "mrr": 0.8391779100529101,
+            "precision": 0.16599999999999998
+          },
+          "generation": {
+            "rouge1": 0.16146778356805608,
+            "rougeL": 0.15655972290137632
+          }
+        },
+        "judge": {
+          "judge_completeness_score": 0.5292153589315526,
+          "judge_cons_w_real_world_score": 0.895,
+          "judge_correctness_score": 1.255,
+          "judge_factual_accuracy_score": 0.71,
+          "judge_fluff_score": 0.9266666666666666,
+          "judge_pres_details_score": 0.6583333333333333,
+          "judge_total_score": 0.8290358931552587
+        }
+      },
+      "metadata": {
+        "n_questions": 600,
+        "submit_timestamp": ""
+      }
     }
   }
 },
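
Note: the "overall" retrieval and generation figures in this entry appear to be the unweighted mean of the four question-type blocks (simple, cond, set, mh); e.g. hit_rate (0.94 + 0.9733 + 0.9133 + 0.9533) / 4 = 0.945. Below is a minimal sketch for checking that against a local copy of results.json; the file path and the find_entry helper are assumptions for illustration, while the guid and field names come from this diff.

import json
from statistics import mean

GUID = "d6ccf3ae4d874a1e8fd256e07a9189d7"

# Path is an assumption; point this at your local checkout of results.json.
with open("results.json", encoding="utf-8") as f:
    results = json.load(f)

def find_entry(node):
    # Walk the JSON until the guid key is found, since the full nesting
    # above the guid level is not visible in this diff hunk.
    if isinstance(node, dict):
        if GUID in node:
            return node[GUID]
        for value in node.values():
            found = find_entry(value)
            if found is not None:
                return found
    return None

entry = find_entry(results)
assert entry is not None, "guid not found in results.json"
metrics = entry["metrics"]

# Compare the stored "overall" values with the per-category means.
for field in ("hit_rate", "mrr", "precision"):
    avg = mean(metrics[qt]["retrieval"][field] for qt in ("simple", "cond", "set", "mh"))
    print(field, avg, metrics["overall"]["retrieval"][field])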