feat: switch the default metric to ndcg_at_10

- app.py +2 -2
- tests/src/leaderboard/test_read_evals.py +1 -1
app.py CHANGED

@@ -31,9 +31,9 @@ except Exception:
 raw_data = get_raw_eval_results(f"{EVAL_RESULTS_PATH}/AIR-Bench_24.04")
 
 original_df_qa = get_leaderboard_df(
-    raw_data, task='qa', metric='
+    raw_data, task='qa', metric='ndcg_at_10')
 original_df_long_doc = get_leaderboard_df(
-    raw_data, task='long-doc', metric='
+    raw_data, task='long-doc', metric='ndcg_at_10')
 print(f'raw data: {len(raw_data)}')
 print(f'QA data loaded: {original_df_qa.shape}')
 print(f'Long-Doc data loaded: {len(original_df_long_doc)}')
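For context, nDCG@10 (normalized discounted cumulative gain over the top ten retrieved results) is the metric the leaderboard now defaults to for both the QA and long-doc tasks. The sketch below is only an illustration of how this metric is commonly computed; the function name and inputs are hypothetical and not taken from the AIR-Bench codebase.

import math

def ndcg_at_k(relevances, k=10):
    """Compute nDCG@k for a single query.

    relevances: graded relevance scores of the returned documents,
    in the order they were ranked. Returns 0.0 when no document is relevant.
    """
    def dcg(rels):
        # Discounted cumulative gain: later ranks contribute less.
        return sum(rel / math.log2(rank + 2) for rank, rel in enumerate(rels))

    ideal_dcg = dcg(sorted(relevances, reverse=True)[:k])
    if ideal_dcg == 0:
        return 0.0
    return dcg(relevances[:k]) / ideal_dcg

# Example: the only relevant document sits at rank 3 of 10 results.
print(ndcg_at_k([0, 0, 1, 0, 0, 0, 0, 0, 0, 0]))  # 0.5

In the example, the rank-3 discount (log2 of 4) halves the document's contribution relative to an ideal ranking that would place it first, so the query scores 0.5.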
tests/src/leaderboard/test_read_evals.py CHANGED

@@ -41,7 +41,7 @@ def test_get_raw_eval_results():
 def test_get_leaderboard_df():
     results_path = cur_fp.parents[2] / "toydata" / "eval_results" / "AIR-Bench_24.04"
     raw_data = get_raw_eval_results(results_path)
-    df = get_leaderboard_df(raw_data, 'qa', '
+    df = get_leaderboard_df(raw_data, 'qa', 'ndcg_at_10')
     assert df.shape[0] == 4
     # the results contain only one embedding model
     # for i in range(4):
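Assuming the repository uses pytest (the plain `assert` style and `test_` naming suggest it does), the updated test can be run on its own with `pytest tests/src/leaderboard/test_read_evals.py::test_get_leaderboard_df` to confirm that `get_leaderboard_df` accepts the new `ndcg_at_10` metric against the toy evaluation results.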