Spaces:
Running
Running
lixuejing
committed on
Commit
·
3a0ece1
1
Parent(s):
2d0b0d7
update
Browse files - src/populate.py +1 -8
src/populate.py
CHANGED
@@ -11,22 +11,15 @@ from src.leaderboard.filter_models import filter_models_flags
|
|
11 |
|
12 |
def get_leaderboard_df(results_path: str, requests_path: str, dynamic_path: str,cols: list, benchmark_cols: list) -> pd.DataFrame:
|
13 |
"""Creates a dataframe from all the individual experiment results"""
|
14 |
-
print("results_path", "requests_path", "dynamic_path")
|
15 |
-
print(results_path, requests_path, dynamic_path)
|
16 |
raw_data = get_raw_eval_results(results_path, requests_path, dynamic_path)
|
17 |
-
print("raw_data", raw_data)
|
18 |
for v in raw_data:
|
19 |
print(v.to_dict())
|
20 |
all_data_json = [v.to_dict() for v in raw_data]
|
21 |
-
print("all_data_json init", all_data_json)
|
22 |
#all_data_json.append(baseline_row)
|
23 |
filter_models_flags(all_data_json)
|
24 |
-
print("all_data_json", all_data_json)
|
25 |
df = pd.DataFrame.from_records(all_data_json)
|
26 |
-
print("df", df)
|
27 |
print("AutoEvalColumn.average.name",AutoEvalColumn.average.name)
|
28 |
-
|
29 |
-
print("cols", cols)
|
30 |
df = df[cols].round(decimals=2)
|
31 |
|
32 |
# filter out if any of the benchmarks have not been produced
|
|
|
11 |
|
12 |
def get_leaderboard_df(results_path: str, requests_path: str, dynamic_path: str,cols: list, benchmark_cols: list) -> pd.DataFrame:
|
13 |
"""Creates a dataframe from all the individual experiment results"""
|
|
|
|
|
14 |
raw_data = get_raw_eval_results(results_path, requests_path, dynamic_path)
|
|
|
15 |
for v in raw_data:
|
16 |
print(v.to_dict())
|
17 |
all_data_json = [v.to_dict() for v in raw_data]
|
|
|
18 |
#all_data_json.append(baseline_row)
|
19 |
filter_models_flags(all_data_json)
|
|
|
20 |
df = pd.DataFrame.from_records(all_data_json)
|
|
|
21 |
print("AutoEvalColumn.average.name",AutoEvalColumn.average.name)
|
22 |
+
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
|
|
23 |
df = df[cols].round(decimals=2)
|
24 |
|
25 |
# filter out if any of the benchmarks have not been produced
|