Update app.py
app.py CHANGED
@@ -17,6 +17,7 @@ from content import format_error, format_warning, format_log, TITLE, INTRODUCTIO
     CITATION_BUTTON_TEXT, model_hyperlink
 
 TOKEN = os.environ.get("TOKEN", None)
+print(TOKEN)
 
 OWNER = "autogenCTF"
 DATA_DATASET = f"{OWNER}/CTFAIA"
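The added print writes the raw token value to the process logs. If it is only meant as a startup sanity check, a masked variant (a sketch, not part of this commit) would confirm the variable is set without exposing it:

    import os

    TOKEN = os.environ.get("TOKEN", None)
    # Sketch: log only whether the secret is present, never its value.
    print("TOKEN is set:", TOKEN is not None)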
@@ -78,11 +79,10 @@ def get_dataframe_from_results(eval_results, split):
     df = pd.DataFrame(local_df)
     df = df.sort_values(by=["completion_level"], ascending=False)
 
-    numeric_cols = [c for c in local_df.column_names
-                    c in ["success_rate", "completion_level"
+    numeric_cols = [c for c in local_df.column_names
+                    if c in ["success_rate", "completion_level"]]
     df[numeric_cols] = df[numeric_cols].multiply(100).round(decimals=2)
     df = df.style.format("{:.2%}", subset=numeric_cols)
-    print(type(df))
     return df
 
 
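For reference, a minimal standalone sketch of the formatting step above, with toy data and the column names taken from the hunk. Note that the "{:.2%}" format spec multiplies by 100 on its own, so it expects fractional values such as 0.5:

    import pandas as pd

    # Toy frame standing in for the leaderboard results (values are made up).
    df = pd.DataFrame({"model": ["a", "b"],
                       "success_rate": [0.50, 0.25],
                       "completion_level": [0.75, 0.10]})
    numeric_cols = [c for c in df.columns if c in ["success_rate", "completion_level"]]
    styled = df.style.format("{:.2%}", subset=numeric_cols)  # 0.5 renders as "50.00%"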
@@ -134,6 +134,7 @@ def add_new_eval(
 
     print("Adding new eval")
 
+    # Check if the combination model/org already exists and prints a warning message if yes
     if model.lower() in set(
             [m.lower() for m in eval_results[dataset_version][val_or_test]["model"]]) and organisation.lower() in set(
             [o.lower() for o in eval_results[dataset_version][val_or_test]["organisation"]]):
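The check the new comment describes could also be factored into a small helper. A sketch, assuming the split's entries expose "model" and "organisation" lists as in the hunk (the helper name is hypothetical):

    def already_submitted(model, organisation, entries):
        # Hypothetical helper, not part of app.py: case-insensitive membership
        # test over previously recorded submissions.
        models = {m.lower() for m in entries["model"]}
        orgs = {o.lower() for o in entries["organisation"]}
        return model.lower() in models and organisation.lower() in orgs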
@@ -155,6 +156,7 @@ def add_new_eval(
     comprehension = {'all': 0, 1: 0, 2: 0, 3: 0}
     num = {'all': 0, 1: 0, 2: 0, 3: 0}
 
+    # with open(f"scored/{organisation}_{model}.jsonl", "w") as scored_file:
     with open(f"scored/{organisation}_{model}.jsonl", "w") as scored_file:
         with open(file_path, 'r') as f:
             for ix, line in enumerate(f):
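The block above streams the submission file line by line and writes a scored JSONL alongside it. A stripped-down sketch of that read/score/write shape (the paths, fields and function name here are placeholders, not the app's):

    import json

    def rescore(submission_path, scored_path):
        # Placeholder loop: parse each JSONL line and write a scored record back out.
        with open(scored_path, "w") as scored_file:
            with open(submission_path, "r") as f:
                for ix, line in enumerate(f):
                    task = json.loads(line)
                    record = {"ix": ix, "task_name": task.get("task_name")}
                    scored_file.write(json.dumps(record) + "\n")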
@@ -171,8 +173,7 @@ def add_new_eval(
                     level = int(gold_results[val_or_test][task_name]["Level"])
                     score = question_scorer(task, gold_results[val_or_test][task_name])
                 except KeyError:
-                    return format_error(
-                        f"{task_name} not found in split {val_or_test}. Are you sure you submitted the correct file?")
+                    return format_error(f"{task_name} not found in split {val_or_test}. Are you sure you submitted the correct file?")
 
                 scored_file.write(
                     json.dumps({
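Joining the error message onto one line does not change behaviour. The same guard could be written as an explicit lookup instead of catching KeyError; an illustration only, not the committed code:

    def lookup_gold(gold_results, val_or_test, task_name):
        # Illustration: return None when the task is missing from the split,
        # letting the caller produce the format_error message shown above.
        split = gold_results[val_or_test]
        if task_name not in split:
            return None
        return split[task_name]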
@@ -200,8 +201,8 @@ def add_new_eval(
                 success_rate['all'] += 1
 
     for key in LEVELS:
-        success_rate[key] = round_and_pad(success_rate[key] / num[key])
-        completion_level[key] = round_and_pad(completion_level[key] / num[key])
+        success_rate[key] = round_and_pad(success_rate[key] / num[key] / 100)
+        completion_level[key] = round_and_pad(completion_level[key] / num[key] / 1000)
         expertise[key] = round_and_pad(expertise[key] / num[key])
         reasoning[key] = round_and_pad(reasoning[key] / num[key])
         comprehension[key] = round_and_pad(comprehension[key] / num[key])
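round_and_pad itself is not defined in this diff; a hypothetical stand-in with the same name is sketched below so the averaging reads end to end. The extra /100 and /1000 divisors are taken verbatim from the commit; their scaling is the app's own choice:

    def round_and_pad(value, decimals=2):
        # Hypothetical helper with the same name as the app's: round to a fixed
        # number of decimals and keep trailing zeros when displayed.
        return f"{value:.{decimals}f}"

    # e.g. round_and_pad(0.5) -> "0.50", round_and_pad(12.3456) -> "12.35"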