Spaces:
Running
Running
File size: 3,804 Bytes
7c691e6 201da5d 7c691e6 201da5d 7c691e6 201da5d 7c691e6 201da5d 56a86ce 201da5d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
import pandas as pd
TYPES = [
"str",
"number",
"number"
]
SWEBENCH_ON_LOAD_COLUMNS = [
"Agent Name",
"Accuracy",
"Total Cost",
"Runs",
]
SWEBENCH_SEARCH_COLUMNS = ['Total Cost', 'Agent Name']
SWEBENCH_HIDE_COLUMNS = ["F1 Score", "AUC", "Precision", "Recall", "benchmark_name", 'Overall Score', 'Vectorization Score', 'Fathomnet Score', 'Feedback Score', 'House Price Score', 'Spaceship Titanic Score', 'AMP Parkinsons Disease Progression Prediction Score', 'CIFAR10 Score', 'IMDB Score', "Level 1 Accuracy", "Level 2 Accuracy", "Level 3 Accuracy"]
USACO_ON_LOAD_COLUMNS = [
"Agent Name",
"Accuracy",
"Total Cost",
"Runs",
]
USACO_SEARCH_COLUMNS = ['Total Cost', 'Agent Name']
USACO_HIDE_COLUMNS = ["F1 Score", "AUC", "Precision", "Recall", "benchmark_name", 'Overall Score', 'Vectorization Score', 'Fathomnet Score', 'Feedback Score', 'House Price Score', 'Spaceship Titanic Score', 'AMP Parkinsons Disease Progression Prediction Score', 'CIFAR10 Score', 'IMDB Score', "Level 1 Accuracy", "Level 2 Accuracy", "Level 3 Accuracy"]
COREBENCH_ON_LOAD_COLUMNS = [
"Agent Name",
"Accuracy",
"Total Cost",
"Runs",
]
COREBENCH_SEARCH_COLUMNS = ['Total Cost', 'Agent Name']
COREBENCH_HIDE_COLUMNS = ["F1 Score", "AUC", "Precision", "Recall", "benchmark_name", 'Overall Score', 'Vectorization Score', 'Fathomnet Score', 'Feedback Score', 'House Price Score', 'Spaceship Titanic Score', 'AMP Parkinsons Disease Progression Prediction Score', 'CIFAR10 Score', 'IMDB Score', "Level 1 Accuracy", "Level 2 Accuracy", "Level 3 Accuracy"]
MLAGENTBENCH_ON_LOAD_COLUMNS = [
"Agent Name",
"Overall Score",
"Total Cost",
]
MLAGENTBENCH_SEARCH_COLUMNS = ['Total Cost', 'Agent Name']
MLAGENTBENCH_HIDE_COLUMNS = ["F1 Score", "AUC", "Precision", "Recall", "benchmark_name", 'Accuracy']
NUMERIC_INTERVALS = {
"?": pd.Interval(-1, 0, closed="right"),
"~1.5": pd.Interval(0, 2, closed="right"),
"~3": pd.Interval(2, 4, closed="right"),
"~7": pd.Interval(4, 9, closed="right"),
"~13": pd.Interval(9, 20, closed="right"),
"~35": pd.Interval(20, 45, closed="right"),
"~60": pd.Interval(45, 70, closed="right"),
"70+": pd.Interval(70, 10000, closed="right"),
}
CYBENCH_ON_LOAD_COLUMNS = [
"Agent Name",
"Accuracy",
"Total Cost",
"Runs",
]
CYBENCH_SEARCH_COLUMNS = ['Total Cost', 'Agent Name']
CYBENCH_HIDE_COLUMNS = ["F1 Score", "AUC", "Precision", "Recall", "benchmark_name", 'Overall Score', 'Vectorization Score', 'Fathomnet Score', 'Feedback Score', 'House Price Score', 'Spaceship Titanic Score', 'AMP Parkinsons Disease Progression Prediction Score', 'CIFAR10 Score', 'IMDB Score', "Level 1 Accuracy", "Level 2 Accuracy", "Level 3 Accuracy"]
APPWORLD_ON_LOAD_COLUMNS = [
"Agent Name",
"Accuracy",
"Total Cost",
"Runs",
"Scenario Goal Completion"
]
APPWORLD_SEARCH_COLUMNS = ['Total Cost', 'Agent Name']
APPWORLD_HIDE_COLUMNS = ["F1 Score", "AUC", "Precision", "Recall", "benchmark_name", 'Overall Score', 'Vectorization Score', 'Fathomnet Score', 'Feedback Score', 'House Price Score', 'Spaceship Titanic Score', 'AMP Parkinsons Disease Progression Prediction Score', 'CIFAR10 Score', 'IMDB Score', "Level 1 Accuracy", "Level 2 Accuracy", "Level 3 Accuracy"]
GAIA_ON_LOAD_COLUMNS = [
"Agent Name",
"Accuracy",
"Level 1 Accuracy",
"Level 2 Accuracy",
"Level 3 Accuracy",
"Total Cost",
"Runs",
]
GAIA_SEARCH_COLUMNS = ['Total Cost', 'Agent Name']
GAIA_HIDE_COLUMNS = ["F1 Score", "AUC", "Precision", "Recall", "benchmark_name", 'Overall Score', 'Vectorization Score', 'Fathomnet Score', 'Feedback Score', 'House Price Score', 'Spaceship Titanic Score', 'AMP Parkinsons Disease Progression Prediction Score', 'CIFAR10 Score', 'IMDB Score'] |