karimouda committed
Commit b3dd8e6 · 1 Parent(s): a5f68db
Files changed (2)
  1. src/about.py +2 -2
  2. src/leaderboard/read_evals.py +13 -5
src/about.py CHANGED
@@ -12,8 +12,8 @@ class Task:
 # ---------------------------------------------------
 class Tasks(Enum):
     # task_key in the json file, metric_key in the json file, name to display in the leaderboard
-    task0 = Task("anli_r1", "acc", "ANLI")
-    task1 = Task("logiqa", "acc_norm", "LogiQA")
+    task0 = Task("speed", "spq", "Speed")
+    task1 = Task("contamination", "score", "Contamination")
 
 NUM_FEWSHOT = 0 # Change with your few shot
 # ---------------------------------------------------
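
The new tasks plug into the same Task signature as before: the first argument is the task key in the results JSON, the second the metric key, and the third the name displayed in the leaderboard. A minimal sketch of the surrounding definitions, assuming the Task dataclass fields from the stock Hugging Face leaderboard template (they are not shown in this diff):

from dataclasses import dataclass
from enum import Enum

@dataclass
class Task:
    benchmark: str  # task key in the results json, e.g. "speed"
    metric: str     # metric key in the results json, e.g. "spq"
    col_name: str   # column name to display in the leaderboard, e.g. "Speed"

class Tasks(Enum):
    task0 = Task("speed", "spq", "Speed")
    task1 = Task("contamination", "score", "Contamination")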
src/leaderboard/read_evals.py CHANGED
@@ -60,6 +60,10 @@ class EvalResult:
         still_on_hub, _, model_config = is_model_on_hub(
             full_model, config.get("model_sha", "main"), trust_remote_code=True, test_tokenizer=False
         )
+
+        ##make still_on_hub always true for now:
+        still_on_hub = True
+
         architecture = "?"
         if model_config is not None:
             architectures = getattr(model_config, "architectures", None)
@@ -90,11 +94,10 @@ class EvalResult:
     def update_with_request_file(self, requests_path):
         """Finds the relevant request file for the current model and updates info with it"""
         request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
-
         try:
             with open(request_file, "r") as f:
                 request = json.load(f)
-            print(request_file)
+
             self.model_type = ModelType.from_str(request.get("model_type", ""))
             self.weight_type = WeightType[request.get("weight_type", "Original")]
             self.license = request.get("license", "?")
@@ -106,7 +109,7 @@ class EvalResult:
 
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
-        average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
+        average = self.results["average"]
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
@@ -135,6 +138,7 @@ def get_request_file_for_model(requests_path, model_name, precision):
         requests_path,
         f"{model_name}_eval_request_*.json",
     )
+
     request_files = glob.glob(request_files)
 
     # Select correct request file (precision)
@@ -174,7 +178,7 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
         # Creation of result
         eval_result = EvalResult.init_from_json_file(model_result_filepath)
         eval_result.update_with_request_file(requests_path)
-
+
         # Store results of same eval together
         eval_name = eval_result.eval_name
         if eval_name in eval_results.keys():
@@ -183,13 +187,17 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResult]:
             eval_results[eval_name] = eval_result
 
     results = []
-
+    #print(eval_results.values())
     for v in eval_results.values():
         try:
             print(v.to_dict())
             v.to_dict() # we test if the dict version is complete
             results.append(v)
         except KeyError: # not all eval values present
+            print("Key error in eval result, skipping")
+
+            print(v)
+            print(v.to_dict())
             continue
 
     print(results)
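
Note that to_dict() now reads a precomputed "average" entry from self.results instead of averaging the task scores over len(Tasks); if a results file lacks that key, the KeyError is caught in get_raw_eval_results above and the model is silently skipped (and the print(v.to_dict()) inside the except block would raise the same KeyError again, this time uncaught). A tolerant variant could fall back to recomputing the mean, sketched here with a hypothetical compute_average helper and example scores (illustrative only, not part of this commit):

# Hypothetical helper, not in this commit: prefer a precomputed "average" entry,
# otherwise fall back to the mean of whatever task scores are present.
def compute_average(results: dict) -> float:
    if "average" in results:
        return results["average"]
    scores = [v for v in results.values() if v is not None]
    return sum(scores) / len(scores) if scores else 0.0

# compute_average({"speed": 0.42, "contamination": 0.9}) -> 0.66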