Andrea Seveso committed on
Commit fd6f23a · 1 Parent(s): 4dadd44

Remove weight type

Files changed (1)
  1. src/leaderboard/read_evals.py +34 -33
src/leaderboard/read_evals.py CHANGED
@@ -1,37 +1,33 @@
-print("--- CONFIRMED: Running the modified version of read_evals.py ---")
-
-import glob
-import json
-import math
-import os
-from dataclasses import dataclass
-
-import dateutil
-import numpy as np
-
-from src.display.formatting import make_clickable_model
-from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision, WeightType
 from src.submission.check_validity import is_model_on_hub
+from src.display.utils import AutoEvalColumn, ModelType, Tasks, Precision
+from src.display.formatting import make_clickable_model
+import numpy as np
+import dateutil
+from dataclasses import dataclass
+import os
+import math
+import json
+import glob
+print("--- CONFIRMED: Running the modified version of read_evals.py ---")
 
 
 @dataclass
 class EvalResult:
     """Represents one full evaluation. Built from a combination of the result and request file for a given run.
     """
-    eval_name: str # org_model_precision (uid)
-    full_model: str # org/model (path on hub)
-    org: str
+    eval_name: str # org_model_precision (uid)
+    full_model: str # org/model (path on hub)
+    org: str
     model: str
-    revision: str # commit hash, "" if main
+    revision: str # commit hash, "" if main
     results: dict
     precision: Precision = Precision.Unknown
-    model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
-    weight_type: WeightType = WeightType.Original # Original or Adapter
-    architecture: str = "Unknown"
+    model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
+    architecture: str = "Unknown"
     license: str = "?"
     likes: int = 0
     num_params: int = 0
-    date: str = "" # submission date of request file
+    date: str = "" # submission date of request file
     still_on_hub: bool = False
 
     @classmethod
@@ -46,7 +42,8 @@ class EvalResult:
         precision = Precision.from_str(config.get("model_dtype"))
 
         # Get model and org
-        org_and_model = config.get("model_name", config.get("model_args", None))
+        org_and_model = config.get(
+            "model_name", config.get("model_args", None))
         org_and_model = org_and_model.split("/", 1)
 
         if len(org_and_model) == 1:
@@ -74,7 +71,8 @@ class EvalResult:
             task = task.value
 
             # We average all scores of a given metric (not all metrics are present in all files)
-            accs = np.array([v.get(task.metric, None) for k, v in data["results"].items() if task.benchmark == k])
+            accs = np.array([v.get(task.metric, None)
+                             for k, v in data["results"].items() if task.benchmark == k])
             if accs.size == 0 or any([acc is None for acc in accs]):
                 continue
 
@@ -87,37 +85,38 @@ class EvalResult:
             org=org,
             model=model,
             results=results,
-            precision=precision,
-            revision= config.get("model_sha", ""),
+            precision=precision,
+            revision=config.get("model_sha", ""),
             still_on_hub=still_on_hub,
             architecture=architecture
         )
 
     def update_with_request_file(self, requests_path):
         """Finds the relevant request file for the current model and updates info with it"""
-        request_file = get_request_file_for_model(requests_path, self.full_model, self.precision.value.name)
+        request_file = get_request_file_for_model(
+            requests_path, self.full_model, self.precision.value.name)
 
         try:
             with open(request_file, "r") as f:
                 request = json.load(f)
             self.model_type = ModelType.from_str(request.get("model_type", ""))
-            self.weight_type = WeightType[request.get("weight_type", "Original")]
             self.license = request.get("license", "?")
             self.likes = request.get("likes", 0)
             self.num_params = request.get("params", 0)
             self.date = request.get("submitted_time", "")
         except Exception:
-            print(f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
+            print(
+                f"Could not find request file for {self.org}/{self.model} with precision {self.precision.value.name}")
 
     def to_dict(self):
         """Converts the Eval Result to a dict compatible with our dataframe display"""
-        average = sum([v for v in self.results.values() if v is not None]) / len(Tasks)
+        average = sum([v for v in self.results.values()
+                      if v is not None]) / len(Tasks)
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
             AutoEvalColumn.precision.name: self.precision.value.name,
             AutoEvalColumn.model_type.name: self.model_type.value.name,
             AutoEvalColumn.model_type_symbol.name: self.model_type.value.symbol,
-            AutoEvalColumn.weight_type.name: self.weight_type.value.name,
             AutoEvalColumn.architecture.name: self.architecture,
             AutoEvalColumn.model.name: make_clickable_model(self.full_model),
             AutoEvalColumn.revision.name: self.revision,
@@ -167,7 +166,8 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
 
         # Sort the files by date
         try:
-            files.sort(key=lambda x: x.removesuffix(".json").removeprefix("results_")[:-7])
+            files.sort(key=lambda x: x.removesuffix(
+                ".json").removeprefix("results_")[:-7])
         except dateutil.parser._parser.ParserError:
             files = [files[-1]]
 
@@ -183,14 +183,15 @@ def get_raw_eval_results(results_path: str, requests_path: str) -> list[EvalResu
         # Store results of same eval together
         eval_name = eval_result.eval_name
         if eval_name in eval_results.keys():
-            eval_results[eval_name].results.update({k: v for k, v in eval_result.results.items() if v is not None})
+            eval_results[eval_name].results.update(
+                {k: v for k, v in eval_result.results.items() if v is not None})
        else:
            eval_results[eval_name] = eval_result
 
    results = []
    for v in eval_results.values():
        try:
-            v.to_dict() # we test if the dict version is complete
+            v.to_dict() # we test if the dict version is complete
            results.append(v)
        except Exception as e:
            print(f"--- DEBUG: SKIPPING RESULT FILE. ERROR IS: ---")