rntc Claude committed on
Commit
17f029a
·
1 Parent(s): 25c6939

Replace hardcoded architecture detection with user selection

Browse files

🎯 Key Changes:
- Remove hardcoded architecture patterns (future-proof for new models)
- Add model type dropdown to submission form: 🔀 🔽 🔄
- Users now select: Encoder/Decoder/Encoder-Decoder during submission
- Model type stored in request files and displayed in T column

✨ Benefits:
- Works with any new/custom architecture
- User explicitly declares their model type
- No need to maintain hardcoded pattern lists
- Clear UI with emoji + description

🔀 Encoder (BERT-like) | 🔽 Decoder (GPT-like) | 🔄 Encoder-Decoder (T5-like)

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <[email protected]>

app.py CHANGED
@@ -159,6 +159,13 @@ with demo:
159
  value="float16",
160
  interactive=True,
161
  )
 
 
 
 
 
 
 
162
 
163
  submit_button = gr.Button("Soumettre l'Γ©valuation")
164
  submission_result = gr.Markdown()
@@ -168,6 +175,7 @@ with demo:
168
  model_name_textbox,
169
  revision_name_textbox,
170
  precision,
 
171
  ],
172
  submission_result,
173
  )
 
159
  value="float16",
160
  interactive=True,
161
  )
162
+ model_type = gr.Dropdown(
163
+ choices=["πŸ”€ Encoder (BERT-like)", "πŸ”½ Decoder (GPT-like)", "πŸ”„ Encoder-Decoder (T5-like)"],
164
+ label="Type d'architecture",
165
+ multiselect=False,
166
+ value="πŸ”€ Encoder (BERT-like)",
167
+ interactive=True,
168
+ )
169
 
170
  submit_button = gr.Button("Soumettre l'Γ©valuation")
171
  submission_result = gr.Markdown()
 
175
  model_name_textbox,
176
  revision_name_textbox,
177
  precision,
178
+ model_type,
179
  ],
180
  submission_result,
181
  )
src/display/utils.py CHANGED
@@ -49,6 +49,7 @@ class EvalQueueColumn: # Queue column
49
  model = ColumnContent("model", "markdown", True)
50
  revision = ColumnContent("revision", "str", True)
51
  precision = ColumnContent("precision", "str", True)
 
52
  status = ColumnContent("status", "str", True)
53
 
54
  ## All the model information that we might need
@@ -60,23 +61,75 @@ class ModelDetails:
60
 
61
 
62
  class ModelType(Enum):
63
- FT = ModelDetails(name="fine-tuned", symbol="πŸ”Ά")
64
- Unknown = ModelDetails(name="", symbol="?")
 
 
65
 
66
  def to_str(self, separator=" "):
67
  return f"{self.value.symbol}{separator}{self.value.name}"
68
 
69
  @staticmethod
70
- def from_str(type):
71
- if "fine-tuned" in type or "πŸ”Ά" in type:
72
- return ModelType.FT
 
 
 
 
73
  return ModelType.Unknown
74
 
75
  @staticmethod
76
  def from_config(config):
77
- """Determine model type from configuration - for NER models, most will be fine-tuned"""
78
- if hasattr(config, 'num_labels') and config.num_labels > 2:
79
- return ModelType.FT # Fine-tuned for NER
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  return ModelType.Unknown
81
 
82
  class WeightType(Enum):
 
49
  model = ColumnContent("model", "markdown", True)
50
  revision = ColumnContent("revision", "str", True)
51
  precision = ColumnContent("precision", "str", True)
52
+ model_type = ColumnContent("model_type", "str", True)
53
  status = ColumnContent("status", "str", True)
54
 
55
  ## All the model information that we might need
 
61
 
62
 
63
  class ModelType(Enum):
64
+ ENCODER = ModelDetails(name="encoder", symbol="πŸ”€") # BERT-like
65
+ DECODER = ModelDetails(name="decoder", symbol="πŸ”½") # GPT-like
66
+ ENCODER_DECODER = ModelDetails(name="encoder-decoder", symbol="πŸ”„") # T5-like
67
+ Unknown = ModelDetails(name="unknown", symbol="?")
68
 
69
  def to_str(self, separator=" "):
70
  return f"{self.value.symbol}{separator}{self.value.name}"
71
 
72
  @staticmethod
73
+ def from_str(type_str):
74
+ if "encoder-decoder" in type_str.lower() or "πŸ”„" in type_str:
75
+ return ModelType.ENCODER_DECODER
76
+ elif "encoder" in type_str.lower() or "πŸ”€" in type_str:
77
+ return ModelType.ENCODER
78
+ elif "decoder" in type_str.lower() or "πŸ”½" in type_str:
79
+ return ModelType.DECODER
80
  return ModelType.Unknown
81
 
82
  @staticmethod
83
  def from_config(config):
84
+ """Detect model architecture type from config"""
85
+ if hasattr(config, 'model_type'):
86
+ model_type = config.model_type.lower()
87
+
88
+ # Encoder-decoder models
89
+ if model_type in ['t5', 'bart', 'pegasus', 'mbart', 'blenderbot', 'bigbird_pegasus']:
90
+ return ModelType.ENCODER_DECODER
91
+
92
+ # Decoder-only models (GPT-like)
93
+ elif model_type in ['gpt', 'gpt2', 'gpt_neo', 'gpt_neox', 'gptj', 'bloom', 'llama', 'mistral', 'qwen']:
94
+ return ModelType.DECODER
95
+
96
+ # Encoder-only models (BERT-like)
97
+ elif model_type in ['bert', 'roberta', 'camembert', 'distilbert', 'electra', 'deberta', 'albert']:
98
+ return ModelType.ENCODER
99
+
100
+ # Fallback: detect from architecture class name
101
+ if hasattr(config, 'architectures') and config.architectures:
102
+ arch_name = config.architectures[0].lower()
103
+
104
+ if any(name in arch_name for name in ['t5', 'bart', 'pegasus', 'mbart', 'blenderbot']):
105
+ return ModelType.ENCODER_DECODER
106
+ elif any(name in arch_name for name in ['gpt', 'bloom', 'llama', 'mistral', 'qwen']):
107
+ return ModelType.DECODER
108
+ elif any(name in arch_name for name in ['bert', 'roberta', 'camembert', 'distilbert', 'electra', 'deberta', 'albert']):
109
+ return ModelType.ENCODER
110
+
111
+ return ModelType.Unknown
112
+
113
+ @staticmethod
114
+ def from_architecture(architecture):
115
+ """Detect model type from architecture string"""
116
+ if not architecture or architecture == "?":
117
+ return ModelType.Unknown
118
+
119
+ arch_lower = architecture.lower()
120
+
121
+ # Encoder-decoder patterns
122
+ if any(pattern in arch_lower for pattern in ['t5', 'bart', 'pegasus', 'mbart', 'blenderbot']):
123
+ return ModelType.ENCODER_DECODER
124
+
125
+ # Decoder patterns (GPT-like)
126
+ elif any(pattern in arch_lower for pattern in ['gpt', 'bloom', 'llama', 'mistral', 'qwen', 'causal']):
127
+ return ModelType.DECODER
128
+
129
+ # Encoder patterns (BERT-like)
130
+ elif any(pattern in arch_lower for pattern in ['bert', 'roberta', 'camembert', 'distilbert', 'electra', 'deberta', 'albert', 'formaskedlm', 'fortokenclassification', 'forsequenceclassification']):
131
+ return ModelType.ENCODER
132
+
133
  return ModelType.Unknown
134
 
135
  class WeightType(Enum):
src/leaderboard/read_evals.py CHANGED
@@ -114,7 +114,18 @@ class EvalResult:
114
  try:
115
  with open(request_file, "r") as f:
116
  request = json.load(f)
117
- self.model_type = ModelType.from_str(request.get("model_type", ""))
 
 
 
 
 
 
 
 
 
 
 
118
  self.weight_type = WeightType[request.get("weight_type", "Original")]
119
  self.license = request.get("license", "?")
120
  self.likes = request.get("likes", 0)
 
114
  try:
115
  with open(request_file, "r") as f:
116
  request = json.load(f)
117
+
118
+ # Convert emoji symbol to ModelType
119
+ model_type_symbol = request.get("model_type", "?")
120
+ if model_type_symbol == "πŸ”€":
121
+ self.model_type = ModelType.ENCODER
122
+ elif model_type_symbol == "πŸ”½":
123
+ self.model_type = ModelType.DECODER
124
+ elif model_type_symbol == "πŸ”„":
125
+ self.model_type = ModelType.ENCODER_DECODER
126
+ else:
127
+ self.model_type = ModelType.Unknown
128
+
129
  self.weight_type = WeightType[request.get("weight_type", "Original")]
130
  self.license = request.get("license", "?")
131
  self.likes = request.get("likes", 0)
src/submission/submit.py CHANGED
@@ -18,6 +18,7 @@ def add_new_eval(
18
  model: str,
19
  revision: str,
20
  precision: str,
 
21
  ):
22
  global REQUESTED_MODELS
23
  global USERS_TO_SUBMISSION_DATES
@@ -31,6 +32,8 @@ def add_new_eval(
31
  model_path = model.split("/")[1]
32
 
33
  precision = precision.split(" ")[0]
 
 
34
  current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
35
 
36
  # Does the model actually exist?
@@ -71,6 +74,7 @@ def add_new_eval(
71
  "model": model,
72
  "revision": revision,
73
  "precision": precision,
 
74
  "status": "PENDING",
75
  "submitted_time": current_time,
76
  "likes": model_info.likes,
 
18
  model: str,
19
  revision: str,
20
  precision: str,
21
+ model_type: str,
22
  ):
23
  global REQUESTED_MODELS
24
  global USERS_TO_SUBMISSION_DATES
 
32
  model_path = model.split("/")[1]
33
 
34
  precision = precision.split(" ")[0]
35
+ # Extract just the emoji from model_type (e.g., "πŸ”€ Encoder (BERT-like)" -> "πŸ”€")
36
+ model_type_symbol = model_type.split(" ")[0] if model_type else "?"
37
  current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
38
 
39
  # Does the model actually exist?
 
74
  "model": model,
75
  "revision": revision,
76
  "precision": precision,
77
+ "model_type": model_type_symbol,
78
  "status": "PENDING",
79
  "submitted_time": current_time,
80
  "likes": model_info.likes,