Spaces:
Sleeping
Sleeping
Replace hardcoded architecture detection with user selection
Browse files
🎯 Key Changes:
- Remove hardcoded architecture patterns (future-proof for new models)
- Add model type dropdown to submission form: 🔤 🔽 🔄
- Users now select: Encoder/Decoder/Encoder-Decoder during submission
- Model type stored in request files and displayed in T column
✨ Benefits:
- Works with any new/custom architecture
- User explicitly declares their model type
- No need to maintain hardcoded pattern lists
- Clear UI with emoji + description
🔤 Encoder (BERT-like) | 🔽 Decoder (GPT-like) | 🔄 Encoder-Decoder (T5-like)
🤖 Generated with [Claude Code](https://claude.ai/code)
Co-Authored-By: Claude <[email protected]>
- app.py +8 -0
- src/display/utils.py +61 -8
- src/leaderboard/read_evals.py +12 -1
- src/submission/submit.py +4 -0
app.py
CHANGED
@@ -159,6 +159,13 @@ with demo:
|
|
159 |
value="float16",
|
160 |
interactive=True,
|
161 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
162 |
|
163 |
submit_button = gr.Button("Soumettre l'Γ©valuation")
|
164 |
submission_result = gr.Markdown()
|
@@ -168,6 +175,7 @@ with demo:
|
|
168 |
model_name_textbox,
|
169 |
revision_name_textbox,
|
170 |
precision,
|
|
|
171 |
],
|
172 |
submission_result,
|
173 |
)
|
|
|
159 |
value="float16",
|
160 |
interactive=True,
|
161 |
)
|
162 |
+
model_type = gr.Dropdown(
|
163 |
+
choices=["🔤 Encoder (BERT-like)", "🔽 Decoder (GPT-like)", "🔄 Encoder-Decoder (T5-like)"],
|
164 |
+
label="Type d'architecture",
|
165 |
+
multiselect=False,
|
166 |
+
value="🔤 Encoder (BERT-like)",
|
167 |
+
interactive=True,
|
168 |
+
)
|
169 |
|
170 |
submit_button = gr.Button("Soumettre l'Γ©valuation")
|
171 |
submission_result = gr.Markdown()
|
|
|
175 |
model_name_textbox,
|
176 |
revision_name_textbox,
|
177 |
precision,
|
178 |
+
model_type,
|
179 |
],
|
180 |
submission_result,
|
181 |
)
|
src/display/utils.py
CHANGED
@@ -49,6 +49,7 @@ class EvalQueueColumn: # Queue column
|
|
49 |
model = ColumnContent("model", "markdown", True)
|
50 |
revision = ColumnContent("revision", "str", True)
|
51 |
precision = ColumnContent("precision", "str", True)
|
|
|
52 |
status = ColumnContent("status", "str", True)
|
53 |
|
54 |
## All the model information that we might need
|
@@ -60,23 +61,75 @@ class ModelDetails:
|
|
60 |
|
61 |
|
62 |
class ModelType(Enum):
|
63 |
-
|
64 |
-
|
|
|
|
|
65 |
|
66 |
def to_str(self, separator=" "):
|
67 |
return f"{self.value.symbol}{separator}{self.value.name}"
|
68 |
|
69 |
@staticmethod
|
70 |
-
def from_str(
|
71 |
-
if "
|
72 |
-
return ModelType.
|
|
|
|
|
|
|
|
|
73 |
return ModelType.Unknown
|
74 |
|
75 |
@staticmethod
|
76 |
def from_config(config):
|
77 |
-
"""
|
78 |
-
if hasattr(config, '
|
79 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
80 |
return ModelType.Unknown
|
81 |
|
82 |
class WeightType(Enum):
|
|
|
49 |
model = ColumnContent("model", "markdown", True)
|
50 |
revision = ColumnContent("revision", "str", True)
|
51 |
precision = ColumnContent("precision", "str", True)
|
52 |
+
model_type = ColumnContent("model_type", "str", True)
|
53 |
status = ColumnContent("status", "str", True)
|
54 |
|
55 |
## All the model information that we might need
|
|
|
61 |
|
62 |
|
63 |
class ModelType(Enum):
|
64 |
+
ENCODER = ModelDetails(name="encoder", symbol="🔤") # BERT-like
|
65 |
+
DECODER = ModelDetails(name="decoder", symbol="🔽") # GPT-like
|
66 |
+
ENCODER_DECODER = ModelDetails(name="encoder-decoder", symbol="🔄") # T5-like
|
67 |
+
Unknown = ModelDetails(name="unknown", symbol="?")
|
68 |
|
69 |
def to_str(self, separator=" "):
|
70 |
return f"{self.value.symbol}{separator}{self.value.name}"
|
71 |
|
72 |
@staticmethod
|
73 |
+
def from_str(type_str):
|
74 |
+
if "encoder-decoder" in type_str.lower() or "🔄" in type_str:
|
75 |
+
return ModelType.ENCODER_DECODER
|
76 |
+
elif "encoder" in type_str.lower() or "🔤" in type_str:
|
77 |
+
return ModelType.ENCODER
|
78 |
+
elif "decoder" in type_str.lower() or "🔽" in type_str:
|
79 |
+
return ModelType.DECODER
|
80 |
return ModelType.Unknown
|
81 |
|
82 |
@staticmethod
|
83 |
def from_config(config):
|
84 |
+
"""Detect model architecture type from config"""
|
85 |
+
if hasattr(config, 'model_type'):
|
86 |
+
model_type = config.model_type.lower()
|
87 |
+
|
88 |
+
# Encoder-decoder models
|
89 |
+
if model_type in ['t5', 'bart', 'pegasus', 'mbart', 'blenderbot', 'bigbird_pegasus']:
|
90 |
+
return ModelType.ENCODER_DECODER
|
91 |
+
|
92 |
+
# Decoder-only models (GPT-like)
|
93 |
+
elif model_type in ['gpt', 'gpt2', 'gpt_neo', 'gpt_neox', 'gptj', 'bloom', 'llama', 'mistral', 'qwen']:
|
94 |
+
return ModelType.DECODER
|
95 |
+
|
96 |
+
# Encoder-only models (BERT-like)
|
97 |
+
elif model_type in ['bert', 'roberta', 'camembert', 'distilbert', 'electra', 'deberta', 'albert']:
|
98 |
+
return ModelType.ENCODER
|
99 |
+
|
100 |
+
# Fallback: detect from architecture class name
|
101 |
+
if hasattr(config, 'architectures') and config.architectures:
|
102 |
+
arch_name = config.architectures[0].lower()
|
103 |
+
|
104 |
+
if any(name in arch_name for name in ['t5', 'bart', 'pegasus', 'mbart', 'blenderbot']):
|
105 |
+
return ModelType.ENCODER_DECODER
|
106 |
+
elif any(name in arch_name for name in ['gpt', 'bloom', 'llama', 'mistral', 'qwen']):
|
107 |
+
return ModelType.DECODER
|
108 |
+
elif any(name in arch_name for name in ['bert', 'roberta', 'camembert', 'distilbert', 'electra', 'deberta', 'albert']):
|
109 |
+
return ModelType.ENCODER
|
110 |
+
|
111 |
+
return ModelType.Unknown
|
112 |
+
|
113 |
+
@staticmethod
|
114 |
+
def from_architecture(architecture):
|
115 |
+
"""Detect model type from architecture string"""
|
116 |
+
if not architecture or architecture == "?":
|
117 |
+
return ModelType.Unknown
|
118 |
+
|
119 |
+
arch_lower = architecture.lower()
|
120 |
+
|
121 |
+
# Encoder-decoder patterns
|
122 |
+
if any(pattern in arch_lower for pattern in ['t5', 'bart', 'pegasus', 'mbart', 'blenderbot']):
|
123 |
+
return ModelType.ENCODER_DECODER
|
124 |
+
|
125 |
+
# Decoder patterns (GPT-like)
|
126 |
+
elif any(pattern in arch_lower for pattern in ['gpt', 'bloom', 'llama', 'mistral', 'qwen', 'causal']):
|
127 |
+
return ModelType.DECODER
|
128 |
+
|
129 |
+
# Encoder patterns (BERT-like)
|
130 |
+
elif any(pattern in arch_lower for pattern in ['bert', 'roberta', 'camembert', 'distilbert', 'electra', 'deberta', 'albert', 'formaskedlm', 'fortokenclassification', 'forsequenceclassification']):
|
131 |
+
return ModelType.ENCODER
|
132 |
+
|
133 |
return ModelType.Unknown
|
134 |
|
135 |
class WeightType(Enum):
|
src/leaderboard/read_evals.py
CHANGED
@@ -114,7 +114,18 @@ class EvalResult:
|
|
114 |
try:
|
115 |
with open(request_file, "r") as f:
|
116 |
request = json.load(f)
|
117 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
self.weight_type = WeightType[request.get("weight_type", "Original")]
|
119 |
self.license = request.get("license", "?")
|
120 |
self.likes = request.get("likes", 0)
|
|
|
114 |
try:
|
115 |
with open(request_file, "r") as f:
|
116 |
request = json.load(f)
|
117 |
+
|
118 |
+
# Convert emoji symbol to ModelType
|
119 |
+
model_type_symbol = request.get("model_type", "?")
|
120 |
+
if model_type_symbol == "🔤":
|
121 |
+
self.model_type = ModelType.ENCODER
|
122 |
+
elif model_type_symbol == "🔽":
|
123 |
+
self.model_type = ModelType.DECODER
|
124 |
+
elif model_type_symbol == "🔄":
|
125 |
+
self.model_type = ModelType.ENCODER_DECODER
|
126 |
+
else:
|
127 |
+
self.model_type = ModelType.Unknown
|
128 |
+
|
129 |
self.weight_type = WeightType[request.get("weight_type", "Original")]
|
130 |
self.license = request.get("license", "?")
|
131 |
self.likes = request.get("likes", 0)
|
src/submission/submit.py
CHANGED
@@ -18,6 +18,7 @@ def add_new_eval(
|
|
18 |
model: str,
|
19 |
revision: str,
|
20 |
precision: str,
|
|
|
21 |
):
|
22 |
global REQUESTED_MODELS
|
23 |
global USERS_TO_SUBMISSION_DATES
|
@@ -31,6 +32,8 @@ def add_new_eval(
|
|
31 |
model_path = model.split("/")[1]
|
32 |
|
33 |
precision = precision.split(" ")[0]
|
|
|
|
|
34 |
current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
35 |
|
36 |
# Does the model actually exist?
|
@@ -71,6 +74,7 @@ def add_new_eval(
|
|
71 |
"model": model,
|
72 |
"revision": revision,
|
73 |
"precision": precision,
|
|
|
74 |
"status": "PENDING",
|
75 |
"submitted_time": current_time,
|
76 |
"likes": model_info.likes,
|
|
|
18 |
model: str,
|
19 |
revision: str,
|
20 |
precision: str,
|
21 |
+
model_type: str,
|
22 |
):
|
23 |
global REQUESTED_MODELS
|
24 |
global USERS_TO_SUBMISSION_DATES
|
|
|
32 |
model_path = model.split("/")[1]
|
33 |
|
34 |
precision = precision.split(" ")[0]
|
35 |
+
# Extract just the emoji from model_type (e.g., "🔤 Encoder (BERT-like)" -> "🔤")
|
36 |
+
model_type_symbol = model_type.split(" ")[0] if model_type else "?"
|
37 |
current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
|
38 |
|
39 |
# Does the model actually exist?
|
|
|
74 |
"model": model,
|
75 |
"revision": revision,
|
76 |
"precision": precision,
|
77 |
+
"model_type": model_type_symbol,
|
78 |
"status": "PENDING",
|
79 |
"submitted_time": current_time,
|
80 |
"likes": model_info.likes,
|