Spaces:
Running
Running
handling_the_model_alias_field
#6
by
fzoll
- opened
- app/backend/data_engine.py +22 -2
app/backend/data_engine.py
CHANGED
@@ -20,7 +20,8 @@ COLUMNS_TYPES = ["markdown",
|
|
20 |
|
21 |
]
|
22 |
|
23 |
-
|
|
|
24 |
DATASET_URL = f"{GIT_URL}datasets.json"
|
25 |
MODEL_URL = f"{GIT_URL}models.json"
|
26 |
RESULT_URL = f"{GIT_URL}results.json"
|
@@ -104,6 +105,25 @@ class DataEngine:
|
|
104 |
|
105 |
df_model = pd.DataFrame(models_list)
|
106 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
107 |
df = pd.merge(df_result, df_dataset, on=["dataset_name"], how="inner")
|
108 |
|
109 |
# set dataset default value to 0
|
@@ -132,7 +152,7 @@ class DataEngine:
|
|
132 |
|
133 |
pivot_dataset = df_result.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="dataset_name", values="ndcg_at_10").fillna(0)
|
134 |
|
135 |
-
df = pd.merge(
|
136 |
df = pd.merge(df, pivot_dataset, on=["model_name", "embd_dim", "embd_dtype"])
|
137 |
|
138 |
if df.empty:
|
|
|
20 |
|
21 |
]
|
22 |
|
23 |
+
BRANCH = 'main'
|
24 |
+
GIT_URL = f"https://raw.githubusercontent.com/embedding-benchmark/rteb/refs/heads/{BRANCH}/results/"
|
25 |
DATASET_URL = f"{GIT_URL}datasets.json"
|
26 |
MODEL_URL = f"{GIT_URL}models.json"
|
27 |
RESULT_URL = f"{GIT_URL}results.json"
|
|
|
105 |
|
106 |
df_model = pd.DataFrame(models_list)
|
107 |
|
108 |
+
# Create mapping for model names/aliases
|
109 |
+
if 'alias' in df_model.columns:
|
110 |
+
# Create a lookup table for alias to model_name mapping
|
111 |
+
alias_mapping = df_model[df_model['alias'].notna()].set_index('alias')['model_name'].to_dict()
|
112 |
+
|
113 |
+
# Add rows for aliases to enable joining
|
114 |
+
alias_rows = []
|
115 |
+
for _, row in df_model[df_model['alias'].notna()].iterrows():
|
116 |
+
alias_row = row.copy()
|
117 |
+
alias_row['model_name'] = row['alias']
|
118 |
+
alias_rows.append(alias_row)
|
119 |
+
|
120 |
+
if alias_rows:
|
121 |
+
df_model_extended = pd.concat([df_model, pd.DataFrame(alias_rows)], ignore_index=True)
|
122 |
+
else:
|
123 |
+
df_model_extended = df_model
|
124 |
+
else:
|
125 |
+
df_model_extended = df_model
|
126 |
+
|
127 |
df = pd.merge(df_result, df_dataset, on=["dataset_name"], how="inner")
|
128 |
|
129 |
# set dataset default value to 0
|
|
|
152 |
|
153 |
pivot_dataset = df_result.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="dataset_name", values="ndcg_at_10").fillna(0)
|
154 |
|
155 |
+
df = pd.merge(df_model_extended, pivot, on=["model_name", "embd_dim", "embd_dtype"])
|
156 |
df = pd.merge(df, pivot_dataset, on=["model_name", "embd_dim", "embd_dtype"])
|
157 |
|
158 |
if df.empty:
|