Spaces:
Running
Running
Handling the model alias field
Browse files- app/backend/data_engine.py +24 -8
app/backend/data_engine.py
CHANGED
@@ -20,7 +20,8 @@ COLUMNS_TYPES = ["markdown",
|
|
20 |
|
21 |
]
|
22 |
|
23 |
-
|
|
|
24 |
DATASET_URL = f"{GIT_URL}datasets.json"
|
25 |
MODEL_URL = f"{GIT_URL}models.json"
|
26 |
RESULT_URL = f"{GIT_URL}results.json"
|
@@ -104,11 +105,26 @@ class DataEngine:
|
|
104 |
|
105 |
df_model = pd.DataFrame(models_list)
|
106 |
|
107 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
108 |
|
109 |
-
|
110 |
-
df = df.pivot(index=["model_name", "embd_dim", "embd_dtype", "group_name"], columns="dataset_name",
|
111 |
-
values=["ndcg_at_10"]).fillna(0).stack(level=1).reset_index()
|
112 |
# df = pd.merge(df, df_model, on=["model_name"], how="inner")
|
113 |
|
114 |
# dataset_num_map = {}
|
@@ -124,15 +140,15 @@ class DataEngine:
|
|
124 |
}).reset_index()
|
125 |
|
126 |
pivot = grouped_model.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="group_name",
|
127 |
-
values=["ndcg_at_10"]).round(2)
|
128 |
|
129 |
# Rename columns
|
130 |
pivot.columns = list(
|
131 |
map(lambda x: f"{x[1].capitalize()} Average" if x[1] != 'text' else f"Average", pivot.columns))
|
132 |
|
133 |
-
pivot_dataset = df_result.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="dataset_name", values="ndcg_at_10")
|
134 |
|
135 |
-
df = pd.merge(
|
136 |
df = pd.merge(df, pivot_dataset, on=["model_name", "embd_dim", "embd_dtype"])
|
137 |
|
138 |
if df.empty:
|
|
|
20 |
|
21 |
]
|
22 |
|
23 |
+
BRANCH = 'updating_the_results'
|
24 |
+
GIT_URL = f"https://raw.githubusercontent.com/embedding-benchmark/rteb/refs/heads/{BRANCH}/results/"
|
25 |
DATASET_URL = f"{GIT_URL}datasets.json"
|
26 |
MODEL_URL = f"{GIT_URL}models.json"
|
27 |
RESULT_URL = f"{GIT_URL}results.json"
|
|
|
105 |
|
106 |
df_model = pd.DataFrame(models_list)
|
107 |
|
108 |
+
# Create mapping for model names/aliases
|
109 |
+
if 'alias' in df_model.columns:
|
110 |
+
# Create a lookup table for alias to model_name mapping
|
111 |
+
alias_mapping = df_model[df_model['alias'].notna()].set_index('alias')['model_name'].to_dict()
|
112 |
+
|
113 |
+
# Add rows for aliases to enable joining
|
114 |
+
alias_rows = []
|
115 |
+
for _, row in df_model[df_model['alias'].notna()].iterrows():
|
116 |
+
alias_row = row.copy()
|
117 |
+
alias_row['model_name'] = row['alias']
|
118 |
+
alias_rows.append(alias_row)
|
119 |
+
|
120 |
+
if alias_rows:
|
121 |
+
df_model_extended = pd.concat([df_model, pd.DataFrame(alias_rows)], ignore_index=True)
|
122 |
+
else:
|
123 |
+
df_model_extended = df_model
|
124 |
+
else:
|
125 |
+
df_model_extended = df_model
|
126 |
|
127 |
+
df = pd.merge(df_result, df_dataset, on=["dataset_name"], how="inner")
|
|
|
|
|
128 |
# df = pd.merge(df, df_model, on=["model_name"], how="inner")
|
129 |
|
130 |
# dataset_num_map = {}
|
|
|
140 |
}).reset_index()
|
141 |
|
142 |
pivot = grouped_model.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="group_name",
|
143 |
+
values=["ndcg_at_10"]).round(2)
|
144 |
|
145 |
# Rename columns
|
146 |
pivot.columns = list(
|
147 |
map(lambda x: f"{x[1].capitalize()} Average" if x[1] != 'text' else f"Average", pivot.columns))
|
148 |
|
149 |
+
pivot_dataset = df_result.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="dataset_name", values="ndcg_at_10")
|
150 |
|
151 |
+
df = pd.merge(df_model_extended, pivot, on=["model_name", "embd_dim", "embd_dtype"])
|
152 |
df = pd.merge(df, pivot_dataset, on=["model_name", "embd_dim", "embd_dtype"])
|
153 |
|
154 |
if df.empty:
|