handling_the_model_alias_field

#6
by fzoll - opened
Files changed (1) hide show
  1. app/backend/data_engine.py +22 -2
app/backend/data_engine.py CHANGED
@@ -20,7 +20,8 @@ COLUMNS_TYPES = ["markdown",
20
 
21
  ]
22
 
23
- GIT_URL = "https://raw.githubusercontent.com/embedding-benchmark/rteb/refs/heads/main/results/"
 
24
  DATASET_URL = f"{GIT_URL}datasets.json"
25
  MODEL_URL = f"{GIT_URL}models.json"
26
  RESULT_URL = f"{GIT_URL}results.json"
@@ -104,6 +105,25 @@ class DataEngine:
104
 
105
  df_model = pd.DataFrame(models_list)
106
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  df = pd.merge(df_result, df_dataset, on=["dataset_name"], how="inner")
108
 
109
  # set dataset default value to 0
@@ -132,7 +152,7 @@ class DataEngine:
132
 
133
  pivot_dataset = df_result.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="dataset_name", values="ndcg_at_10").fillna(0)
134
 
135
- df = pd.merge(df_model, pivot, on=["model_name", "embd_dim", "embd_dtype"])
136
  df = pd.merge(df, pivot_dataset, on=["model_name", "embd_dim", "embd_dtype"])
137
 
138
  if df.empty:
 
20
 
21
  ]
22
 
23
+ BRANCH = 'main'
24
+ GIT_URL = f"https://raw.githubusercontent.com/embedding-benchmark/rteb/refs/heads/{BRANCH}/results/"
25
  DATASET_URL = f"{GIT_URL}datasets.json"
26
  MODEL_URL = f"{GIT_URL}models.json"
27
  RESULT_URL = f"{GIT_URL}results.json"
 
105
 
106
  df_model = pd.DataFrame(models_list)
107
 
108
+ # Create mapping for model names/aliases
109
+ if 'alias' in df_model.columns:
110
+ # Create a lookup table for alias to model_name mapping
111
+ alias_mapping = df_model[df_model['alias'].notna()].set_index('alias')['model_name'].to_dict()
112
+
113
+ # Add rows for aliases to enable joining
114
+ alias_rows = []
115
+ for _, row in df_model[df_model['alias'].notna()].iterrows():
116
+ alias_row = row.copy()
117
+ alias_row['model_name'] = row['alias']
118
+ alias_rows.append(alias_row)
119
+
120
+ if alias_rows:
121
+ df_model_extended = pd.concat([df_model, pd.DataFrame(alias_rows)], ignore_index=True)
122
+ else:
123
+ df_model_extended = df_model
124
+ else:
125
+ df_model_extended = df_model
126
+
127
  df = pd.merge(df_result, df_dataset, on=["dataset_name"], how="inner")
128
 
129
  # set dataset default value to 0
 
152
 
153
  pivot_dataset = df_result.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="dataset_name", values="ndcg_at_10").fillna(0)
154
 
155
+ df = pd.merge(df_model_extended, pivot, on=["model_name", "embd_dim", "embd_dtype"])
156
  df = pd.merge(df, pivot_dataset, on=["model_name", "embd_dim", "embd_dtype"])
157
 
158
  if df.empty: