fzoll committed on
Commit
602cef6
·
1 Parent(s): 9eae046

Handling the model alias field

Browse files
Files changed (1) hide show
  1. app/backend/data_engine.py +24 -8
app/backend/data_engine.py CHANGED
@@ -20,7 +20,8 @@ COLUMNS_TYPES = ["markdown",
20
 
21
  ]
22
 
23
- GIT_URL = "https://raw.githubusercontent.com/embedding-benchmark/rteb/refs/heads/main/results/"
 
24
  DATASET_URL = f"{GIT_URL}datasets.json"
25
  MODEL_URL = f"{GIT_URL}models.json"
26
  RESULT_URL = f"{GIT_URL}results.json"
@@ -104,11 +105,26 @@ class DataEngine:
104
 
105
  df_model = pd.DataFrame(models_list)
106
 
107
- df = pd.merge(df_result, df_dataset, on=["dataset_name"], how="inner")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
 
109
- # set dataset default value to 0
110
- df = df.pivot(index=["model_name", "embd_dim", "embd_dtype", "group_name"], columns="dataset_name",
111
- values=["ndcg_at_10"]).fillna(0).stack(level=1).reset_index()
112
  # df = pd.merge(df, df_model, on=["model_name"], how="inner")
113
 
114
  # dataset_num_map = {}
@@ -124,15 +140,15 @@ class DataEngine:
124
  }).reset_index()
125
 
126
  pivot = grouped_model.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="group_name",
127
- values=["ndcg_at_10"]).round(2).fillna(0)
128
 
129
  # Rename columns
130
  pivot.columns = list(
131
  map(lambda x: f"{x[1].capitalize()} Average" if x[1] != 'text' else f"Average", pivot.columns))
132
 
133
- pivot_dataset = df_result.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="dataset_name", values="ndcg_at_10").fillna(0)
134
 
135
- df = pd.merge(df_model, pivot, on=["model_name", "embd_dim", "embd_dtype"])
136
  df = pd.merge(df, pivot_dataset, on=["model_name", "embd_dim", "embd_dtype"])
137
 
138
  if df.empty:
 
20
 
21
  ]
22
 
23
+ BRANCH = 'updating_the_results'
24
+ GIT_URL = f"https://raw.githubusercontent.com/embedding-benchmark/rteb/refs/heads/{BRANCH}/results/"
25
  DATASET_URL = f"{GIT_URL}datasets.json"
26
  MODEL_URL = f"{GIT_URL}models.json"
27
  RESULT_URL = f"{GIT_URL}results.json"
 
105
 
106
  df_model = pd.DataFrame(models_list)
107
 
108
+ # Create mapping for model names/aliases
109
+ if 'alias' in df_model.columns:
110
+ # Create a lookup table for alias to model_name mapping
111
+ alias_mapping = df_model[df_model['alias'].notna()].set_index('alias')['model_name'].to_dict()
112
+
113
+ # Add rows for aliases to enable joining
114
+ alias_rows = []
115
+ for _, row in df_model[df_model['alias'].notna()].iterrows():
116
+ alias_row = row.copy()
117
+ alias_row['model_name'] = row['alias']
118
+ alias_rows.append(alias_row)
119
+
120
+ if alias_rows:
121
+ df_model_extended = pd.concat([df_model, pd.DataFrame(alias_rows)], ignore_index=True)
122
+ else:
123
+ df_model_extended = df_model
124
+ else:
125
+ df_model_extended = df_model
126
 
127
+ df = pd.merge(df_result, df_dataset, on=["dataset_name"], how="inner")
 
 
128
  # df = pd.merge(df, df_model, on=["model_name"], how="inner")
129
 
130
  # dataset_num_map = {}
 
140
  }).reset_index()
141
 
142
  pivot = grouped_model.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="group_name",
143
+ values=["ndcg_at_10"]).round(2)
144
 
145
  # Rename columns
146
  pivot.columns = list(
147
  map(lambda x: f"{x[1].capitalize()} Average" if x[1] != 'text' else f"Average", pivot.columns))
148
 
149
+ pivot_dataset = df_result.pivot(index=["model_name", "embd_dim", "embd_dtype"], columns="dataset_name", values="ndcg_at_10")
150
 
151
+ df = pd.merge(df_model_extended, pivot, on=["model_name", "embd_dim", "embd_dtype"])
152
  df = pd.merge(df, pivot_dataset, on=["model_name", "embd_dim", "embd_dtype"])
153
 
154
  if df.empty: