Artur Słomowski committed on
Commit
ee78c3a
·
1 Parent(s): 3c26d4f

columns visibility

Browse files
Files changed (2) hide show
  1. .gitignore +1 -0
  2. app.py +100 -45
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ venv
app.py CHANGED
@@ -23,12 +23,15 @@ def load_data(file_path):
23
  # Function to style the DataFrame
24
  @st.cache_data
25
  def style_dataframe(df: pd.DataFrame):
26
- df[RESULTS_COLUMN_NAME] = df.apply(lambda row: [row[SENTIMENT_COLUMN_NAME], row[UNDERSTANDING_COLUMN_NAME], row[PHRASEOLOGY_COLUMN_NAME]], axis=1)
27
- # Insert the new column after the 'Average' column
28
- cols = list(df.columns)
29
- cols.insert(cols.index(AVERAGE_COLUMN_NAME) + 1, cols.pop(cols.index(RESULTS_COLUMN_NAME)))
30
- df = df[cols]
31
- # Create a color ramp using Seaborn
 
 
 
32
  return df
33
 
34
  def styler(df: pd.DataFrame):
@@ -149,7 +152,7 @@ with tab1:
149
 
150
  # Closing filters in a expander
151
  with st.expander("Filtering benchmark data", icon='🔍'):
152
- # Filtering data, e.g. slider for params, average score, etc.
153
  col_filter_params, col_filter_average, col_filter_sentiment, col_filter_understanding, col_filter_phraseology = st.columns(5, gap='medium')
154
 
155
  with col_filter_params:
@@ -173,21 +176,29 @@ with tab1:
173
  )
174
  ]
175
 
176
- with col_filter_average:
177
- average_slider = st.slider("Average score", step=0.1, min_value=0.0, max_value=5.0, value=(0.0, 5.0))
178
- data = data[(data[AVERAGE_COLUMN_NAME] >= average_slider[0]) & (data[AVERAGE_COLUMN_NAME] <= average_slider[1])]
179
-
180
- with col_filter_sentiment:
181
- sentiment_slider = st.slider("Sentiment score", step=0.1, min_value=0.0, max_value=5.0, value=(0.0, 5.0))
182
- data = data[(data[SENTIMENT_COLUMN_NAME] >= sentiment_slider[0]) & (data[SENTIMENT_COLUMN_NAME] <= sentiment_slider[1])]
183
-
184
- with col_filter_understanding:
185
- understanding_slider = st.slider("Understanding score", step=0.1, min_value=0.0, max_value=5.0, value=(0.0, 5.0))
186
- data = data[(data[UNDERSTANDING_COLUMN_NAME] >= understanding_slider[0]) & (data[UNDERSTANDING_COLUMN_NAME] <= understanding_slider[1])]
187
-
188
- with col_filter_phraseology:
189
- phraseology_slider = st.slider("Phraseology score", step=0.1, min_value=0.0, max_value=5.0, value=(0.0, 5.0))
190
- data = data[(data[PHRASEOLOGY_COLUMN_NAME] >= phraseology_slider[0]) & (data[PHRASEOLOGY_COLUMN_NAME] <= phraseology_slider[1])]
 
 
 
 
 
 
 
 
191
 
192
  # Extract unique provider names from the "Model" column
193
  providers = data["Model"].apply(lambda x: x.split('/')[0].lower()).unique()
@@ -195,42 +206,84 @@ with tab1:
195
  # Filter data based on selected providers
196
  data = data[data["Model"].apply(lambda x: x.split('/')[0].lower()).isin(selected_providers)]
197
 
 
 
 
 
 
 
198
  ### Display data
 
199
  styled_df_show = style_dataframe(data)
200
  styled_df_show = styler(styled_df_show)
201
 
202
- st.data_editor(styled_df_show, column_config={
 
203
  "Model": st.column_config.TextColumn("Model", help="Model name", width="large"),
204
- "Params": st.column_config.NumberColumn("Params [B]"),
205
- AVERAGE_COLUMN_NAME: st.column_config.NumberColumn(AVERAGE_COLUMN_NAME),
206
- RESULTS_COLUMN_NAME: st.column_config.BarChartColumn(
 
 
 
 
 
 
 
 
 
207
  "Bar chart of results", help="Summary of the results of each task",
208
- y_min=0,y_max=5,),
209
- SENTIMENT_COLUMN_NAME: st.column_config.NumberColumn(SENTIMENT_COLUMN_NAME, help='Ability to analyze sentiment'),
210
- UNDERSTANDING_COLUMN_NAME: st.column_config.NumberColumn(UNDERSTANDING_COLUMN_NAME, help='Ability to understand language'),
211
- PHRASEOLOGY_COLUMN_NAME: st.column_config.NumberColumn(PHRASEOLOGY_COLUMN_NAME, help='Ability to understand phraseological compounds'),
212
- }, hide_index=True, disabled=True, height=500)
213
-
214
- # Add selection for models and create a bar chart for selected models using the AVERAGE_COLUMN_NAME, SENTIMENT_COLUMN_NAME, PHRASEOLOGY_COLUMN_NAME, UNDERSTANDING_COLUMN_NAME
215
- # Add default selection of 3 best models from AVERAGE_COLUMN_NAME and 1 best model with "Bielik" in Model column
216
- default_models = list(data.sort_values(AVERAGE_COLUMN_NAME, ascending=False)['Model'].head(3))
217
- bielik_model = data[data['Model'].str.contains('Bielik')].sort_values(AVERAGE_COLUMN_NAME, ascending=False)['Model'].iloc[0]
218
- if bielik_model not in default_models:
219
- default_models.append(bielik_model)
220
- selected_models = st.multiselect("Select models to compare", data["Model"].unique(), default=default_models)
221
- selected_data = data[data["Model"].isin(selected_models)]
222
- categories = [AVERAGE_COLUMN_NAME, SENTIMENT_COLUMN_NAME, PHRASEOLOGY_COLUMN_NAME, UNDERSTANDING_COLUMN_NAME]
223
-
224
- if selected_models:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  # Kolorki do wyboru:
226
  # colors = px.colors.sample_colorscale("viridis", len(selected_models)+1)
227
  colors = px.colors.qualitative.G10[:len(selected_models)]
228
 
229
  fig_bars = go.Figure()
230
  for model, color in zip(selected_models, colors):
231
- values = selected_data[selected_data['Model'] == model][categories].values.flatten().tolist()
232
  fig_bars.add_trace(go.Bar(
233
- x=categories,
234
  y=values,
235
  name=model,
236
  marker_color=color
@@ -246,6 +299,8 @@ with tab1:
246
  )
247
  fig_bars.update_yaxes(range=[0, 5.1])
248
  st.plotly_chart(fig_bars)
 
 
249
 
250
 
251
  ### Zakładka 2 --> Opis
 
23
  # Function to style the DataFrame
24
  @st.cache_data
25
  def style_dataframe(df: pd.DataFrame):
26
+ # Sprawdzamy, czy kolumna Average istnieje w ramce danych
27
+ if AVERAGE_COLUMN_NAME in df.columns:
28
+ # Jeśli wszystkie potrzebne kolumny są dostępne, tworzymy Results
29
+ if all(col in df.columns for col in [SENTIMENT_COLUMN_NAME, UNDERSTANDING_COLUMN_NAME, PHRASEOLOGY_COLUMN_NAME]):
30
+ df[RESULTS_COLUMN_NAME] = df.apply(lambda row: [row[SENTIMENT_COLUMN_NAME], row[UNDERSTANDING_COLUMN_NAME], row[PHRASEOLOGY_COLUMN_NAME]], axis=1)
31
+ # Insert the new column after the 'Average' column
32
+ cols = list(df.columns)
33
+ cols.insert(cols.index(AVERAGE_COLUMN_NAME) + 1, cols.pop(cols.index(RESULTS_COLUMN_NAME)))
34
+ df = df[cols]
35
  return df
36
 
37
  def styler(df: pd.DataFrame):
 
152
 
153
  # Closing filters in a expander
154
  with st.expander("Filtering benchmark data", icon='🔍'):
155
+ # Filtry powinny być dostępne tylko dla kolumn, które są w danych
156
  col_filter_params, col_filter_average, col_filter_sentiment, col_filter_understanding, col_filter_phraseology = st.columns(5, gap='medium')
157
 
158
  with col_filter_params:
 
176
  )
177
  ]
178
 
179
+ # Sprawdzamy, czy kolumna Average jest dostępna przed dodaniem filtra
180
+ if AVERAGE_COLUMN_NAME in data.columns:
181
+ with col_filter_average:
182
+ average_slider = st.slider("Average score", step=0.1, min_value=0.0, max_value=5.0, value=(0.0, 5.0))
183
+ data = data[(data[AVERAGE_COLUMN_NAME] >= average_slider[0]) & (data[AVERAGE_COLUMN_NAME] <= average_slider[1])]
184
+
185
+ # Sprawdzamy, czy kolumna Sentiment jest dostępna przed dodaniem filtra
186
+ if SENTIMENT_COLUMN_NAME in data.columns:
187
+ with col_filter_sentiment:
188
+ sentiment_slider = st.slider("Sentiment score", step=0.1, min_value=0.0, max_value=5.0, value=(0.0, 5.0))
189
+ data = data[(data[SENTIMENT_COLUMN_NAME] >= sentiment_slider[0]) & (data[SENTIMENT_COLUMN_NAME] <= sentiment_slider[1])]
190
+
191
+ # Sprawdzamy, czy kolumna Understanding jest dostępna przed dodaniem filtra
192
+ if UNDERSTANDING_COLUMN_NAME in data.columns:
193
+ with col_filter_understanding:
194
+ understanding_slider = st.slider("Understanding score", step=0.1, min_value=0.0, max_value=5.0, value=(0.0, 5.0))
195
+ data = data[(data[UNDERSTANDING_COLUMN_NAME] >= understanding_slider[0]) & (data[UNDERSTANDING_COLUMN_NAME] <= understanding_slider[1])]
196
+
197
+ # Sprawdzamy, czy kolumna Phraseology jest dostępna przed dodaniem filtra
198
+ if PHRASEOLOGY_COLUMN_NAME in data.columns:
199
+ with col_filter_phraseology:
200
+ phraseology_slider = st.slider("Phraseology score", step=0.1, min_value=0.0, max_value=5.0, value=(0.0, 5.0))
201
+ data = data[(data[PHRASEOLOGY_COLUMN_NAME] >= phraseology_slider[0]) & (data[PHRASEOLOGY_COLUMN_NAME] <= phraseology_slider[1])]
202
 
203
  # Extract unique provider names from the "Model" column
204
  providers = data["Model"].apply(lambda x: x.split('/')[0].lower()).unique()
 
206
  # Filter data based on selected providers
207
  data = data[data["Model"].apply(lambda x: x.split('/')[0].lower()).isin(selected_providers)]
208
 
209
+ # Dodanie filtra widocznych kolumn - bez kolumny Results, która jest tworzona później
210
+ base_columns = ["Model", "Params", AVERAGE_COLUMN_NAME,
211
+ SENTIMENT_COLUMN_NAME, UNDERSTANDING_COLUMN_NAME, PHRASEOLOGY_COLUMN_NAME]
212
+ default_columns = base_columns.copy()
213
+ selected_columns = st.multiselect("Visible columns", base_columns, default=default_columns)
214
+
215
  ### Display data
216
+ # Przygotowujemy pełne dane ze wszystkimi kolumnami
217
  styled_df_show = style_dataframe(data)
218
  styled_df_show = styler(styled_df_show)
219
 
220
+ # Dostosowanie column_config do wybranych kolumn
221
+ column_config = {
222
  "Model": st.column_config.TextColumn("Model", help="Model name", width="large"),
223
+ }
224
+
225
+ # Ustawiamy konfigurację dla wszystkich kolumn
226
+ if "Params" in styled_df_show.columns:
227
+ column_config["Params"] = st.column_config.NumberColumn("Params [B]") if "Params" in selected_columns else None
228
+
229
+ if AVERAGE_COLUMN_NAME in styled_df_show.columns:
230
+ column_config[AVERAGE_COLUMN_NAME] = st.column_config.NumberColumn(AVERAGE_COLUMN_NAME) if AVERAGE_COLUMN_NAME in selected_columns else None
231
+
232
+ if RESULTS_COLUMN_NAME in styled_df_show.columns:
233
+ # Results pokazujemy tylko jeśli Average jest wybrane
234
+ column_config[RESULTS_COLUMN_NAME] = st.column_config.BarChartColumn(
235
  "Bar chart of results", help="Summary of the results of each task",
236
+ y_min=0, y_max=5) if AVERAGE_COLUMN_NAME in selected_columns else None
237
+
238
+ if SENTIMENT_COLUMN_NAME in styled_df_show.columns:
239
+ column_config[SENTIMENT_COLUMN_NAME] = st.column_config.NumberColumn(SENTIMENT_COLUMN_NAME, help='Ability to analyze sentiment') if SENTIMENT_COLUMN_NAME in selected_columns else None
240
+
241
+ if UNDERSTANDING_COLUMN_NAME in styled_df_show.columns:
242
+ column_config[UNDERSTANDING_COLUMN_NAME] = st.column_config.NumberColumn(UNDERSTANDING_COLUMN_NAME, help='Ability to understand language') if UNDERSTANDING_COLUMN_NAME in selected_columns else None
243
+
244
+ if PHRASEOLOGY_COLUMN_NAME in styled_df_show.columns:
245
+ column_config[PHRASEOLOGY_COLUMN_NAME] = st.column_config.NumberColumn(PHRASEOLOGY_COLUMN_NAME, help='Ability to understand phraseological compounds') if PHRASEOLOGY_COLUMN_NAME in selected_columns else None
246
+
247
+ st.data_editor(styled_df_show, column_config=column_config, hide_index=True, disabled=True, height=500)
248
+
249
+ # Załaduj oryginalne dane do filtrowania i sortowania
250
+ original_data = load_data('data.json')
251
+
252
+ # Używamy oryginalnych danych do wyboru modeli i tworzenia wykresu
253
+ try:
254
+ default_models = list(data.sort_values(AVERAGE_COLUMN_NAME, ascending=False)['Model'].head(3))
255
+ if 'Bielik' in ' '.join(data['Model'].tolist()):
256
+ bielik_model = data[data['Model'].str.contains('Bielik')].sort_values(AVERAGE_COLUMN_NAME, ascending=False)['Model'].iloc[0]
257
+ if bielik_model not in default_models:
258
+ default_models.append(bielik_model)
259
+ except (KeyError, ValueError):
260
+ # Jeśli Average nie jest dostępne, używamy pierwszych 3 modeli bez sortowania
261
+ default_models = list(data['Model'].head(3))
262
+ if 'Bielik' in ' '.join(data['Model'].tolist()):
263
+ try:
264
+ bielik_model = data[data['Model'].str.contains('Bielik')]['Model'].iloc[0]
265
+ if bielik_model not in default_models:
266
+ default_models.append(bielik_model)
267
+ except (IndexError, KeyError):
268
+ pass
269
+
270
+ selected_models = st.multiselect("Select models to compare", original_data["Model"].unique(), default=default_models)
271
+ selected_data = original_data[original_data["Model"].isin(selected_models)]
272
+
273
+ # Dostosowanie kategorii do dostępnych kolumn
274
+ available_categories = [col for col in [AVERAGE_COLUMN_NAME, SENTIMENT_COLUMN_NAME, PHRASEOLOGY_COLUMN_NAME, UNDERSTANDING_COLUMN_NAME]
275
+ if col in selected_data.columns]
276
+
277
+ if selected_models and available_categories:
278
  # Kolorki do wyboru:
279
  # colors = px.colors.sample_colorscale("viridis", len(selected_models)+1)
280
  colors = px.colors.qualitative.G10[:len(selected_models)]
281
 
282
  fig_bars = go.Figure()
283
  for model, color in zip(selected_models, colors):
284
+ values = selected_data[selected_data['Model'] == model][available_categories].values.flatten().tolist()
285
  fig_bars.add_trace(go.Bar(
286
+ x=available_categories,
287
  y=values,
288
  name=model,
289
  marker_color=color
 
299
  )
300
  fig_bars.update_yaxes(range=[0, 5.1])
301
  st.plotly_chart(fig_bars)
302
+ elif selected_models:
303
+ st.warning("Brak dostępnych kategorii do porównania. Upewnij się, że wybrano przynajmniej jedną z kolumn: Average, Sentiment, Phraseology, Language understanding.")
304
 
305
 
306
  ### Zakładka 2 --> Opis