karimouda committed
Commit bcbf716 · Parent: c8c4e79

Contamination + main page styling
app.py CHANGED
@@ -1,5 +1,5 @@
 import gradio as gr
-from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
+from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns, SearchColumns
 import pandas as pd
 from apscheduler.schedulers.background import BackgroundScheduler
 #from huggingface_hub import snapshot_download
@@ -62,25 +62,16 @@ LEADERBOARD_DF = get_leaderboard_df(EVAL_RESULTS_PATH, EVAL_REQUESTS_PATH, COLS,
 def init_leaderboard(dataframe):
     #if dataframe is None or dataframe.empty:
     #raise ValueError("Leaderboard DataFrame is empty or None.")
-    return Leaderboard(
-        value=dataframe,
-        datatype=[c.type for c in fields(AutoEvalColumn)],
-        select_columns=SelectColumns(
-            default_selection=[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default],
-            cant_deselect=[c.name for c in fields(AutoEvalColumn) if c.never_hidden],
-            label="Select Columns to Display:",
-        ),
-        search_columns=[AutoEvalColumn.model.name],
-        hide_columns=[c.name for c in fields(AutoEvalColumn) if c.hidden],
-        filter_columns=[
-
-            ColumnFilter(AutoEvalColumn.model_source.name, type="checkboxgroup", label="Model Source"),
-            ColumnFilter(AutoEvalColumn.model_category.name, type="checkboxgroup", label="Model Category"),
-
-        ],
-        bool_checkboxgroup_label="Hide models",
-        interactive=True,
-    )
+    return gr.Dataframe(
+        value=dataframe[[c.name for c in fields(AutoEvalColumn) if c.displayed_by_default]],
+        datatype="markdown",
+        wrap=True,
+        show_fullscreen_button=False,
+        interactive=False,
+        column_widths=[20, 60, 40, 150, 60, 70, 70],
+        max_height=420,
+        elem_classes="leaderboard_col_style",
+    )
 
 
 demo = gr.Blocks(css=custom_css)
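
Note: for a quick sanity check of the new rendering path, here is a minimal, self-contained sketch. The sample data and column names are illustrative stand-ins for the AutoEvalColumn-driven selection above, and it assumes a Gradio version that supports show_fullscreen_button and max_height, as the Space evidently uses:

    import gradio as gr
    import pandas as pd

    # Illustrative stand-in for the leaderboard data built elsewhere in app.py.
    sample_df = pd.DataFrame({
        "Rank": [1, 2],
        "Model": ["<a href='https://huggingface.co/org/model-a'>model-a</a>",
                  "<a href='https://huggingface.co/org/model-b'>model-b</a>"],
        "Benchmark Score": [71.3, 65.8],
    })

    def init_leaderboard(dataframe):
        # Mirrors the committed version: a read-only gr.Dataframe rendered as
        # markdown so HTML cells (model links, contamination badges) display.
        return gr.Dataframe(
            value=dataframe,
            datatype="markdown",
            wrap=True,
            show_fullscreen_button=False,
            interactive=False,
            max_height=420,
            elem_classes="leaderboard_col_style",
        )

    with gr.Blocks() as demo:
        init_leaderboard(sample_df)

    if __name__ == "__main__":
        demo.launch()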
src/about.py CHANGED
@@ -10,7 +10,7 @@ class EvalDimension:
 # Select your tasks here
 # ---------------------------------------------------
 class EvalDimensions(Enum):
-    d0 = EvalDimension("speed", "Speed (s/q)")
+    d0 = EvalDimension("speed", "Speed (words/sec)")
     d1 = EvalDimension("contamination_score", "Contamination Score")
 
 NUM_FEWSHOT = 0 # Change with your few shot
@@ -23,8 +23,10 @@ TITLE = """<img src='https://huggingface.co/spaces/silma-ai/Arabic-LLM-Broad-Lea
 
 # What does your leaderboard evaluate?
 INTRODUCTION_TEXT = """
-<h1 style='width: 100%;text-align: center;' id="space-title">The First Comprehensive Leaderboard for Arabic LLMs</h1>
-Welcome to the official Leaderboard of the unique meta benchmark Arabic Board Benchmark (ABB), ABB combines 464 questions sampled from 63 Arabic benchmarking datasets on the internet, evaluating 22 categories and 174 sub-categories.
+<h1 style='width: 100%;text-align: center;' id="space-title">Arabic Broad Leaderboard (ABL) is the first comprehensive leaderboard for Arabic LLMs</h1>
+ABL is the official leaderboard of the <a href='https://huggingface.co/datasets/silma-ai/arabic-broad-benchmark' target='_blank'>Arabic Broad Benchmark (ABB)</a>. ABB is a compact yet comprehensive benchmark that evaluates Arabic LLMs from all angles. It consists of <b>450</b> high-quality questions sampled from <b>63</b> Arabic benchmarking datasets, covering <b>22 categories</b>, some of which are unique to the Arabic language, such as Diacritization and Dialects. Find more details in the About tab.
+<br/>
+<br/>
 """
 
 # Which evaluations are you running? how can people reproduce what you have?
@@ -66,8 +68,17 @@ Make sure you have followed the above steps first.
 If everything is done, check you can launch the EleutherAIHarness on your model locally, using the above command without modifications (you can add `--limit` to limit the number of examples per task).
 """
 
-CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"
+CITATION_BUTTON_LABEL = "Copy the following snippet to cite the Leaderboard"
 CITATION_BUTTON_TEXT = r"""
+
+@misc{ABL,
+  author = {SILMA AI Team},
+  title = {Arabic Broad Leaderboard},
+  year = {2025},
+  publisher = {SILMA.AI},
+  howpublished = "{\url{https://huggingface.co/spaces/silma-ai/Arabic-LLM-Broad-Leaderboard}}"
+}
+
 """
 
-FOOTER_TEXT = """<div style='display:flex;justify-content:center;align-items:center;'><span style='font-size:40px;font-weight:bold;margin-right:20px;'>Sponsored By</span><a href='https://silma.ai/?ref=abl' target='_blank'><img src='https://blog.silma.ai/wp-content/uploads/2024/10/cropped-silma-logo-box.png' style='height:60px'></a></div>"""
+FOOTER_TEXT = """<div style='display:flex;justify-content:center;align-items:center;'><span style='font-size:36px;font-weight:bold;margin-right:20px;'>Sponsored By</span><a href='https://silma.ai/?ref=abl' target='_blank'><img src='https://blog.silma.ai/wp-content/uploads/2024/10/cropped-silma-logo-box.png' style='height:60px'></a></div>"""
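
Note: the EvalDimension container used by the enum above is not shown in this diff; judging by how read_evals.py (below) accesses .metric and .col_name, it is presumably shaped roughly like this sketch:

    from dataclasses import dataclass
    from enum import Enum

    @dataclass
    class EvalDimension:
        metric: str    # key into the per-model results JSON, e.g. "speed"
        col_name: str  # column header on the leaderboard, e.g. "Speed (words/sec)"

    class EvalDimensions(Enum):
        d0 = EvalDimension("speed", "Speed (words/sec)")
        d1 = EvalDimension("contamination_score", "Contamination Score")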
src/display/css_html_js.py CHANGED
@@ -47,7 +47,7 @@ custom_css = """
 }
 
 .tab-buttons button {
-  font-size: 20px;
+  font-size:20px;
 }
 
 #scale-logo {
@@ -94,6 +94,14 @@ custom_css = """
 #box-filter > .form{
   border: 0
 }
+
+.leaderboard_col_style{
+
+}
+.leaderboard_col_style th button {
+  font-size:14px !important
+}
+
 """
 
 get_window_url_params = """
src/display/formatting.py CHANGED
@@ -6,6 +6,11 @@ def make_clickable_model(model_name):
     link = f"https://huggingface.co/{model_name}"
     return model_hyperlink(link, model_name)
 
+def make_contamination_red(contamination_score):
+    if contamination_score <= 0:
+        return f"<div style='background-color:green;padding:5px;color: white; text-align: center;margin:0px' title='Clean model!'>{round(contamination_score)}</div>"
+    else:
+        return f"<div style='background-color:red;padding:5px;color: white; text-align: center;margin:0px' title='Contaminated model!'>{round(contamination_score, 2)}</div>"
 
 def styled_error(error):
     return f"<p style='color: red; font-size: 20px; text-align: center;'>{error}</p>"
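
Note: a quick usage sketch of the new helper; the sample scores are made up. Per the code, a score of zero or below renders a green "clean" badge, anything positive a red badge rounded to two decimals:

    from src.display.formatting import make_contamination_red

    print(make_contamination_red(0))      # green badge showing "0" ("Clean model!")
    print(make_contamination_red(0.137))  # red badge showing "0.14" ("Contaminated model!")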
src/leaderboard/read_evals.py CHANGED
@@ -7,7 +7,7 @@ from dataclasses import dataclass
 import dateutil
 import numpy as np
 
-from src.display.formatting import make_clickable_model
+from src.display.formatting import make_clickable_model, make_contamination_red
 from src.display.utils import AutoEvalColumn, EvalDimensions#, ModelType, Precision, WeightType
 from src.submission.check_validity import is_model_on_hub
 
@@ -135,7 +135,11 @@ class EvalResult:
         }
 
         for eval_dim in EvalDimensions:
-            data_dict[eval_dim.value.col_name] = self.results[eval_dim.value.metric]
+            dimension_name = eval_dim.value.col_name
+            dimension_value = self.results[eval_dim.value.metric]
+            if dimension_name == "Contamination Score":
+                dimension_value = make_contamination_red(dimension_value)
+            data_dict[dimension_name] = dimension_value
 
         return data_dict
 
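
Note: the dispatch above keys on the human-readable column name rather than the metric key. A small sketch of what lands in data_dict for a hypothetical results payload (the imports assume the repo layout used above):

    from src.display.formatting import make_contamination_red
    from src.display.utils import EvalDimensions

    # Hypothetical per-model results; keys mirror the EvalDimensions metrics.
    results = {"speed": 42.5, "contamination_score": 0.137}

    data_dict = {}
    for eval_dim in EvalDimensions:
        name = eval_dim.value.col_name
        value = results[eval_dim.value.metric]
        if name == "Contamination Score":
            value = make_contamination_red(value)  # wrap in the red/green HTML badge
        data_dict[name] = value

    # data_dict -> {"Speed (words/sec)": 42.5,
    #               "Contamination Score": "<div ...>0.14</div>"}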
src/populate.py CHANGED
@@ -25,6 +25,8 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
         df.insert(0, "Rank", range(1, len(df) + 1))
         df = df[cols].round(decimals=2)
         print(df)
+
+
         return df
     else:
         return pd.DataFrame(columns=cols)