cwchen-cm committed on
Commit
a10fd08
·
1 Parent(s): 949853a

Initial commit

Browse files
Files changed (2) hide show
  1. app.py +40 -0
  2. results.csv +28 -0
app.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app.py
2
+
3
+ import gradio as gr
4
+ import pandas as pd
5
+
6
+ # CSS for layout styling
7
+ css = """
8
+ table > thead {
9
+ white-space: normal
10
+ }
11
+ table {
12
+ --cell-width-1: 250px
13
+ }
14
+ table > tbody > tr > td:nth-child(2) > div {
15
+ overflow-x: auto
16
+ }
17
+ .filter-checkbox-group {
18
+ max-width: max-content;
19
+ }
20
+ """
21
+
22
# Load dataset
def load_data(path="results.csv"):
    """Load the benchmark results table.

    Parameters
    ----------
    path : str or file-like, optional
        Source of the results CSV. Defaults to the bundled
        ``results.csv``; anything accepted by ``pandas.read_csv``
        (path string, ``Path``, open file object) also works.

    Returns
    -------
    pandas.DataFrame
        The leaderboard rows exactly as stored in the CSV.
    """
    return pd.read_csv(path)
27
+
28
# Load the leaderboard once at import time; the table is static.
df = load_data()

# Static markdown sections rendered after the leaderboard table.
_FOOTER_SECTIONS = (
    "## Replicating results\nThe code used to generate these results will be shared on Github soon.",
    "## Acknowledgements\nICLERB was developed at [Crossing Minds](https://www.crossingminds.com/iclerb) by:",
    "- [Marie Al Ghossein](https://www.linkedin.com/in/mariealghossein/)",
    "- [Emile Contal](https://www.linkedin.com/in/emile-contal-72837652/)",
)

with gr.Blocks(css=css) as demo:
    gr.Markdown("# In-Context Learning Embedding and Reranker Benchmark (ICLERB) Leaderboard")
    # NOTE(review): the [paper]() link below is empty — fill in the URL once published.
    gr.Markdown("## Introduction\nIn-Context Learning Embedding and Rerankers Benchmark (ICLERB) is a benchmark to evaluate embedding and reranking models used to retrieve examples for In-Context Learning (ICL). The methodology is described in this [paper](). ")
    gr.Markdown("## Leaderboard")
    gr.Dataframe(df)
    for _section in _FOOTER_SECTIONS:
        gr.Markdown(_section)

demo.launch()
results.csv ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Organization,Model,Model Size (Parameters),nDCG@10,nDCG@50
2
+ Crossing Minds,cm-rerank-mxbai-rlaif-v0.1,335M,0.7238,0.7225
3
+ BAAI,bge-en-icl,7.1B,0.7192,0.7081
4
+ nvidia,NV-Embed-v2,7.85B,0.7078,0.6998
5
+ Salesforce,SFR-Embedding-2_R,7.1B,0.6925,0.6859
6
+ Alibaba NLP,gte-Qwen2-7B-instruct,7.6B,0.6877,0.6837
7
+ Cohere,embed-english-v3.0,N/A,0.6876,0.6831
8
+ nvidia,NV-Retriever-v1,7.1B,0.6878,0.6829
9
+ dunzhang,stella_en_1.5B_v5,1.5B,0.6885,0.6828
10
+ Alibaba NLP,gte-Qwen2-1.5B-instruct,1.8B,0.6865,0.6825
11
+ mixedbread ai,mxbai-embed-large-v1,335M,0.6787,0.6782
12
+ OpenAI,text-embedding-3-large,N/A,0.6818,0.6774
13
+ mixedbread ai,mxbai-embed-large-v1@384,335M,0.6757,0.6756
14
+ OpenAI,text-embedding-3-small,N/A,0.6787,0.6740
15
+ Linq AI Research,Linq-Embed-Mistral,7.1B,0.6793,0.6731
16
+ Snowflake,snowflake-arctic-embed-s,33M,0.6715,0.6684
17
+ zeta alpha ai,Zeta-Alpha-E5-Mistral,7.1B,0.6704,0.6672
18
+ sentence transformers,sentence-transformers/all-MiniLM-L6-v2,23M,0.6672,0.6665
19
+ Voyage AI,voyage-3-lite,N/A,0.6681,0.6660
20
+ sentence transformers,multi-qa-distilbert-cos-v1,66M,0.6660,0.6657
21
+ sentence transformers,multi-qa-MiniLM-L6-cos-v1,23M,0.6650,0.6652
22
+ sentence transformers,all-mpnet-base-v2,110M,0.6655,0.6650
23
+ sentence transformers,all-MiniLM-L12-v2,33M,0.6651,0.6646
24
+ Snowflake,snowflake-arctic-embed-l,334M,0.6686,0.6641
25
+ sentence transformers,multi-qa-mpnet-base-dot-v1,109M,0.6645,0.6639
26
+ Snowflake,snowflake-arctic-embed-m-v1.5,109M,0.6646,0.6630
27
+ Cohere,rerank-english-v3.0,N/A,0.6679,0.6604
28
+ Voyage AI,rerank-2,N/A,0.6386,0.6432