v2 update new #44
opened by MINGYISU

Files changed:
- app.py +2 -3
- results.jsonl +30 -30
- utils.py +46 -61
app.py
CHANGED

@@ -52,11 +52,10 @@ with gr.Blocks() as block:
                 label="Maximum number of parameters (B)",
             )
 
-        task_choices = [col for col in COLUMN_NAMES if col not in BASE_COLS]
         with gr.Row():
             tasks_select = gr.CheckboxGroup(
-                choices=…
-                value=…
+                choices=TASKS_V1 + TASKS_V2,
+                value=TASKS_V1,
                 label="Select tasks to Display",
                 elem_id="tasks-select"
             )
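The new `choices`/`value` pair turns the checkbox group into a V1/V2 task selector whose selection drives which leaderboard columns are shown. A minimal sketch of that wiring, assuming a hypothetical `select_columns` helper and `update_table` callback plus a toy DataFrame (the real Space uses `get_df` and `filter_columns_by_tasks` from utils.py):

```python
import gradio as gr
import pandas as pd

BASE_COLS = ["Rank", "Models", "Model Size(B)", "Data Source"]
TASKS_V1 = ["V1-Overall", "I-CLS", "I-QA", "I-RET", "I-VG"]
TASKS_V2 = ["V2-Overall", "V-CLS", "V-QA", "V-RET", "V-MRET", "VisDoc"]

# Toy leaderboard frame; the Space loads the real one from results.jsonl.
df = pd.DataFrame([{"Rank": 1, "Models": "demo", "Model Size(B)": 1.0,
                    "Data Source": "Self-Reported", "V1-Overall": 50.0}])

def select_columns(frame: pd.DataFrame, selected_tasks) -> pd.DataFrame:
    # Keep the base columns plus whichever task columns are both selected and present.
    wanted = BASE_COLS + list(selected_tasks or [])
    return frame[[c for c in wanted if c in frame.columns]]

with gr.Blocks() as block:
    tasks_select = gr.CheckboxGroup(
        choices=TASKS_V1 + TASKS_V2,   # offer both benchmark versions
        value=TASKS_V1,                # default to the V1 columns
        label="Select tasks to Display",
        elem_id="tasks-select",
    )
    table = gr.Dataframe(value=select_columns(df, TASKS_V1))

    def update_table(selected_tasks):
        # Re-project the leaderboard onto the chosen task columns.
        return select_columns(df, selected_tasks)

    tasks_select.change(fn=update_table, inputs=tasks_select, outputs=table)

block.launch()
```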
results.jsonl
CHANGED

@@ -1,30 +1,30 @@
-{"Models":"B3","Model Size(B)":8.29,"Data Source":"Self-Reported","Overall":72.0,"…
-{"Models":"CLIP-FT","Model Size(B)":0.428,"Data Source":"TIGER-Lab","Overall":45.4,"…
-{"Models":"LLaVE-0.5B","Model Size(B)":0.894,"Data Source":"Self-Reported","Overall":59.1,"…
-{"Models":"LLaVE-2B","Model Size(B)":1.95,"Data Source":"Self-Reported","Overall":65.2,"…
-{"Models":"LLaVE-7B","Model Size(B)":8.03,"Data Source":"Self-Reported","Overall":70.3,"…
-{"Models":"MM-Embed","Model Size(B)":8.18,"Data Source":"Self-Reported","Overall":50.0,"…
-{"Models":"MMRet-MLLM (FT)","Model Size(B)":7.57,"Data Source":"Self-Reported","Overall":64.1,"…
-{"Models":"MMRet-MLLM (LLaVA-1.6)","Model Size(B)":7.57,"Data Source":"Self-Reported","Overall":44.0,"…
-{"Models":"Magiclens","Model Size(B)":0.428,"Data Source":"TIGER-Lab","Overall":27.8,"…
-{"Models":"OpenCLIP-FT","Model Size(B)":0.428,"Data Source":"TIGER-Lab","Overall":47.2,"…
-{"Models":"QQMM-embed","Model Size(B)":8.297,"Data Source":"Self-Reported","Overall":72.175,"…
-{"Models":"UniIR (BLIP_FF)","Model Size(B)":0.247,"Data Source":"TIGER-Lab","Overall":42.8,"…
-{"Models":"UniIR (CLIP_SF)","Model Size(B)":0.428,"Data Source":"TIGER-Lab","Overall":44.7,"…
-{"Models":"UniME(LLaVA-1.6-7B-LoRA-LowRes)","Model Size(B)":7.57,"Data Source":"Self-Reported","Overall":66.6,"…
-{"Models":"UniME(LLaVA-OneVision-7B-LoRA-Res336)","Model Size(B)":8.03,"Data Source":"Self-Reported","Overall":70.7,"…
-{"Models":"UniME(Phi-3.5-V-LoRA)","Model Size(B)":4.2,"Data Source":"Self-Reported","Overall":64.2,"…
-{"Models":"VLM2Vec (LLaVA-1.6-LoRA-HighRes)","Model Size(B)":7.57,"Data Source":"TIGER-Lab","Overall":62.9,"…
-{"Models":"VLM2Vec (LLaVA-1.6-LoRA-LowRes)","Model Size(B)":7.57,"Data Source":"TIGER-Lab","Overall":55.0,"…
-{"Models":"VLM2Vec (Phi-3.5-V-FT)","Model Size(B)":4.15,"Data Source":"TIGER-Lab","Overall":55.9,"…
-{"Models":"VLM2Vec (Phi-3.5-V-LoRA)","Model Size(B)":4.15,"Data Source":"TIGER-Lab","Overall":60.1,"…
-{"Models":"VLM2Vec (Qwen2-VL-2B-LoRA-HighRes)","Model Size(B)":2.21,"Data Source":"TIGER-Lab","Overall":59.3,"…
-{"Models":"VLM2Vec (Qwen2-VL-7B-LoRA-HighRes)","Model Size(B)":8.29,"Data Source":"TIGER-Lab","Overall":65.8,"…
-{"Models":"blip2-opt-2.7b","Model Size(B)":3.74,"Data Source":"TIGER-Lab","Overall":25.2,"…
-{"Models":"clip-vit-large-patch14","Model Size(B)":0.428,"Data Source":"TIGER-Lab","Overall":37.8,"…
-{"Models":"e5-v","Model Size(B)":8.36,"Data Source":"TIGER-Lab","Overall":13.3,"…
-{"Models":"gme-Qwen2-VL-2B-Instruct","Model Size(B)":2.21,"Data Source":"Self-Reported","Overall":55.8,"…
-{"Models":"mmE5 (w\/ 560K synthetic data)","Model Size(B)":10.6,"Data Source":"Self-Reported","Overall":58.6,"…
-{"Models":"mmE5-mllama-11b-instruct","Model Size(B)":10.6,"Data Source":"Self-Reported","Overall":69.8,"…
-{"Models":"open_clip-ViT-L\/14","Model Size(B)":0.428,"Data Source":"TIGER-Lab","Overall":39.7,"…
-{"Models":"siglip-base-patch16-224","Model Size(B)":0.203,"Data Source":"TIGER-Lab","Overall":34.8,"…
+{"Models":"B3","Model Size(B)":8.29,"Data Source":"Self-Reported","V2-Overall":null,"V1-Overall":72.0,"I-CLS":70.0,"I-QA":66.5,"I-RET":74.1,"I-VG":84.6,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/raghavlite\/B3_Qwen2_7B"}
+{"Models":"CLIP-FT","Model Size(B)":0.428,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":45.4,"I-CLS":55.2,"I-QA":19.7,"I-RET":53.2,"I-VG":62.2,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/doi.org\/10.48550\/arXiv.2103.00020"}
+{"Models":"LLaVE-0.5B","Model Size(B)":0.894,"Data Source":"Self-Reported","V2-Overall":null,"V1-Overall":59.1,"I-CLS":57.4,"I-QA":50.3,"I-RET":59.8,"I-VG":82.9,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/zhibinlan\/LLaVE-0.5B"}
+{"Models":"LLaVE-2B","Model Size(B)":1.95,"Data Source":"Self-Reported","V2-Overall":null,"V1-Overall":65.2,"I-CLS":62.1,"I-QA":60.2,"I-RET":65.2,"I-VG":84.9,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/zhibinlan\/LLaVE-2B"}
+{"Models":"LLaVE-7B","Model Size(B)":8.03,"Data Source":"Self-Reported","V2-Overall":null,"V1-Overall":70.3,"I-CLS":65.7,"I-QA":65.4,"I-RET":70.9,"I-VG":91.9,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/zhibinlan\/LLaVE-7B"}
+{"Models":"MM-Embed","Model Size(B)":8.18,"Data Source":"Self-Reported","V2-Overall":null,"V1-Overall":50.0,"I-CLS":48.1,"I-QA":32.3,"I-RET":63.8,"I-VG":57.8,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/nvidia\/MM-Embed"}
+{"Models":"MMRet-MLLM (FT)","Model Size(B)":7.57,"Data Source":"Self-Reported","V2-Overall":null,"V1-Overall":64.1,"I-CLS":56.0,"I-QA":57.4,"I-RET":69.9,"I-VG":83.6,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/JUNJIE99\/MMRet-large"}
+{"Models":"MMRet-MLLM (LLaVA-1.6)","Model Size(B)":7.57,"Data Source":"Self-Reported","V2-Overall":null,"V1-Overall":44.0,"I-CLS":47.2,"I-QA":18.4,"I-RET":56.5,"I-VG":62.2,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/JUNJIE99\/MMRet-large"}
+{"Models":"Magiclens","Model Size(B)":0.428,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":27.8,"I-CLS":38.8,"I-QA":8.3,"I-RET":35.4,"I-VG":26.0,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/github.com\/google-deepmind\/magiclens"}
+{"Models":"OpenCLIP-FT","Model Size(B)":0.428,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":47.2,"I-CLS":56.0,"I-QA":21.9,"I-RET":55.4,"I-VG":64.1,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/doi.org\/10.48550\/arXiv.2212.07143"}
+{"Models":"QQMM-embed","Model Size(B)":8.297,"Data Source":"Self-Reported","V2-Overall":null,"V1-Overall":72.175,"I-CLS":70.07,"I-QA":69.52,"I-RET":71.175,"I-VG":87.075,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/github.com\/QQ-MM\/QQMM-embed"}
+{"Models":"UniIR (BLIP_FF)","Model Size(B)":0.247,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":42.8,"I-CLS":42.1,"I-QA":15.0,"I-RET":60.1,"I-VG":62.2,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/TIGER-Lab\/UniIR"}
+{"Models":"UniIR (CLIP_SF)","Model Size(B)":0.428,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":44.7,"I-CLS":44.3,"I-QA":16.2,"I-RET":61.8,"I-VG":65.3,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/TIGER-Lab\/UniIR"}
+{"Models":"UniME(LLaVA-1.6-7B-LoRA-LowRes)","Model Size(B)":7.57,"Data Source":"Self-Reported","V2-Overall":null,"V1-Overall":66.6,"I-CLS":60.6,"I-QA":52.9,"I-RET":67.9,"I-VG":85.1,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/DeepGlint-AI\/UniME-LLaVA-1.6-7B"}
+{"Models":"UniME(LLaVA-OneVision-7B-LoRA-Res336)","Model Size(B)":8.03,"Data Source":"Self-Reported","V2-Overall":null,"V1-Overall":70.7,"I-CLS":66.8,"I-QA":66.6,"I-RET":70.5,"I-VG":90.9,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/DeepGlint-AI\/UniME-LLaVA-OneVision-7B"}
+{"Models":"UniME(Phi-3.5-V-LoRA)","Model Size(B)":4.2,"Data Source":"Self-Reported","V2-Overall":null,"V1-Overall":64.2,"I-CLS":54.8,"I-QA":55.9,"I-RET":64.5,"I-VG":81.8,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/DeepGlint-AI\/UniME-Phi3.5-V-4.2B"}
+{"Models":"VLM2Vec (LLaVA-1.6-LoRA-HighRes)","Model Size(B)":7.57,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":62.9,"I-CLS":61.2,"I-QA":49.9,"I-RET":67.4,"I-VG":86.1,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-LLaVa-Next"}
+{"Models":"VLM2Vec (LLaVA-1.6-LoRA-LowRes)","Model Size(B)":7.57,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":55.0,"I-CLS":54.7,"I-QA":50.3,"I-RET":56.2,"I-VG":64.0,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-LLaVa-Next"}
+{"Models":"VLM2Vec (Phi-3.5-V-FT)","Model Size(B)":4.15,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":55.9,"I-CLS":52.8,"I-QA":50.3,"I-RET":57.8,"I-VG":72.3,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Full"}
+{"Models":"VLM2Vec (Phi-3.5-V-LoRA)","Model Size(B)":4.15,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":60.1,"I-CLS":54.8,"I-QA":54.9,"I-RET":62.3,"I-VG":79.5,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Full"}
+{"Models":"VLM2Vec (Qwen2-VL-2B-LoRA-HighRes)","Model Size(B)":2.21,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":59.3,"I-CLS":59.0,"I-QA":49.4,"I-RET":65.4,"I-VG":73.4,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Qwen2VL-2B"}
+{"Models":"VLM2Vec (Qwen2-VL-7B-LoRA-HighRes)","Model Size(B)":8.29,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":65.8,"I-CLS":62.6,"I-QA":57.8,"I-RET":69.9,"I-VG":81.7,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/TIGER-Lab\/VLM2Vec-Qwen2VL-7B"}
+{"Models":"blip2-opt-2.7b","Model Size(B)":3.74,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":25.2,"I-CLS":27.0,"I-QA":4.2,"I-RET":33.9,"I-VG":47.0,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/Salesforce\/blip2-opt-2.7b"}
+{"Models":"clip-vit-large-patch14","Model Size(B)":0.428,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":37.8,"I-CLS":42.8,"I-QA":9.1,"I-RET":53.0,"I-VG":51.8,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/openai\/clip-vit-large-patch14"}
+{"Models":"e5-v","Model Size(B)":8.36,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":13.3,"I-CLS":21.8,"I-QA":4.9,"I-RET":11.5,"I-VG":19.0,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/royokong\/e5-v"}
+{"Models":"gme-Qwen2-VL-2B-Instruct","Model Size(B)":2.21,"Data Source":"Self-Reported","V2-Overall":null,"V1-Overall":55.8,"I-CLS":56.9,"I-QA":41.2,"I-RET":67.8,"I-VG":53.4,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/Alibaba-NLP\/gme-Qwen2-VL-2B-Instruct"}
+{"Models":"mmE5 (w\/ 560K synthetic data)","Model Size(B)":10.6,"Data Source":"Self-Reported","V2-Overall":null,"V1-Overall":58.6,"I-CLS":60.6,"I-QA":55.7,"I-RET":54.7,"I-VG":72.4,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/intfloat\/mmE5-mllama-11b-instruct"}
+{"Models":"mmE5-mllama-11b-instruct","Model Size(B)":10.6,"Data Source":"Self-Reported","V2-Overall":null,"V1-Overall":69.8,"I-CLS":67.6,"I-QA":62.6,"I-RET":71.0,"I-VG":89.6,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/intfloat\/mmE5-mllama-11b-instruct"}
+{"Models":"open_clip-ViT-L\/14","Model Size(B)":0.428,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":39.7,"I-CLS":47.8,"I-QA":10.9,"I-RET":52.3,"I-VG":53.3,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/github.com\/mlfoundations\/open_clip"}
+{"Models":"siglip-base-patch16-224","Model Size(B)":0.203,"Data Source":"TIGER-Lab","V2-Overall":null,"V1-Overall":34.8,"I-CLS":40.3,"I-QA":8.4,"I-RET":31.6,"I-VG":59.5,"V-CLS":null,"V-QA":null,"V-RET":null,"V-MRET":null,"VisDoc":null,"URL":"https:\/\/huggingface.co\/google\/siglip-base-patch16-224"}
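Every rewritten row now carries the full V1 and V2 column sets, with null where a model has no score for that group. A quick, illustrative schema check with pandas (the column lists mirror TASKS_V1/TASKS_V2 from utils.py):

```python
import pandas as pd

TASKS_V1 = ["V1-Overall", "I-CLS", "I-QA", "I-RET", "I-VG"]
TASKS_V2 = ["V2-Overall", "V-CLS", "V-QA", "V-RET", "V-MRET", "VisDoc"]

df = pd.read_json("results.jsonl", orient="records", lines=True)

# All V1/V2 columns must exist, even if a model only reports one version.
missing = [c for c in TASKS_V1 + TASKS_V2 if c not in df.columns]
assert not missing, f"results.jsonl is missing columns: {missing}"

# In this revision the V2 scores are still null for every model,
# while V1-Overall is populated across the board.
print(df[TASKS_V2].isna().all())
print(df[["Models", "V1-Overall"]].sort_values("V1-Overall", ascending=False).head())
```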
utils.py
CHANGED

@@ -10,24 +10,13 @@ from huggingface_hub import Repository
 
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
-…
+BASE_COLS = ["Rank", "Models", "Model Size(B)", "Data Source"]
+TASKS_V1 = ["V1-Overall", "I-CLS", "I-QA", "I-RET", "I-VG"]
+TASKS_V2 = ["V2-Overall", "V-CLS", "V-QA", "V-RET", "V-MRET", "VisDoc"]
+COLUMN_NAMES = BASE_COLS + TASKS_V1 + TASKS_V2
 
-…
-…
-    "Overall",
-    "Classification", "VQA", "Retrieval", "Grounding"
-]
-
-BASE_COLS = [col for col in MODEL_INFO if col not in TASKS]
-
-DATA_TITLE_TYPE = ['number', 'markdown', 'str', 'markdown', 'number', 'number', 'number', 'number', 'number', 'number', 'number']
-
-SUBMISSION_NAME = "MMEB"
-SUBMISSION_URL = os.path.join("https://huggingface.co/spaces/TIGER-Lab/", SUBMISSION_NAME)
-FILE_NAME = "results.csv"
-CSV_DIR = "results.csv"
-
-COLUMN_NAMES = MODEL_INFO
+DATA_TITLE_TYPE = ['number', 'markdown', 'str', 'markdown'] + \
+    ['number'] * (len(TASKS_V1) + len(TASKS_V2))
 
 LEADERBOARD_INTRODUCTION = """
 # MMEB Leaderboard

@@ -49,25 +38,7 @@ TABLE_INTRODUCTION = """"""
 
 LEADERBOARD_INFO = """
 ## Dataset Summary
-
-- **Classification**: This category comprises 5 in-distribution and 5 out-of-distribution datasets. Queries
-  consist of instructions and images, optionally accompanied by related text. Targets are class labels,
-  and the number of class labels corresponds to the number of classes in the dataset. \n
-    - IND: ImageNet-1k, N24News, HatefulMemes, VOC2007, SUN397 \n
-    - OOD: Place365, ImageNet-A, ImageNet-R, ObjectNet, Country-211 \n
-- **Visual Question Answering**: This category includes 6 in-distribution and 4 out-of-distribution
-  datasets. The query consists of an instruction, an image, and a piece of text as the question, while
-  the target is the answer. Each query has 1,000 target candidates: 1 ground truth and 999 distractors. \n
-    - IND: OK-VQA, A-OKVQA, DocVQA, InfographicVQA, ChartQA, Visual7W \n
-    - OOD: ScienceQA, VizWiz, GQA, TextVQA \n
-- **Information Retrieval**: This category contains 8 in-distribution and 4 out-of-distribution datasets.
-  Both the query and target sides can involve a combination of text, images, and instructions. Similar
-  to the VQA task, each query has 1,000 candidates, with 1 ground truth and 999 distractors. \n
-    - IND: VisDial, CIRR, VisualNews_t2i, VisualNews_i2t, MSCOCO_t2i, MSCOCO_i2t, NIGHTS, WebQA \n
-    - OOD: OVEN, FashionIQ, EDIS, Wiki-SS-NQ \n
-- **Visual Grounding**: This category includes 1 in-distribution and 3 out-of-distribution datasets, which are adapted from object detection tasks. Queries consist of an instruction, an image, and text referring to a specific region or object within the image. The target may include a cropped image of the object or text describing the same region. Each query includes 1,000 candidates: 1 ground truth and 999 distractors. These distractors may include hard negatives from the same object class, other objects in the image, or random objects from different images. \n
-    - IND: MSCOCO \n
-    - OOD: Visual7W-Pointing, RefCOCO, RefCOCO-Matching \n
+<img width="900" alt="abs" src="overview.png">
 """
 
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results"

@@ -81,6 +52,8 @@ CITATION_BUTTON_TEXT = r"""@article{jiang2024vlm2vec,
 SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction
 
 ## ⚠ Please note that you need to submit the JSON file with the following format:
+
+### **TO SUBMIT V1 ONLY**
 ```json
 [
     {

@@ -88,15 +61,34 @@ SUBMIT_INTRODUCTION = """# Submit on MMEB Leaderboard Introduction
         <Optional>"URL": "<Model URL>",
         "Model Size(B)": 1000,
         "Data Source": Self-Reported,
-        "Overall": 50.0,
-        "…
-        "…
-        "…
-        "…
+        "V1-Overall": 50.0,
+        "I-CLS": 50.0,
+        "I-QA": 50.0,
+        "I-RET": 50.0,
+        "I-VG": 50.0
     },
 ]
 ```
-
+
+### **TO SUBMIT V2 ONLY**
+```json
+[
+    {
+        "Model": "<Model Name>",
+        <Optional>"URL": "<Model URL>",
+        "Model Size(B)": 1000,
+        "Data Source": Self-Reported,
+        "V2-Overall": 50.0,
+        "V-CLS": 50.0,
+        "V-QA": 50.0,
+        "V-RET": 50.0,
+        "V-VG": 50.0,
+        "VisDoc": 50.0
+    },
+]
+```
+You are also welcome to submit both versions by including all the fields above! :) \n
+You may refer to the Github page for instructions about evaluating your model. \n
 Github link: https://github.com/TIGER-AI-Lab/VLM2Vec. \n
 Please send us an email at [email protected], attaching the JSON file. We will review your submission and update the leaderboard accordingly.
 """

@@ -113,24 +105,21 @@ def create_hyperlinked_names(df):
     df = df.apply(add_link_to_model_name, axis=1)
     return df
 
-def fetch_data(file: str) -> pd.DataFrame:
-    …
-    …
-    …
-    …
-    …
-    …
-    …
-    …
-    …
+# def fetch_data(file: str) -> pd.DataFrame:
+#     # fetch the leaderboard data from remote
+#     if file is None:
+#         raise ValueError("URL Not Provided")
+#     url = f"https://huggingface.co/spaces/TIGER-Lab/MMEB/resolve/main/{file}"
+#     print(f"Fetching data from {url}")
+#     response = requests.get(url)
+#     if response.status_code != 200:
+#         raise requests.HTTPError(f"Failed to fetch data: HTTP status code {response.status_code}")
+#     return pd.read_json(io.StringIO(response.text), orient='records', lines=True)
 
 def get_df(file="results.jsonl"):
-    df = …
-    print(df.columns)
-    print('URL' in df.columns)
-    print(df)
+    df = pd.read_json(file, orient='records', lines=True)
     df['Model Size(B)'] = df['Model Size(B)'].apply(process_model_size)
-    df = df.sort_values(by=['Overall'], ascending=False)
+    df = df.sort_values(by=['V1-Overall'], ascending=False)
     df = create_hyperlinked_names(df)
     df['Rank'] = range(1, len(df) + 1)
     return df

@@ -185,7 +174,3 @@ def filter_columns_by_tasks(df, selected_tasks=None):
 
     available_columns = [col for col in selected_columns if col in df.columns]
     return df[available_columns]
-
-def get_task_choices():
-    return TASKS
-
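Taken together, the utils.py changes reduce the leaderboard data path to: read results.jsonl locally, sort by V1-Overall, assign ranks, and project onto the selected columns. A simplified sketch of that flow (process_model_size and the hyperlinking step from the real get_df are omitted here):

```python
import pandas as pd

BASE_COLS = ["Rank", "Models", "Model Size(B)", "Data Source"]
TASKS_V1 = ["V1-Overall", "I-CLS", "I-QA", "I-RET", "I-VG"]

# Load the per-model records and rank them by the V1 aggregate score.
df = pd.read_json("results.jsonl", orient="records", lines=True)
df = df.sort_values(by=["V1-Overall"], ascending=False)
df["Rank"] = range(1, len(df) + 1)

# Default leaderboard view: base columns plus the V1 task columns.
default_view = df[[c for c in BASE_COLS + TASKS_V1 if c in df.columns]]
print(default_view.head())
```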