sharafetdinov42 committed on
Commit
622bd1a
·
verified ·
1 Parent(s): d7e6665

Upload ITOG.csv

Browse files
Files changed (1) hide show
  1. ITOG.csv +47 -47
ITOG.csv CHANGED
@@ -1,47 +1,47 @@
1
- model,NUM_Q_multich_EM,NUM_Q_multich_CC,NUM_Q_multich_PM,NUM_Q_onech_EM,NUM_Q_seq_EM,NUM_Q_seq_CC,NUM_Q_seq_PM,NUM_Q_map_EM,NUM_Q_map_CC,NUM_Q_map_PM,OPEN_Q_EM,OPEN_Q_F1,OPEN_Q_LR,LEADERBOARD
2
- GigaChat-2-Max,81.0,84.0,88.0,91.0,75.0,75.0,75.0,66.0,66.0,72.0,52.0,60.0,73.0,73.6923076923077
3
- claude-3-5-sonnet,84.0,85.0,90.0,94.0,73.0,73.0,73.0,43.0,44.0,46.0,63.0,69.0,78.0,70.38461538461539
4
- gemini-pro-1.5,73.0,78.0,84.0,91.0,66.0,66.0,66.0,59.0,60.0,66.0,55.00000000000001,67.0,77.0,69.84615384615384
5
- deepseek-r1,79.0,82.0,86.0,67.0,82.0,82.0,82.0,57.99999999999999,61.0,59.0,22.0,61.0,69.0,68.46153846153847
6
- qwen2.5:72b-instruct-q4_0,59.0,75.0,74.0,90.0,68.0,68.0,68.0,38.0,40.0,44.0,39.0,50.0,63.0,59.69230769230769
7
- t-tech/T-pro-it-1.0,64.0,69.0,78.0,89.0,66.0,66.0,66.0,41.0,42.0,48.0,34.0,49.0,64.0,59.69230769230769
8
- mistral-123b,69.0,74.0,82.0,89.0,63.0,63.0,63.0,24.0,24.0,28.000000000000004,54.0,62.0,71.0,58.92307692307692
9
- gpt-4o,77.0,81.0,86.0,94.0,64.0,64.0,66.0,22.0,22.0,28.999999999999996,11.0,64.0,72.0,57.84615384615385
10
- qwen2.5:32b-instruct-q4_0,59.0,66.0,75.0,86.0,60.0,60.0,60.0,35.0,36.0,42.0,31.0,41.0,55.00000000000001,54.30769230769231
11
- qwen2:72b-instruct-q4_0,56.99999999999999,70.0,73.0,86.0,63.0,63.0,63.0,33.0,34.0,39.0,12.0,45.0,55.00000000000001,53.30769230769231
12
- rscr/ruadapt_qwen2.5_32b:Q4_K_M,54.0,56.99999999999999,72.0,86.0,60.0,60.0,60.0,35.0,35.0,44.0,31.0,39.0,56.00000000000001,53.0
13
- GigaChat-Pro,61.0,64.0,76.0,79.0,45.0,45.0,45.0,25.0,25.0,30.0,48.0,55.00000000000001,68.0,51.23076923076923
14
- GigaChat-Max,73.0,76.0,83.0,85.0,38.0,38.0,38.0,16.0,16.0,23.0,48.0,55.00000000000001,69.0,50.61538461538461
15
- YandexGPT4-Pro,31.0,35.0,77.0,85.0,35.0,35.0,54.0,21.0,21.0,47.0,45.0,56.00000000000001,69.0,47.0
16
- YandexGPT4-Pro-32k,31.0,34.0,77.0,85.0,34.0,34.0,55.00000000000001,21.0,21.0,47.0,45.0,56.99999999999999,69.0,46.92307692307692
17
- gemma2:27b-instruct-q4_0,49.0,56.99999999999999,69.0,82.0,48.0,48.0,48.0,13.0,13.0,18.0,39.0,48.0,63.0,45.76923076923077
18
- GigaChat-Lite,56.99999999999999,61.0,72.0,75.0,38.0,38.0,38.0,13.0,13.0,17.0,35.0,42.0,56.99999999999999,42.76923076923077
19
- ai-sage/GigaChat-20B-A3B-instruct,30.0,33.0,44.0,76.0,42.0,42.0,42.0,12.0,12.0,16.0,38.0,47.0,61.0,38.07692307692308
20
- llama405,21.0,62.0,77.0,60.0,24.0,33.0,51.0,5.0,7.000000000000001,20.0,9.0,56.00000000000001,66.0,37.76923076923077
21
- gemma2:9b-instruct-q4_0,41.0,54.0,64.0,77.0,32.0,33.0,35.0,5.0,5.0,10.0,25.0,34.0,49.0,35.69230769230769
22
- llama3.1:70b-instruct-q4_0,42.0,57.99999999999999,68.0,49.0,14.000000000000002,17.0,33.0,1.0,2.0,14.000000000000002,11.0,54.0,64.0,32.84615384615385
23
- llama3:70b-instruct-q4_0,35.0,63.0,60.0,79.0,12.0,14.000000000000002,25.0,4.0,5.0,6.0,7.000000000000001,47.0,57.99999999999999,31.923076923076923
24
- YandexGPT4-Lite,33.0,38.0,57.99999999999999,74.0,6.0,6.0,7.000000000000001,2.0,2.0,5.0,35.0,52.0,66.0,29.53846153846154
25
- qwen2.5:7b-instruct-q4_0,27.0,36.0,57.99999999999999,71.0,30.0,30.0,30.0,5.0,6.0,10.0,15.0,19.0,38.0,28.846153846153847
26
- mistral-nemo:12b-instruct-2407-q4_0,10.0,11.0,38.0,68.0,23.0,23.0,23.0,0.0,0.0,8.0,27.0,37.0,51.0,24.53846153846154
27
- rscr/vikhr_nemo_12b:latest,23.0,31.0,53.0,56.00000000000001,11.0,19.0,20.0,2.0,3.0,11.0,12.0,28.999999999999996,41.0,23.923076923076923
28
- ilyagusev/saiga_nemo_12b,3.0,4.0,59.0,70.0,17.0,17.0,18.0,1.0,1.0,10.0,23.0,34.0,48.0,23.46153846153846
29
- qwen2:7b-instruct-q4_0,11.0,13.0,55.00000000000001,67.0,22.0,22.0,23.0,2.0,2.0,8.0,5.0,13.0,28.999999999999996,20.923076923076923
30
- phi3:14b-medium-4k-instruct-q4_0,0.0,0.0,60.0,70.0,3.0,4.0,41.0,0.0,0.0,9.0,7.000000000000001,20.0,31.0,18.846153846153847
31
- owl/t-lite:q4_0-instruct,3.0,6.0,42.0,66.0,10.0,11.0,12.0,1.0,1.0,3.0,4.0,35.0,48.0,18.615384615384617
32
- ilyagusev/saiga_llama3,2.0,9.0,52.0,65.0,9.0,9.0,20.0,0.0,0.0,4.0,8.0,24.0,38.0,18.46153846153846
33
- mistral:7b-instruct-v0.3-q4_0,0.0,0.0,44.0,54.0,0.0,0.0,31.0,0.0,0.0,4.0,3.0,14.000000000000002,24.0,13.384615384615385
34
- yi:9b,8.0,17.0,39.0,39.0,12.0,14.000000000000002,14.000000000000002,0.0,0.0,1.0,2.0,6.0,14.000000000000002,12.76923076923077
35
- mixtral:8x7b-instruct-v0.1-q4_0,0.0,13.0,53.0,16.0,2.0,15.0,27.0,0.0,2.0,12.0,2.0,9.0,12.0,12.538461538461538
36
- solar:10.7b-instruct-v1-q4_0,0.0,0.0,49.0,50.0,0.0,0.0,19.0,0.0,0.0,3.0,4.0,13.0,22.0,12.307692307692308
37
- wavecut/vikhr:7b-instruct_0.4-Q4_1,0.0,0.0,39.0,41.0,2.0,3.0,7.000000000000001,0.0,0.0,1.0,10.0,19.0,30.0,11.692307692307692
38
- random,4.04341349223239,7.59736114066823,32.698446477974,24.5103137458832,14.0740740740741,14.0740740740741,14.0740740740741,0.828500414250207,0.828500414250207,3.23115161557581,,,,11.595990952305634
39
- llama3.1:8b-instruct-q4_0,0.0,0.0,50.0,4.0,0.0,1.0,3.0,0.0,0.0,6.0,0.0,24.0,40.0,9.846153846153847
40
- qwen:7b,0.0,0.0,30.0,36.0,12.0,12.0,14.000000000000002,0.0,0.0,1.0,0.0,3.0,16.0,9.538461538461538
41
- llama3:8b-instruct-q4_0,0.0,0.0,50.0,12.0,0.0,1.0,3.0,0.0,0.0,4.0,0.0,20.0,33.0,9.461538461538462
42
- gemma:7b-instruct-v1.1-q4_0,2.0,9.0,39.0,13.0,7.000000000000001,10.0,11.0,1.0,1.0,4.0,0.0,4.0,16.0,9.0
43
- yi:6b,1.0,18.0,13.0,28.000000000000004,0.0,6.0,4.0,0.0,1.0,1.0,1.0,3.0,10.0,6.615384615384615
44
- llama3.2:3b-instruct-q4_0,0.0,0.0,30.0,0.0,0.0,0.0,10.0,0.0,0.0,1.0,0.0,6.0,19.0,5.076923076923077
45
- llama2:13b,0.0,0.0,25.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,7.000000000000001,12.0,3.4615384615384617
46
- llama3.2:1b-instruct-q4_0,0.0,1.0,14.000000000000002,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,10.0,2.3076923076923075
47
- Среднее значение,30.089853633160722,36.25771913645929,58.615521032843866,60.43356252768629,28.49053497942387,29.71275720164609,34.6238683127572,13.440633342538895,13.862855564761116,19.42735892479057,20.727272727272727,35.25,47.13636363636363,32.811534841675154
 
1
+ model,NUM_Q_multich_EM,NUM_Q_multich_CC,NUM_Q_multich_PM,NUM_Q_onech_EM,NUM_Q_seq_EM,NUM_Q_seq_CC,NUM_Q_seq_PM,NUM_Q_map_EM,NUM_Q_map_CC,NUM_Q_map_PM,OPEN_Q_EM,OPEN_Q_F1,OPEN_Q_LR,LEADERBOARD,Доступ,Команда/страна
2
+ GigaChat-2-Max,81.0,84.0,88.0,91.0,75.0,75.0,75.0,66.0,66.0,72.0,52.0,60.0,73.0,73.6923076923077,API,"СБЕР, Россия"
3
+ claude-3-5-sonnet,84.0,85.0,90.0,94.0,73.0,73.0,73.0,43.0,44.0,46.0,63.0,69.0,78.0,70.38461538461539,API,"Anthropic, США "
4
+ gemini-pro-1.5,73.0,78.0,84.0,91.0,66.0,66.0,66.0,59.0,60.0,66.0,55.00000000000001,67.0,77.0,69.84615384615384,API,"Google DeepMind, США "
5
+ deepseek-r1,79.0,82.0,86.0,67.0,82.0,82.0,82.0,57.99999999999999,61.0,59.0,22.0,61.0,69.0,68.46153846153847,API,"DeepSeek, Китай "
6
+ qwen2.5:72b-instruct-q4_0,59.0,75.0,74.0,90.0,68.0,68.0,68.0,38.0,40.0,44.0,39.0,50.0,63.0,59.69230769230769,OS,"Alibaba, Китай "
7
+ t-tech/T-pro-it-1.0,64.0,69.0,78.0,89.0,66.0,66.0,66.0,41.0,42.0,48.0,34.0,49.0,64.0,59.69230769230769,OS,"Т-Банк, Россия"
8
+ mistral-123b,69.0,74.0,82.0,89.0,63.0,63.0,63.0,24.0,24.0,28.000000000000004,54.0,62.0,71.0,58.92307692307692,OS,"Mistral AI, Франция "
9
+ gpt-4o,77.0,81.0,86.0,94.0,64.0,64.0,66.0,22.0,22.0,29.0,11.0,64.0,72.0,57.84615384615385,API,"OpenAI, США "
10
+ qwen2.5:32b-instruct-q4_0,59.0,66.0,75.0,86.0,60.0,60.0,60.0,35.0,36.0,42.0,31.0,41.0,55.00000000000001,54.30769230769231,OS,"Alibaba, Китай "
11
+ qwen2:72b-instruct-q4_0,56.99999999999999,70.0,73.0,86.0,63.0,63.0,63.0,33.0,34.0,39.0,12.0,45.0,55.00000000000001,53.30769230769231,OS,"Alibaba, Китай "
12
+ rscr/ruadapt_qwen2.5_32b:Q4_K_M,54.0,56.99999999999999,72.0,86.0,60.0,60.0,60.0,35.0,35.0,44.0,31.0,39.0,56.00000000000001,53.0,OS,"Михаил Тихомиров, Россия "
13
+ GigaChat-Pro,61.0,64.0,76.0,79.0,45.0,45.0,45.0,25.0,25.0,30.0,48.0,55.00000000000001,68.0,51.23076923076923,API,"СБЕР, Россия"
14
+ GigaChat-Max,73.0,76.0,83.0,85.0,38.0,38.0,38.0,16.0,16.0,23.0,48.0,55.00000000000001,69.0,50.61538461538461,API,"СБЕР, Россия"
15
+ YandexGPT4-Pro,31.0,35.0,77.0,85.0,35.0,35.0,54.0,21.0,21.0,47.0,45.0,56.00000000000001,69.0,47.0,API,"Яндекс, Россия "
16
+ YandexGPT4-Pro-32k,31.0,34.0,77.0,85.0,34.0,34.0,55.00000000000001,21.0,21.0,47.0,45.0,56.99999999999999,69.0,46.92307692307692,API,"Яндекс, Россия "
17
+ gemma2:27b-instruct-q4_0,49.0,56.99999999999999,69.0,82.0,48.0,48.0,48.0,13.0,13.0,18.0,39.0,48.0,63.0,45.76923076923077,OS,"Google, США"
18
+ GigaChat-Lite,56.99999999999999,61.0,72.0,75.0,38.0,38.0,38.0,13.0,13.0,17.0,35.0,42.0,56.99999999999999,42.76923076923077,API,"СБЕР, Россия"
19
+ ai-sage/GigaChat-20B-A3B-instruct,30.0,33.0,44.0,76.0,42.0,42.0,42.0,12.0,12.0,16.0,38.0,47.0,61.0,38.07692307692308,OS,"СБЕР, Россия"
20
+ llama405,21.0,62.0,77.0,60.0,24.0,33.0,51.0,5.0,7.000000000000001,20.0,9.0,56.00000000000001,66.0,37.76923076923077,OS,"Meta, США "
21
+ gemma2:9b-instruct-q4_0,41.0,54.0,64.0,77.0,32.0,33.0,35.0,5.0,5.0,10.0,25.0,34.0,49.0,35.69230769230769,OS,"Google, США"
22
+ llama3.1:70b-instruct-q4_0,42.0,57.99999999999999,68.0,49.0,14.000000000000002,17.0,33.0,1.0,2.0,14.000000000000002,11.0,54.0,64.0,32.84615384615385,OS,"Meta, США "
23
+ llama3:70b-instruct-q4_0,35.0,63.0,60.0,79.0,12.0,14.000000000000002,25.0,4.0,5.0,6.0,7.000000000000001,47.0,57.99999999999999,31.923076923076923,OS,"Meta, США "
24
+ YandexGPT4-Lite,33.0,38.0,57.99999999999999,74.0,6.0,6.0,7.000000000000001,2.0,2.0,5.0,35.0,52.0,66.0,29.53846153846154,API,"Яндекс, Россия "
25
+ qwen2.5:7b-instruct-q4_0,27.0,36.0,57.99999999999999,71.0,30.0,30.0,30.0,5.0,6.0,10.0,15.0,19.0,38.0,28.846153846153847,OS,"Alibaba, Китай "
26
+ mistral-nemo:12b-instruct-2407-q4_0,10.0,11.0,38.0,68.0,23.0,23.0,23.0,0.0,0.0,8.0,27.0,37.0,51.0,24.53846153846154,OS,"Mistral AI, Франция "
27
+ rscr/vikhr_nemo_12b:latest,23.0,31.0,53.0,56.00000000000001,11.0,19.0,20.0,2.0,3.0,11.0,12.0,29.0,41.0,23.923076923076923,OS,"Vikhr Team, Россия"
28
+ ilyagusev/saiga_nemo_12b,3.0,4.0,59.0,70.0,17.0,17.0,18.0,1.0,1.0,10.0,23.0,34.0,48.0,23.46153846153846,OS,"Илья Гусев, Россия "
29
+ qwen2:7b-instruct-q4_0,11.0,13.0,55.00000000000001,67.0,22.0,22.0,23.0,2.0,2.0,8.0,5.0,13.0,29.0,20.923076923076923,OS,"Alibaba, Китай "
30
+ phi3:14b-medium-4k-instruct-q4_0,0.0,0.0,60.0,70.0,3.0,4.0,41.0,0.0,0.0,9.0,7.000000000000001,20.0,31.0,18.846153846153847,OS,"Microsoft, США"
31
+ owl/t-lite:q4_0-instruct,3.0,6.0,42.0,66.0,10.0,11.0,12.0,1.0,1.0,3.0,4.0,35.0,48.0,18.615384615384617,OS,"Т-Банк, Россия"
32
+ ilyagusev/saiga_llama3,2.0,9.0,52.0,65.0,9.0,9.0,20.0,0.0,0.0,4.0,8.0,24.0,38.0,18.46153846153846,OS,"Илья Гусев, Россия "
33
+ mistral:7b-instruct-v0.3-q4_0,0.0,0.0,44.0,54.0,0.0,0.0,31.0,0.0,0.0,4.0,3.0,14.000000000000002,24.0,13.384615384615383,OS,"Mistral AI, Франция "
34
+ yi:9b,8.0,17.0,39.0,39.0,12.0,14.000000000000002,14.000000000000002,0.0,0.0,1.0,2.0,6.0,14.000000000000002,12.76923076923077,OS,"YI Technology inc., Китай"
35
+ mixtral:8x7b-instruct-v0.1-q4_0,0.0,13.0,53.0,16.0,2.0,15.0,27.0,0.0,2.0,12.0,2.0,9.0,12.0,12.538461538461538,OS,"Mistral AI, Франция "
36
+ solar:10.7b-instruct-v1-q4_0,0.0,0.0,49.0,50.0,0.0,0.0,19.0,0.0,0.0,3.0,4.0,13.0,22.0,12.307692307692308,OS,"Upstage AI, Южная Корея"
37
+ wavecut/vikhr:7b-instruct_0.4-Q4_1,0.0,0.0,39.0,41.0,2.0,3.0,7.000000000000001,0.0,0.0,1.0,10.0,19.0,30.0,11.692307692307692,OS,"Vikhr Team, Россия"
38
+ random,4.04341349223239,7.59736114066823,32.698446477974,24.5103137458832,14.0740740740741,14.0740740740741,14.0740740740741,0.828500414250207,0.828500414250207,3.23115161557581,,,,11.595990952305634,,
39
+ llama3.1:8b-instruct-q4_0,0.0,0.0,50.0,4.0,0.0,1.0,3.0,0.0,0.0,6.0,0.0,24.0,40.0,9.846153846153848,OS,"Meta, США "
40
+ qwen:7b,0.0,0.0,30.0,36.0,12.0,12.0,14.000000000000002,0.0,0.0,1.0,0.0,3.0,16.0,9.538461538461538,OS,"Alibaba, Китай "
41
+ llama3:8b-instruct-q4_0,0.0,0.0,50.0,12.0,0.0,1.0,3.0,0.0,0.0,4.0,0.0,20.0,33.0,9.461538461538462,OS,"Meta, США "
42
+ gemma:7b-instruct-v1.1-q4_0,2.0,9.0,39.0,13.0,7.000000000000001,10.0,11.0,1.0,1.0,4.0,0.0,4.0,16.0,9.0,OS,"Google, США"
43
+ yi:6b,1.0,18.0,13.0,28.000000000000004,0.0,6.0,4.0,0.0,1.0,1.0,1.0,3.0,10.0,6.615384615384615,OS,"YI Technology inc., Китай"
44
+ llama3.2:3b-instruct-q4_0,0.0,0.0,30.0,0.0,0.0,0.0,10.0,0.0,0.0,1.0,0.0,6.0,19.0,5.076923076923077,OS,"Meta, США "
45
+ llama2:13b,0.0,0.0,25.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,7.000000000000001,12.0,3.4615384615384617,OS,"Meta, США "
46
+ llama3.2:1b-instruct-q4_0,0.0,1.0,14.000000000000002,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,2.0,10.0,2.3076923076923075,OS,"Meta, США "
47
+ Среднее значение,30.08985363316072,36.25771913645929,58.615521032843866,60.43356252768629,28.49053497942387,29.71275720164609,34.6238683127572,13.440633342538897,13.862855564761116,19.42735892479057,20.727272727272727,35.25,47.13636363636363,32.811534841675154,,