diff --git a/README.md b/README.md index 6499bfc64dd049ac0b5d0d2f19cc66f65cae4218..75454b82f6ff88213243f96358fa4e9b358ce3ef 100644 --- a/README.md +++ b/README.md @@ -17,6 +17,12 @@ This repository contains the frontend code to display TabArena leaderboard. The leaderboard is hosted on a HuggingFace space. Reference: -* Website: tabarena.ai +* Website: https://tabarena.ai * Paper: TBA * TabArena Codebase: https://tabarena.ai/code + +# Install LB Code for Development + +```bash +pip install -e ".[dev]" +``` \ No newline at end of file diff --git a/data/full-cls/figures/critical-diagram.pdf b/data/full-cls/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..1daefa080392772c3b1a2ed5670c4da458f152fc Binary files /dev/null and b/data/full-cls/figures/critical-diagram.pdf differ diff --git a/data/full-cls/figures/critical-diagram.png.zip b/data/full-cls/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..944b931e98d50e422b34fb2f61567f648bdbbe71 --- /dev/null +++ b/data/full-cls/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:308a9d416aaaff64659e2ed5f56ed2cf782ca266a5ef0ded86ed835659327f4d +size 295101 diff --git a/data/full-cls/leaderboard.tex b/data/full-cls/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..e3dd1a6a7d505b882f934ad3a4150b2ca8842a80 --- /dev/null +++ b/data/full-cls/leaderboard.tex @@ -0,0 +1,49 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +TabM (T+E) & \textcolor{gold}{\textbf{1612${}_{-32,+43}$}} & \textcolor{silver}{\textbf{0.492}} & \textcolor{gold}{\textbf{7.8}} & \textcolor{silver}{\textbf{3.0}} & \textcolor{silver}{\textbf{6}} & \textcolor{silver}{\textbf{5.7\%}} & 2466.21 & 1.50 \\ +AutoGluon 1.3 (4h) & \textcolor{silver}{\textbf{1611${}_{-34,+36}$}} & \textcolor{gold}{\textbf{0.564}} & \textcolor{gold}{\textbf{7.8}} & \textcolor{gold}{\textbf{2.4}} & \textcolor{gold}{\textbf{11}} & \textcolor{gold}{\textbf{4.6\%}} & 1322.72 & 2.36 \\ +LightGBM (T+E) & \textcolor{bronze}{\textbf{1570${}_{-31,+37}$}} & 0.404 & \textcolor{bronze}{\textbf{9.1}} & 5.4 & 1 & 7.7\% & 382.05 & 1.49 \\ +RealMLP (T+E) & \textcolor{bronze}{\textbf{1570${}_{-32,+41}$}} & \textcolor{bronze}{\textbf{0.419}} & 9.2 & \textcolor{bronze}{\textbf{5.2}} & 1 & 7.1\% & 5534.77 & 3.87 \\ +TabM (T) & 1515${}_{-24,+40}$ & 0.407 & 11.0 & \textcolor{bronze}{\textbf{5.2}} & 1 & 6.8\% & 2466.21 & 0.18 \\ +CatBoost (T+E) & 1503${}_{-25,+33}$ & 0.379 & 11.5 & 6.9 & 1 & \textcolor{bronze}{\textbf{6.7\%}} & 1372.94 & 0.56 \\ +CatBoost (T) & 1498${}_{-35,+36}$ & 0.365 & 11.8 & 6.1 & 1 & 6.9\% & 1372.94 & 0.07 \\ +LightGBM (T) & 1486${}_{-27,+41}$ & 0.318 & 12.1 & 10.2 & 0 & 8.5\% & 382.05 & 0.25 \\ +XGBoost (T+E) & 1478${}_{-31,+37}$ & 0.321 & 12.5 & 8.0 & 0 & 8.5\% & 685.87 & 1.45 \\ +CatBoost (D) & 1475${}_{-33,+42}$ & 0.345 & 12.6 & 5.9 & 2 & 8.2\% & 5.72 & 0.08 \\ +ModernNCA (T) & 1434${}_{-25,+45}$ & 0.278 & 14.2 & 6.8 & 2 & 8.3\% & 4879.89 & 0.52 \\ +ModernNCA (T+E) & 1433${}_{-39,+32}$ & 0.373 & 14.4 & \textcolor{bronze}{\textbf{5.2}} & 2 & 8.1\% & 4879.89 & 8.74 \\ +XGBoost (T) & 1431${}_{-29,+44}$ & 0.266 & 14.3 & 11.6 & 0 & 9.0\% & 685.87 & 0.21 \\ +TabM (D) & 1395${}_{-30,+35}$ & 0.271 & 16.0 & 9.9 & 0 & 10.6\% & 10.21 & 0.14 \\ +TorchMLP (T+E) & 1377${}_{-34,+33}$ & 0.222 & 16.7 & 11.1 & 0 & 9.6\% & 2389.22 & 2.16 \\ +RealMLP (T) & 1373${}_{-30,+41}$ & 0.202 & 16.8 & 12.3 & 0 & 10.3\% & 5534.77 & 0.19 \\ +EBM (T+E) & 1372${}_{-25,+31}$ & 0.180 & 16.8 & 10.3 & 0 & 12.7\% & 914.23 & 0.22 \\ +FastaiMLP (T+E) & 1332${}_{-33,+35}$ & 0.196 & 18.6 & 10.6 & 0 & 13.1\% & 618.90 & 4.77 \\ +ModernNCA (D) & 1323${}_{-34,+33}$ & 0.134 & 19.1 & 10.2 & 1 & 12.6\% & 14.78 & 0.35 \\ +EBM (T) & 1306${}_{-29,+36}$ & 0.122 & 19.7 & 14.4 & 0 & 13.5\% & 914.23 & 0.03 \\ +EBM (D) & 1274${}_{-31,+39}$ & 0.136 & 21.1 & 7.8 & 3 & 14.6\% & 4.31 & 0.05 \\ +XGBoost (D) & 1264${}_{-29,+31}$ & 0.105 & 21.6 & 16.6 & 0 & 12.1\% & 1.77 & 0.12 \\ +ExtraTrees (T+E) & 1259${}_{-26,+38}$ & 0.104 & 21.8 & 13.1 & 0 & 14.1\% & 189.76 & 0.74 \\ +TorchMLP (T) & 1250${}_{-33,+36}$ & 0.096 & 22.2 & 18.0 & 0 & 12.3\% & 2389.22 & 0.15 \\ +TabDPT (D) & 1244${}_{-31,+39}$ & 0.176 & 22.4 & 5.7 & \textcolor{bronze}{\textbf{4}} & 13.6\% & 22.61 & 8.55 \\ +RealMLP (D) & 1243${}_{-30,+39}$ & 0.090 & 22.5 & 18.5 & 0 & 12.6\% & 35.38 & 0.20 \\ +FastaiMLP (T) & 1233${}_{-35,+32}$ & 0.089 & 23.0 & 17.9 & 0 & 15.2\% & 618.90 & 0.30 \\ +RandomForest (T+E) & 1221${}_{-37,+35}$ & 0.107 & 23.5 & 12.6 & 0 & 14.6\% & 323.74 & 0.74 \\ +ExtraTrees (T) & 1207${}_{-32,+37}$ & 0.079 & 24.1 & 14.9 & 0 & 15.5\% & 189.76 & 0.08 \\ +LightGBM (D) & 1206${}_{-31,+34}$ & 0.076 & 24.0 & 21.1 & 0 & 13.5\% & 1.79 & 0.12 \\ +RandomForest (T) & 1166${}_{-33,+35}$ & 0.076 & 25.8 & 15.1 & 0 & 16.0\% & 323.74 & 0.08 \\ +TorchMLP (D) & 1080${}_{-37,+35}$ & 0.021 & 28.9 & 26.2 & 0 & 17.8\% & 6.83 & 0.15 \\ +FastaiMLP (D) & 1052${}_{-33,+32}$ & 0.026 & 29.9 & 26.8 & 0 & 20.8\% & 2.91 & 0.37 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.011 & 31.4 & 29.3 & 0 & 22.6\% & 0.38 & 0.04 \\ +Linear (T+E) & 996${}_{-32,+37}$ & 0.042 & 31.6 & 22.6 & 0 & 27.7\% & 51.79 & 0.22 \\ +Linear (T) & 958${}_{-32,+36}$ & 0.026 & 32.6 & 24.7 & 0 & 28.7\% & 51.79 & 0.08 \\ +Linear (D) & 950${}_{-35,+34}$ & 0.019 & 32.8 & 17.7 & 1 & 29.9\% & 1.61 & 0.10 \\ +ExtraTrees (D) & 926${}_{-40,+43}$ & 0.010 & 33.4 & 30.0 & 0 & 25.4\% & 0.25 & 0.04 \\ +KNN (T+E) & 695${}_{-49,+48}$ & 0.000 & 37.7 & 37.4 & 0 & 47.7\% & 3.57 & 0.19 \\ +KNN (T) & 606${}_{-51,+45}$ & 0.000 & 38.7 & 38.6 & 0 & 49.6\% & 3.57 & 0.04 \\ +KNN (D) & 451${}_{-61,+67}$ & 0.000 & 40.0 & 39.9 & 0 & 58.1\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/full-cls/tabarena_leaderboard.csv b/data/full-cls/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..c67221bbadf4981061f87049214644c56600510d --- /dev/null +++ b/data/full-cls/tabarena_leaderboard.csv @@ -0,0 +1,42 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +TABM (tuned + ensemble),34341.376551290836,9.60910518720136,4223.58680397795,3.4317121842487635,0.5077918677314276,0.4683217107084951,0.05713135019989292,0.0464892851251854,45864.929770826835,134.55326674312073,7.828947368421052,0.17047,8245.070318195554,2.6588550435172187,2466.2108648716858,1.5022465694891438,0.47136645123500964,0.4458821771498494,0.018407448383057634,0.00923027101272119,39661.48674821285,121.43882105497326,5.5,6,7,2,23,1612.0,43.0,31.6,0.8292763157894737,0.3331708131050236 +AutoGluon 1.3 (4h),7918.9967654708535,21.9962719316371,2834.6060407147975,3.0873029091200266,0.43572604440948826,0.37369319622384795,0.04552082156008848,0.02756565625162793,31011.170971980166,263.893057970457,7.842105263157895,0.159155,7086.632828738954,3.4718479580349393,1322.7174946761893,2.35500290690449,0.3593825434326649,0.3179495368931894,0.020277285974108883,0.008552420217732355,23372.022224152715,142.27612475951975,4.0,11,1,4,22,1611.2,35.5,33.4,0.8289473684210527,0.40970136828052783 +GBM (tuned + ensemble),2957.8209862183407,11.98416593290909,759.1477152793035,2.5417865978850473,0.5957286671834613,0.5543276443877001,0.0768692051258278,0.04754330288453222,8568.794143617388,184.93302208876983,9.131578947368421,0.16698000000000002,1552.8742877377404,3.4757837878333198,382.05361557599804,1.4876036641335277,0.6251845339504845,0.567482310945806,0.035330419910613864,0.01740708281579771,7514.342598912193,103.35126359750211,7.75,1,3,1,33,1570.0,36.6,30.1,0.7967105263157894,0.18453518660562004 +REALMLP (tuned + ensemble),75228.3606640392,17.6439790335315,5880.304462786179,8.382833590222491,0.5811880944579212,0.5195249746896198,0.0707111257986013,0.04178949688851757,134045.19581697704,292.40009500207566,9.157894736842104,0.166985,21596.535681260957,5.895895957946777,5534.7676914745925,3.874032586779933,0.5453504649932683,0.5412877121937096,0.030830693333615045,0.017527041551138753,90524.58108841689,250.02806627683862,8.0,1,3,4,30,1569.6,40.5,31.7,0.7960526315789473,0.1918840950297688 +TABM (tuned),34341.376551290836,1.0683778852050068,4223.58680397795,0.37888409513825755,0.5928893193157706,0.5290373167203445,0.0681053050300204,0.060722902369039045,45864.929770826835,13.890136033418576,11.039473684210526,0.172585,8245.070318195554,0.2723346948623657,2466.2108648716858,0.17557376557934842,0.5877464635288101,0.5713169031682397,0.03216876796042978,0.018156210036345113,39661.48674821285,10.546435553124088,11.0,1,5,3,29,1515.2,40.0,23.2,0.7490131578947369,0.1928119823795554 +CAT (tuned + ensemble),16504.07579820414,2.581779239609925,3201.0265707590415,0.8915752484646539,0.6209138650557025,0.5659344601903572,0.06713271363366402,0.0405214229704263,27193.868813721343,44.08651163500922,11.486842105263158,0.16040500000000002,5046.900271190538,1.2630292971928916,1372.9411122807264,0.5562989902961428,0.6153632608570143,0.6088415859515164,0.03472659799727995,0.022138825584558906,20541.46486167592,37.76547447057217,10.0,1,1,1,35,1502.8,32.3,24.1,0.7378289473684211,0.14412801379906642 +CAT (tuned),16504.07579820414,0.42638187025025576,3201.0265707590415,0.12024042770987867,0.6346801229162512,0.5780524851760976,0.06937153661241705,0.04017281020087998,27193.868813721343,6.229177098348047,11.75,0.161705,5046.900271190538,0.12877851062350804,1372.9411122807264,0.07385371803542701,0.6314852452199684,0.6377829597443649,0.036574094595854534,0.02550717929301198,20541.46486167592,4.9725379405984995,12.0,1,3,2,32,1498.2,35.5,34.4,0.73125,0.16319274327461483 +GBM (tuned),2957.8209862183407,1.875632255676894,759.1477152793035,0.5049369881097493,0.6821201235665096,0.6172522665748253,0.08458189231987852,0.057321874164711774,8568.794143617388,32.37091249654915,12.092105263157896,0.16877999999999999,1552.8742877377404,0.5045170254177518,382.05361557599804,0.2538713244201605,0.7168421399910502,0.6480642307818962,0.039126655178203906,0.024837453552070406,7514.342598912193,15.175591971121808,11.0,0,0,0,38,1486.3,40.1,26.7,0.7226973684210526,0.097905840634771 +XGB (tuned + ensemble),5957.374200563333,6.456417350253166,1167.526219278486,2.7907218103162132,0.6794057655741552,0.6314178768057309,0.08548974406302369,0.05849041431964196,10832.944520166355,137.58836217438702,12.513157894736842,0.165745,1680.0658507664998,2.2809726662105985,685.86510540535,1.4547593315263065,0.7126419091535116,0.6462162901594695,0.05102562935485577,0.027448154499449852,8251.297786111467,74.47019952683146,10.5,0,2,0,36,1478.0,36.4,30.3,0.712171052631579,0.12563292181614538 +CAT (default),220.2627424415789,0.2710267032099049,109.9732905896213,0.13662992734425208,0.6551829671744798,0.6178972125596192,0.08194323858116616,0.044223095820168916,404.9759898295662,6.476751807794606,12.552631578947368,0.16373,18.264583627382912,0.17750852637820774,5.723546572951673,0.0761539571798428,0.6872314742824219,0.6578183507970544,0.04295775865131041,0.020841879336463684,103.69107151573829,5.120402547941957,14.0,2,2,2,32,1475.0,41.3,32.8,0.7111842105263158,0.1691212806814804 +MNCA (tuned),57356.284085895306,20.170870465214488,5990.817791505437,2.0485901061394993,0.721681429841744,0.6240148041756003,0.08261656685639085,0.061724748753513047,80456.91211763977,129.4389431731015,14.197368421052632,0.17054999999999998,14186.536935488384,0.6020842525694106,4879.890404506269,0.5247194359730172,0.7744612480983963,0.6453947144753973,0.05917481703324834,0.03283118689868241,66956.02864547497,27.735396922747526,13.0,2,1,1,34,1434.4,44.3,24.2,0.6700657894736842,0.14750425482010807 +XGB (tuned),5957.374200563333,1.301870340352867,1167.526219278486,0.6678561539347552,0.7344435643624297,0.6738277145612771,0.0896002321196869,0.06302079730298164,10832.944520166355,28.47499330573137,14.289473684210526,0.16848000000000002,1680.0658507664998,0.3827125522825453,685.86510540535,0.2050912539994952,0.7444722954009122,0.6783301736911117,0.05914830069662402,0.031553385715146064,8251.297786111467,11.461364611937853,13.0,0,0,0,38,1430.7,43.2,29.0,0.6677631578947368,0.08653175576997406 +MNCA (tuned + ensemble),57356.284085895306,531.7593351699455,5990.817791505437,50.07821547982156,0.6269167185273185,0.5688952055274242,0.08060765708238908,0.06906071635245807,80456.91211763977,3301.6213661597712,14.421052631578947,0.183035,14186.536935488384,14.17156207561493,4879.890404506269,8.743516387788919,0.6265388929939845,0.5384271404386443,0.05139790610389516,0.020567711460507468,66956.02864547497,548.1975258046007,10.5,2,3,3,30,1432.9,31.2,38.5,0.6644736842105263,0.191685722237091 +TABM (default),150.0762373017289,1.2507152029645372,19.886819500646006,0.46487430096802723,0.7289050360420961,0.6965301783060158,0.10592279294349498,0.08101332667275364,189.61145131944258,14.042337078320946,15.986842105263158,0.17246,31.126562476158142,0.20260944763819377,10.213381764059356,0.1381032773929915,0.8404844924976476,0.7209220250926951,0.04116190641707207,0.026799112086523788,144.96049255349743,10.968725907290855,15.5,0,1,0,37,1394.6,34.3,29.3,0.625328947368421,0.1012312665218443 +NN_TORCH (tuned + ensemble),24331.947126566527,16.31368946276213,3050.9102763481856,4.325404104362444,0.77831117981961,0.7280978052549151,0.09611794604638334,0.06918787987081731,56038.718047349466,227.24142067178357,16.657894736842106,0.17071999999999998,9097.789536105262,5.056680162747702,2389.2199648500327,2.157502904371376,0.9031626632034824,0.7953860237848694,0.057815073109235726,0.04001808164772668,44480.90343297912,173.17543399472862,16.0,0,1,0,37,1377.4,32.9,33.9,0.6085526315789473,0.08992242121508189 +REALMLP (tuned),75228.3606640392,0.8387730677922566,5880.304462786179,0.4572085739395715,0.7977234575542793,0.7117970907679741,0.10277317429931199,0.07240302920804677,134045.19581697704,13.981658721822997,16.763157894736842,0.16874,21596.535681260957,0.2674341731601291,5534.7676914745925,0.19066645138299287,0.8787741747777617,0.7098417849798737,0.06513447344574208,0.030556202222580157,90524.58108841689,11.215626382435188,15.0,0,0,1,37,1372.9,40.7,29.8,0.6059210526315789,0.08155448908452341 +EBM (tuned + ensemble),36729.440274559965,1.3371900389766136,6141.104384884199,0.5036823041953499,0.8200760362636922,0.7880989833945884,0.12732954487559783,0.09980891630857357,25271.27145534202,19.518642702178116,16.842105263157894,0.17122500000000002,2366.879786974854,0.4240463972091675,914.2329798556116,0.21634762578811195,0.9326307523783561,0.8541119710126825,0.06421221295803392,0.029878206588838063,15273.913117491418,11.254406005139426,18.0,0,1,2,35,1371.9,30.9,25.0,0.6039473684210527,0.09727298932291582 +FASTAI (tuned + ensemble),7309.51755415473,18.692008190266574,1376.1098802486467,8.473426897342513,0.8036000089866165,0.7693689222034934,0.13128262599690715,0.07635843392131615,18964.092381812123,455.5897128961598,18.605263157894736,0.178125,3087.37076303694,11.787012616793314,618.8953909329178,4.7655686359255345,1.0,0.8392150561095604,0.06050481172224609,0.04265820858044293,15284.817189242676,443.8905026950689,19.5,0,1,2,35,1332.4,34.1,32.1,0.5598684210526316,0.09458192988823165 +MNCA (default),304.22019695963775,10.484658345144394,17.60828380729721,1.3061868722894623,0.8657835470756838,0.7849609325396195,0.12636324016621214,0.08578266113037257,254.62493914649235,74.63976386946672,19.105263157894736,0.18519,31.500762327512106,0.5732622504234314,14.777266169000828,0.34634581634079226,1.0,0.8538610003193423,0.0758470874396368,0.033508778383699894,209.09978531409226,23.91921150117286,20.0,1,0,0,37,1323.4,32.2,33.7,0.5473684210526316,0.09850033776496639 +EBM (tuned),36729.440274559965,0.18373215324000308,6141.104384884199,0.08212434505580339,0.8777961641049835,0.8290253290078035,0.13507481091711523,0.10751000517976379,25271.27145534202,2.5019643980329747,19.736842105263158,0.17203000000000002,2366.879786974854,0.0449512971772088,914.2329798556116,0.02528246646038782,1.0,0.8880009340179814,0.0693392859381734,0.029722096900963217,15273.913117491418,1.252412448907763,20.0,0,0,0,38,1306.1,35.2,28.6,0.531578947368421,0.06937660396340457 +EBM (default),119.68166406294058,0.19009479611937763,11.429209024387047,0.10185762188607354,0.8643875945345197,0.8392043943375042,0.1458450211334854,0.11432893157179487,109.6134925105603,3.4110931424674527,21.13157894736842,0.17447000000000001,9.92618230978648,0.06371633741590713,4.31382445805991,0.0475851422516083,1.0,0.9361430448754253,0.08168138296631317,0.030962308088007345,60.70178540099199,2.7834768100426253,22.5,3,0,0,35,1274.4,38.7,30.3,0.4967105263157895,0.12864894343930836 +XGB (default),13.116732126927516,0.5742131232518202,3.202512268619222,0.2981186068489448,0.8951319461246384,0.8336038539032103,0.12066752719587548,0.10651436302184289,31.46292861336276,13.4756369742995,21.57894736842105,0.17317,5.653352538744608,0.30113152662913,1.771208861779989,0.11707781619763814,1.0,0.9042877342540053,0.09274125507684994,0.05498474330695087,28.26053761224749,9.127662964815336,20.0,0,0,1,37,1264.1,30.4,28.1,0.4855263157894737,0.060179048737854506 +XT (tuned + ensemble),1317.4209560674533,2.9519454171085915,472.65138083581655,1.3657584923642982,0.8963117961824938,0.8348074090017465,0.1405782550641364,0.10675622457060303,4571.615940832833,75.60244638381447,21.763157894736842,0.17667500000000003,756.8230986197789,1.8136235740449693,189.76252609436062,0.7431041876698922,1.0,0.925198807356131,0.08047883805391481,0.04755805472989214,2805.66154207989,66.39288968996527,24.5,0,0,2,36,1258.6,37.4,25.9,0.48092105263157897,0.07618129671484886 +NN_TORCH (tuned),24331.947126566527,0.8716282456241854,3050.9102763481856,0.24396545036982029,0.903659699715494,0.8318056659819181,0.12257145815909602,0.09467286855145444,56038.718047349466,12.138164397560297,22.19736842105263,0.17446499999999998,9097.789536105262,0.29952494303385413,2389.2199648500327,0.15177921475257505,1.0,0.8931127230162494,0.08158404258521362,0.058129310984625954,44480.90343297912,9.196372470124006,23.0,0,0,0,38,1249.8,35.6,32.3,0.4700657894736842,0.05547138225619901 +TABDPT (default),171.71139350780967,66.09824930987163,27.724576482795502,22.626481529214185,0.8235947911156236,0.7824186460895173,0.13606715011080084,0.10520495211781336,481.10896045076544,1338.9171702872993,22.36842105263158,0.190385,97.80311637454562,28.07416233751509,22.609050986069803,8.552450841932743,1.0,0.9381135551650721,0.09923475589347508,0.036528422149403744,400.67828468381333,1123.8959746745188,26.0,4,1,2,31,1243.9,38.8,30.1,0.46578947368421053,0.17594401965774872 +REALMLP (default),478.8822499715096,0.8300057720022592,36.68387630133338,0.4476794774606417,0.9101816949003365,0.8422140679435889,0.12642753491681138,0.09299113474461743,851.5414617865436,13.994120873198558,22.460526315789473,0.17601499999999998,136.44702684879303,0.27011087603039213,35.38408676659952,0.2030042614924829,1.0,0.8962648981995617,0.09831433782888765,0.05040272045303727,578.3100219480689,11.599510934138282,24.0,0,0,0,38,1243.0,38.1,29.3,0.46348684210526314,0.05407580940063217 +FASTAI (tuned),7309.51755415473,1.0496307073977955,1376.1098802486467,0.623937267000547,0.9108193738616642,0.8433783973317445,0.1518634764907348,0.09831442850495511,18964.092381812123,32.00078545496663,23.039473684210527,0.18023499999999998,3087.37076303694,0.8054822285970051,618.8953909329178,0.2978802219128553,1.0,0.8980232157250716,0.07025074812243065,0.058521388869295116,15284.817189242676,26.536834640421894,23.0,0,0,0,38,1233.3,31.8,34.2,0.44901315789473684,0.05582114757877186 +RF (tuned + ensemble),2309.3465478268977,2.3587986137434753,541.3031953907538,1.2662572218585502,0.8931944011226663,0.8558927648574508,0.1459939192257476,0.11861627805797718,5371.163113535875,67.79453318661523,23.526315789473685,0.177925,871.1966819789675,1.9029027620951335,323.74369638605225,0.7428875097152683,1.0,0.981324029272762,0.09322736498620338,0.06550559444306445,4278.677975908691,61.67862848692378,26.0,0,1,2,35,1221.4,34.1,36.4,0.4368421052631579,0.07938525193850257 +GBM (default),7.9087888545460165,0.5753568479889317,2.951996847158713,0.17011749256975625,0.923947709053949,0.8737571231597056,0.13539212068477616,0.10486902039951279,31.85523047906877,10.81395814590769,24.0,0.172975,5.532836645179325,0.2585195038053725,1.7913477923414471,0.12049981156984965,1.0,0.9188736260466357,0.10176889478951551,0.05903445835201446,25.045220341015206,6.549914554959342,23.5,0,0,0,38,1206.2,33.7,30.8,0.425,0.04730189953336336 +XT (tuned),1317.4209560674533,0.3043035832762021,472.65138083581655,0.1722314796858175,0.9205594165466939,0.8736080955439065,0.15476770307289778,0.11743362385671523,4571.615940832833,8.349379059762423,24.06578947368421,0.17796,756.8230986197789,0.18769407272338867,189.76252609436062,0.07878183958882805,1.0,0.9572374911700607,0.0959142774003483,0.052196444737844405,2805.66154207989,8.013491457037578,28.0,0,1,0,37,1206.6,36.9,31.5,0.42335526315789473,0.0671365562475414 +RF (tuned),2309.3465478268977,0.23789871352457861,541.3031953907538,0.15647241398955916,0.9239206793690314,0.8829729917217929,0.15967745362634034,0.13085809308763893,5371.163113535875,7.241889916329925,25.776315789473685,0.178915,871.1966819789675,0.17230602105458576,323.74369638605225,0.07643497412773152,1.0,0.9922206683839487,0.10190746366276843,0.0730160635989342,4278.677975908691,6.216263662191708,28.0,0,1,1,36,1165.9,34.3,32.2,0.3805921052631579,0.06602164552633603 +NN_TORCH (default),48.66164081361559,0.6504810580733227,11.536659167937149,0.237589986723434,0.9794017223291384,0.9468958024782982,0.17787410882723695,0.13771350690183579,155.18773864824473,10.878820110101595,28.855263157894736,0.180355,26.916632894674937,0.2675716214709811,6.83469910157457,0.14703020953097523,1.0,1.0,0.11760977491757879,0.0802335689086278,137.05069981706868,8.619821917518276,29.0,0,0,0,38,1079.8,34.2,37.0,0.3036184210526316,0.03810259007354377 +FASTAI (default),31.12571889106293,1.103970385504048,5.0919225272079345,0.5139922583887925,0.9739593024362323,0.9384236129261808,0.208368110921483,0.16210807510159564,74.47424680202033,27.97257070113272,29.86842105263158,0.19183499999999998,12.713354892200893,0.7982388072543674,2.9120182447539116,0.36810695156439827,1.0,1.0,0.15842934034818912,0.11423976777713259,60.1261932941261,24.301698162325565,32.5,0,0,0,38,1052.4,31.3,32.2,0.27828947368421053,0.03726976081535309 +RF (default),3.928520040972191,0.1435067986187182,0.8021093004373405,0.07125612411976835,0.9894800211663467,0.9597397470174345,0.2262777412236202,0.2238535750194389,6.102631035159317,3.7416299644570024,31.43421052631579,0.21025,1.1966572999954224,0.08589340580834282,0.3813090053938437,0.03721195658349352,1.0,1.0,0.17586669886220047,0.11508233454921707,5.480722222590385,3.4579154094346745,33.75,0,0,0,38,1000.0,0.0,0.0,0.23914473684210527,0.03408922058843364 +LR (tuned + ensemble),310.9206490230839,1.856038474618343,112.43523004573552,0.6244822981818133,0.95841567098672,0.9468661893017276,0.2771489697764033,0.25616106870038724,1090.6543235756506,24.154836057241496,31.55263157894737,0.20569500000000002,172.99803659651013,0.33067578077316284,51.78500762114817,0.22385943240534312,1.0,1.0,0.21624639780784433,0.13955605246047054,696.2608974821628,13.25530093456624,34.5,0,0,1,37,995.6,36.9,31.3,0.2361842105263158,0.044206695428419006 +LR (tuned),310.9206490230839,0.5155129476597434,112.43523004573552,0.17331679222553462,0.9738366106666893,0.9540848137730095,0.28722955851815496,0.26622817101149315,1090.6543235756506,6.903044838007482,32.578947368421055,0.20751,172.99803659651013,0.12874411212073433,51.78500762114817,0.07805190196778514,1.0,1.0,0.23103244715285115,0.1400511647789566,696.2608974821628,4.422167155622043,35.5,0,0,1,37,958.2,35.1,31.1,0.21052631578947367,0.0404226332874587 +LR (default),7.59960079534709,0.5285763780973111,2.68894957171299,0.18897855365485863,0.9806670457343031,0.9603029869880186,0.29905331478343605,0.29059096925971106,27.747857838408066,7.876763349331487,32.8421052631579,0.21230500000000002,5.359276652336121,0.13779839674631755,1.6116061178401027,0.09774404154712064,1.0,1.0,0.23702577885376547,0.1541261802297627,18.023625361427616,4.735441569338942,36.0,1,0,0,37,949.7,33.7,34.1,0.20394736842105263,0.056652505447899036 +XT (default),2.7359117016457675,0.18087529669031066,0.7550887156747417,0.07422230753863952,0.9898367767783371,0.9684993030328994,0.2540897022242163,0.2541707312202283,5.173741811459425,4.243951698357253,33.44736842105263,0.21292,1.01790091726515,0.09051434199015299,0.24605929188859293,0.04072451222400395,1.0,1.0,0.18129521821713152,0.12375220501674408,4.456264068369281,3.7424721126192138,35.0,0,0,0,38,925.5,42.7,39.1,0.18881578947368421,0.03328575040660496 +KNN (tuned + ensemble),210.21058051676778,11.66904854404996,28.3636621606113,0.77695539036087,1.0,0.9950511983082982,0.47685866576144204,0.591834145685782,79.33933274576572,77.9475913060204,37.73684210526316,0.318405,17.435897601975334,0.2367298404375712,3.56631606767575,0.18997109296167614,1.0,1.0,0.40612853980290964,0.6594749568861824,57.50317350047459,12.487074071232104,39.0,0,0,0,38,695.4,47.8,48.1,0.08157894736842106,0.02674015281074318 +KNN (tuned),210.21058051676778,1.8054817143936601,28.3636621606113,0.13106223823409818,1.0,0.997541520007056,0.49580085237079846,0.6380122373615625,79.33933274576572,12.511864612142434,38.73684210526316,0.322975,17.435897601975334,0.0851174063152737,3.56631606767575,0.040417757021976156,1.0,1.0,0.44242485760502603,0.6943833905865349,57.50317350047459,2.342964973116946,40.0,0,0,0,38,605.7,44.3,50.4,0.056578947368421055,0.025912413932803406 +KNN (default),1.7449495283483762,0.22627578220869365,0.489346568018936,0.038714559202156204,1.0,1.0,0.5811563583799122,0.9385425054610625,1.0055419249185589,2.3649007838074403,40.03947368421053,0.382765,0.27595198154449463,0.036337282922532826,0.07126887487893994,0.021006283652748647,1.0,1.0,0.5374127836675906,1.0,1.0,1.2165713596834893,41.0,0,0,0,38,450.7,66.1,60.9,0.02401315789473684,0.025076581229724376 diff --git a/data/full-cls/time_plot.pdf b/data/full-cls/time_plot.pdf new file mode 100644 index 0000000000000000000000000000000000000000..87c552f8f53d3e24fe5b646b299638ba5a0269c0 Binary files /dev/null and b/data/full-cls/time_plot.pdf differ diff --git a/data/full-cls/time_plot.png.zip b/data/full-cls/time_plot.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..37122cdeb1b531b39b2d2530daa036e5ebf8752f --- /dev/null +++ b/data/full-cls/time_plot.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1425f58f7556e77814ca63062d6ffbc1d9f5f0b02dfed773d91d750d54675e9 +size 338592 diff --git a/data/full-cls/tuning-impact-elo-horizontal.pdf b/data/full-cls/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8673de4bd9a473b784efe27f2176b597f1eacb43 Binary files /dev/null and b/data/full-cls/tuning-impact-elo-horizontal.pdf differ diff --git a/data/full-cls/tuning-impact-elo-horizontal.png.zip b/data/full-cls/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..5ef3fa53850b2d68b328dc51473147be47ff79e0 --- /dev/null +++ b/data/full-cls/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37484f6a707cfe666b8b94eeb6f054fbe0d08aa807104a6d7ee3e03f4849e4ab +size 129171 diff --git a/data/full-cls/tuning-impact-elo.pdf b/data/full-cls/tuning-impact-elo.pdf new file mode 100644 index 0000000000000000000000000000000000000000..cc9b8ee783509145f7f040ecd35942ff9ffd0cf2 Binary files /dev/null and b/data/full-cls/tuning-impact-elo.pdf differ diff --git a/data/full-cls/tuning-impact-elo.png.zip b/data/full-cls/tuning-impact-elo.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..19dfcaca01d3368e328477a6b19468a4ea73716f --- /dev/null +++ b/data/full-cls/tuning-impact-elo.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a3ecf402181f96fdf8623fbd8c1a34553df3cee6308ad17c0be48f153266a2a7 +size 126363 diff --git a/data/full-imputed-cls/figures/critical-diagram.pdf b/data/full-imputed-cls/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e5bedda14e922e30e837de6f9751c4e999fc16d8 Binary files /dev/null and b/data/full-imputed-cls/figures/critical-diagram.pdf differ diff --git a/data/full-imputed-cls/figures/critical-diagram.png.zip b/data/full-imputed-cls/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..05a97a2c8242d216adca63720b87600448ffba24 --- /dev/null +++ b/data/full-imputed-cls/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3f39791709f73b5e54c7c8a889281edd9c1b1f4dedddaa62e35ea066760eb0f2 +size 319938 diff --git a/data/full-imputed-cls/leaderboard.tex b/data/full-imputed-cls/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..0c107618a84cb104f30ecd22332d1c5a0783b668 --- /dev/null +++ b/data/full-imputed-cls/leaderboard.tex @@ -0,0 +1,53 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1570${}_{-31,+30}$}} & \textcolor{gold}{\textbf{0.564}} & \textcolor{gold}{\textbf{9.7}} & \textcolor{silver}{\textbf{3.3}} & \textcolor{silver}{\textbf{6}} & \textcolor{gold}{\textbf{6.9\%}} & 1322.72 & 2.36 \\ +TabM (T+E) & \textcolor{gold}{\textbf{1570${}_{-29,+28}$}} & \textcolor{bronze}{\textbf{0.492}} & \textcolor{gold}{\textbf{9.7}} & 4.5 & 3 & \textcolor{bronze}{\textbf{8.2\%}} & 2466.21 & 1.50 \\ +LightGBM (T+E) & \textcolor{bronze}{\textbf{1534${}_{-30,+29}$}} & 0.404 & \textcolor{bronze}{\textbf{11.1}} & 6.3 & 1 & 9.8\% & 382.05 & 1.49 \\ +RealMLP (T+E) & 1531${}_{-30,+31}$ & 0.419 & 11.2 & 7.7 & 0 & 9.2\% & 5534.77 & 3.87 \\ +TabICL (D) & 1525${}_{-31,+22}$ & \textcolor{silver}{\textbf{0.501}} & 11.4 & \textcolor{bronze}{\textbf{3.5}} & \textcolor{silver}{\textbf{6}} & \textcolor{silver}{\textbf{7.9\%}} & 8.68 & 1.74 \\ +TabM (T) & 1484${}_{-30,+28}$ & 0.407 & 13.2 & 6.4 & 1 & 9.3\% & 2466.21 & 0.18 \\ +CatBoost (T+E) & 1476${}_{-26,+24}$ & 0.379 & 13.6 & 8.8 & 0 & 9.1\% & 1372.94 & 0.56 \\ +CatBoost (T) & 1468${}_{-29,+23}$ & 0.365 & 13.8 & 7.0 & 1 & 9.3\% & 1372.94 & 0.07 \\ +LightGBM (T) & 1460${}_{-28,+29}$ & 0.318 & 14.3 & 12.0 & 0 & 10.6\% & 382.05 & 0.25 \\ +XGBoost (T+E) & 1451${}_{-28,+28}$ & 0.321 & 14.7 & 9.3 & 0 & 10.7\% & 685.87 & 1.45 \\ +CatBoost (D) & 1450${}_{-27,+25}$ & 0.345 & 14.7 & 7.1 & 1 & 10.3\% & 5.72 & 0.08 \\ +TabPFNv2 (T+E) & 1450${}_{-32,+26}$ & 0.486 & 14.7 & \textcolor{gold}{\textbf{3.1}} & \textcolor{gold}{\textbf{8}} & 9.7\% & 3008.22 & 20.85 \\ +ModernNCA (T) & 1412${}_{-31,+25}$ & 0.278 & 16.5 & 9.7 & 1 & 10.5\% & 4879.89 & 0.52 \\ +ModernNCA (T+E) & 1410${}_{-30,+26}$ & 0.373 & 16.6 & 7.5 & 0 & 10.4\% & 4879.89 & 8.74 \\ +XGBoost (T) & 1408${}_{-26,+30}$ & 0.266 & 16.6 & 13.2 & 0 & 11.1\% & 685.87 & 0.21 \\ +TabPFNv2 (T) & 1384${}_{-29,+34}$ & 0.379 & 17.8 & 5.1 & 1 & 12.1\% & 3008.22 & 0.51 \\ +TabM (D) & 1373${}_{-27,+30}$ & 0.271 & 18.3 & 12.1 & 0 & 12.6\% & 10.21 & 0.14 \\ +TabPFNv2 (D) & 1363${}_{-32,+27}$ & 0.348 & 18.8 & 4.7 & 4 & 13.0\% & 3.37 & 0.32 \\ +TorchMLP (T+E) & 1358${}_{-26,+29}$ & 0.222 & 19.2 & 14.9 & 0 & 11.6\% & 2389.22 & 2.16 \\ +RealMLP (T) & 1354${}_{-27,+31}$ & 0.202 & 19.3 & 15.2 & 0 & 12.3\% & 5534.77 & 0.19 \\ +EBM (T+E) & 1352${}_{-28,+27}$ & 0.180 & 19.4 & 13.5 & 0 & 14.9\% & 914.23 & 0.22 \\ +FastaiMLP (T+E) & 1318${}_{-28,+24}$ & 0.196 & 21.1 & 12.7 & 0 & 14.6\% & 618.90 & 4.77 \\ +ModernNCA (D) & 1306${}_{-27,+27}$ & 0.134 & 21.7 & 12.3 & 1 & 14.7\% & 14.78 & 0.35 \\ +EBM (T) & 1293${}_{-31,+24}$ & 0.122 & 22.4 & 17.9 & 0 & 15.6\% & 914.23 & 0.03 \\ +EBM (D) & 1262${}_{-36,+27}$ & 0.136 & 23.9 & 11.4 & 1 & 16.6\% & 4.31 & 0.05 \\ +XGBoost (D) & 1255${}_{-32,+28}$ & 0.105 & 24.3 & 19.1 & 0 & 14.1\% & 1.77 & 0.12 \\ +ExtraTrees (T+E) & 1250${}_{-28,+27}$ & 0.104 & 24.5 & 16.7 & 0 & 15.8\% & 189.76 & 0.74 \\ +TorchMLP (T) & 1238${}_{-27,+28}$ & 0.096 & 25.0 & 21.5 & 0 & 14.1\% & 2389.22 & 0.15 \\ +RealMLP (D) & 1236${}_{-25,+24}$ & 0.090 & 25.2 & 21.0 & 0 & 14.6\% & 35.38 & 0.20 \\ +TabDPT (D) & 1236${}_{-35,+30}$ & 0.176 & 25.1 & 8.2 & 2 & 15.5\% & 22.61 & 8.55 \\ +FastaiMLP (T) & 1221${}_{-26,+28}$ & 0.089 & 25.9 & 21.0 & 0 & 16.6\% & 618.90 & 0.30 \\ +RandomForest (T+E) & 1213${}_{-22,+26}$ & 0.107 & 26.2 & 14.3 & 0 & 16.6\% & 323.74 & 0.74 \\ +LightGBM (D) & 1197${}_{-30,+28}$ & 0.076 & 26.9 & 23.5 & 0 & 15.4\% & 1.79 & 0.12 \\ +ExtraTrees (T) & 1196${}_{-34,+25}$ & 0.079 & 27.0 & 17.1 & 0 & 17.2\% & 189.76 & 0.08 \\ +RandomForest (T) & 1160${}_{-34,+35}$ & 0.076 & 28.6 & 16.4 & 0 & 17.8\% & 323.74 & 0.08 \\ +TorchMLP (D) & 1081${}_{-27,+24}$ & 0.021 & 31.9 & 29.1 & 0 & 19.3\% & 6.83 & 0.15 \\ +FastaiMLP (D) & 1052${}_{-33,+29}$ & 0.026 & 33.1 & 29.9 & 0 & 22.1\% & 2.91 & 0.37 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.011 & 34.8 & 32.8 & 0 & 24.1\% & 0.38 & 0.04 \\ +Linear (T+E) & 998${}_{-37,+25}$ & 0.042 & 34.9 & 25.5 & 0 & 29.2\% & 51.79 & 0.22 \\ +Linear (T) & 962${}_{-28,+24}$ & 0.026 & 36.0 & 30.5 & 0 & 30.1\% & 51.79 & 0.08 \\ +Linear (D) & 951${}_{-29,+27}$ & 0.019 & 36.3 & 27.7 & 0 & 31.1\% & 1.61 & 0.10 \\ +ExtraTrees (D) & 915${}_{-30,+32}$ & 0.010 & 37.2 & 34.3 & 0 & 26.7\% & 0.25 & 0.04 \\ +KNN (T+E) & 687${}_{-45,+40}$ & 0.000 & 41.7 & 41.4 & 0 & 48.5\% & 3.57 & 0.19 \\ +KNN (T) & 604${}_{-44,+49}$ & 0.000 & 42.7 & 42.5 & 0 & 50.3\% & 3.57 & 0.04 \\ +KNN (D) & 462${}_{-71,+70}$ & 0.000 & 43.9 & 43.7 & 0 & 58.7\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/full-imputed-cls/tabarena_leaderboard.csv b/data/full-imputed-cls/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..8387fe09f12392be3b2fb1fc5b405e2964554ed0 --- /dev/null +++ b/data/full-imputed-cls/tabarena_leaderboard.csv @@ -0,0 +1,46 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),7918.9967654708535,21.9962719316371,2834.6060407147975,3.0873029091200266,0.43572604440948826,0.43087386935667604,0.06891082959979876,0.03901043077138905,31011.170971980166,263.893057970457,9.68421052631579,0.159155,7086.632828738954,3.4718479580349393,1322.7174946761893,2.35500290690449,0.3593825434326649,0.428274830438652,0.03226297629046343,0.018303111934730745,23372.022224152715,142.27612475951975,6.5,6,3,1,28,1570.2,29.8,31.0,0.8026315789473685,0.3000744837987672 +TABM (tuned + ensemble),34341.376551290836,9.60910518720136,4223.58680397795,3.4317121842487635,0.5077918677314276,0.529397806419036,0.082399577608605,0.058005296379994874,45864.929770826835,134.55326674312073,9.697368421052632,0.17047,8245.070318195554,2.6588550435172187,2466.2108648716858,1.5022465694891438,0.47136645123500964,0.5457081140897149,0.0374695276183053,0.02990990162448854,39661.48674821285,121.43882105497326,8.0,3,2,2,31,1569.7,27.6,28.1,0.8023325358851675,0.22185652754538204 +GBM (tuned + ensemble),2957.8209862183407,11.98416593290909,759.1477152793035,2.5417865978850473,0.5957286671834613,0.6001432201748355,0.09787392914230995,0.058771855309898056,8568.794143617388,184.93302208876983,11.105263157894736,0.16698000000000002,1552.8742877377404,3.4757837878333198,382.05361557599804,1.4876036641335277,0.6251845339504845,0.6321931999484582,0.05068192653455761,0.02199552512560315,7514.342598912193,103.35126359750211,10.5,1,1,3,33,1533.5,28.5,29.1,0.7703349282296651,0.15791570680263833 +REALMLP (tuned + ensemble),75228.3606640392,17.6439790335315,5880.304462786179,8.382833590222491,0.5811880944579212,0.5735730659101549,0.09241691273181556,0.05324352029880537,134045.19581697704,292.40009500207566,11.18421052631579,0.166985,21596.535681260957,5.895895957946777,5534.7676914745925,3.874032586779933,0.5453504649932683,0.5623887498677591,0.04983329446999113,0.029565742001882556,90524.58108841689,250.02806627683862,9.0,0,0,3,35,1530.9,30.9,30.0,0.7685406698564593,0.1300254201050957 +TABICL (default),107.96023476381747,19.280086713506464,9.625347263659664,2.230965763361112,0.49900795570726975,0.5465023740990022,0.07942229531219994,0.05664735640934706,168.76491644336255,230.09998232516136,11.43421052631579,0.17207,25.732762111557854,3.449363695250617,8.684246340890724,1.7433667301105085,0.5497919378000328,0.567554590317891,0.03858333399786745,0.018532235328146802,137.81489160855577,127.65482709424812,10.0,6,4,1,27,1525.0,21.6,30.2,0.7628588516746412,0.28522470585642895 +TABM (tuned),34341.376551290836,1.0683778852050068,4223.58680397795,0.37888409513825755,0.5928893193157706,0.5852501756907074,0.09275955631038968,0.07202518870644577,45864.929770826835,13.890136033418576,13.197368421052632,0.172585,8245.070318195554,0.2723346948623657,2466.2108648716858,0.17557376557934842,0.5877464635288101,0.634980455939971,0.04800849593484757,0.03209545871697756,39661.48674821285,10.546435553124088,13.75,1,3,2,32,1484.0,28.0,29.4,0.7227870813397129,0.15576586816238858 +CAT (tuned + ensemble),16504.07579820414,2.581779239609925,3201.0265707590415,0.8915752484646539,0.6209138650557025,0.6100271864514402,0.09115568143278008,0.05171286523442811,27193.868813721343,44.08651163500922,13.56578947368421,0.16040500000000002,5046.900271190538,1.2630292971928916,1372.9411122807264,0.5562989902961428,0.6153632608570143,0.669054345099479,0.05787378892595674,0.023358363194257874,20541.46486167592,37.76547447057217,12.5,0,1,2,35,1475.5,23.8,25.7,0.7144138755980861,0.11335488880780162 +CAT (tuned),16504.07579820414,0.42638187025025576,3201.0265707590415,0.12024042770987867,0.6346801229162512,0.6217367947492924,0.0933016487821738,0.051383766225333326,27193.868813721343,6.229177098348047,13.828947368421053,0.161705,5046.900271190538,0.12877851062350804,1372.9411122807264,0.07385371803542701,0.6314852452199684,0.6847359221621947,0.05645008601114826,0.027822328565894415,20541.46486167592,4.9725379405984995,13.5,1,2,1,34,1467.9,22.9,28.9,0.708433014354067,0.14338794640411837 +GBM (tuned),2957.8209862183407,1.875632255676894,759.1477152793035,0.5049369881097493,0.6821201235665096,0.6578465553035225,0.10551186244180051,0.06848969819422726,8568.794143617388,32.37091249654915,14.276315789473685,0.16877999999999999,1552.8742877377404,0.5045170254177518,382.05361557599804,0.2538713244201605,0.7168421399910502,0.6818100706684505,0.05207128139882822,0.031112468640158004,7514.342598912193,15.175591971121808,13.0,0,0,0,38,1460.5,28.5,27.8,0.6982655502392344,0.08323051382797168 +XGB (tuned + ensemble),5957.374200563333,6.456417350253166,1167.526219278486,2.7907218103162132,0.6794057655741552,0.6681192465092817,0.10670226326209542,0.06938954053725854,10832.944520166355,137.58836217438702,14.68421052631579,0.165745,1680.0658507664998,2.2809726662105985,685.86510540535,1.4547593315263065,0.7126419091535116,0.7253164483987984,0.060361169858999864,0.027448154499449852,8251.297786111467,74.47019952683146,13.5,0,1,1,36,1450.8,27.4,27.3,0.6889952153110048,0.10802493613607358 +CAT (default),220.2627424415789,0.2710267032099049,109.9732905896213,0.13662992734425208,0.6551829671744798,0.6560274611111252,0.10281876446924927,0.055251622712656184,404.9759898295662,6.476751807794606,14.710526315789474,0.16373,18.264583627382912,0.17750852637820774,5.723546572951673,0.0761539571798428,0.6872314742824219,0.6661744510015815,0.05481736123368691,0.025874497523709752,103.69107151573829,5.120402547941957,16.0,1,3,1,33,1450.2,24.8,26.2,0.6883971291866029,0.14001756263385653 +TABPFNV2 (tuned + ensemble),10656.543301447302,101.4355180454533,2744.2670001010792,44.749282859570734,0.5138103973952525,0.5539076178208143,0.09695044510113665,0.08124536736386809,49872.05341476571,3444.4876051657507,14.723684210526315,0.17261500000000002,3494.967008225123,12.760541562239329,3008.2157047151595,20.848616639963154,0.5072451025993939,0.5739389565251636,0.04061436251124223,0.031106953519191773,28624.579287895787,825.5109643363919,9.5,8,4,1,25,1449.9,25.2,31.9,0.6880980861244019,0.32648046139872094 +MNCA (tuned),57356.284085895306,20.170870465214488,5990.817791505437,2.0485901061394993,0.721681429841744,0.6606257243361955,0.10501593687511648,0.07242005802280899,80456.91211763977,129.4389431731015,16.513157894736842,0.17054999999999998,14186.536935488384,0.6020842525694106,4879.890404506269,0.5247194359730172,0.7744612480983963,0.6781072751281167,0.06755654915093501,0.04344719805270737,66956.02864547497,27.735396922747526,15.5,1,0,0,37,1411.7,24.2,30.4,0.6474282296650717,0.10323935747363282 +MNCA (tuned + ensemble),57356.284085895306,531.7593351699455,5990.817791505437,50.07821547982156,0.6269167185273185,0.6106049466828749,0.10389701842794222,0.0798115271970102,80456.91211763977,3301.6213661597712,16.57894736842105,0.183035,14186.536935488384,14.17156207561493,4879.890404506269,8.743516387788919,0.6265388929939845,0.5779679694371859,0.06497465354642928,0.03974745118482769,66956.02864547497,548.1975258046007,12.5,0,2,4,32,1410.1,25.6,29.3,0.645933014354067,0.13333696320265415 +XGB (tuned),5957.374200563333,1.301870340352867,1167.526219278486,0.6678561539347552,0.7344435643624297,0.7080657004391169,0.11080653994457279,0.07384264693213777,10832.944520166355,28.47499330573137,16.63157894736842,0.16848000000000002,1680.0658507664998,0.3827125522825453,685.86510540535,0.2050912539994952,0.7444722954009122,0.7540446689272955,0.06946524781463731,0.034107460094903265,8251.297786111467,11.461364611937853,15.0,0,0,0,38,1408.1,29.3,26.0,0.6447368421052632,0.0754804237905414 +TABPFNV2 (tuned),10656.543301447302,3.410806728176206,2744.2670001010792,1.555769686886014,0.6207514015330685,0.635473706243979,0.1213068605022819,0.09569674055075465,49872.05341476571,114.00749994252256,17.82894736842105,0.1868,3494.967008225123,0.5060818235079447,3008.2157047151595,0.5144277113236544,0.7315480103218319,0.6473273087383917,0.08561151707807774,0.042273708446926114,28624.579287895787,25.570859321617846,13.0,1,8,1,28,1384.2,33.6,28.4,0.6175239234449761,0.19430531047345106 +TABM (default),150.0762373017289,1.2507152029645372,19.886819500646006,0.46487430096802723,0.7289050360420961,0.7331300276199935,0.1257035609004299,0.09205634091522938,189.61145131944258,14.042337078320946,18.30263157894737,0.17246,31.126562476158142,0.20260944763819377,10.213381764059356,0.1381032773929915,0.8404844924976476,0.7991898878806922,0.06154427249600947,0.03281092722346347,144.96049255349743,10.968725907290855,18.0,0,0,0,38,1372.7,29.7,27.0,0.6067583732057417,0.08288591058268369 +TABPFNV2 (default),11.400364582092442,0.8958970822786029,4.227300497658887,0.4575723059305235,0.6515996652210705,0.6914396497170463,0.1303285119303074,0.10434015301735906,53.73712299744585,29.471362460513575,18.842105263157894,0.1886,7.994279013739691,0.2908047080039978,3.368600991426515,0.3152861168047789,0.782945687586422,0.7254516789049502,0.07929605187577571,0.04313507441518306,41.34560285308973,17.75894630310718,17.0,4,1,4,29,1363.4,26.2,31.3,0.5944976076555024,0.21091254835613285 +NN_TORCH (tuned + ensemble),24331.947126566527,16.31368946276213,3050.9102763481856,4.325404104362444,0.77831117981961,0.7537646164336206,0.1163781574015359,0.08020239339635844,56038.718047349466,227.24142067178357,19.210526315789473,0.17071999999999998,9097.789536105262,5.056680162747702,2389.2199648500327,2.157502904371376,0.9031626632034824,0.8115035227779454,0.06782483230561193,0.04484844246885637,44480.90343297912,173.17543399472862,20.0,0,0,0,38,1358.1,28.5,25.2,0.5861244019138756,0.06710526855854969 +REALMLP (tuned),75228.3606640392,0.8387730677922566,5880.304462786179,0.4572085739395715,0.7977234575542793,0.7425716110334656,0.12348440991003401,0.0833370259997771,134045.19581697704,13.981658721822997,19.31578947368421,0.16874,21596.535681260957,0.2674341731601291,5534.7676914745925,0.19066645138299287,0.8787741747777617,0.7462218843777739,0.07816480714694801,0.04156599671061481,90524.58108841689,11.215626382435188,17.0,0,0,0,38,1354.0,30.3,26.4,0.583732057416268,0.06571086364796647 +EBM (tuned + ensemble),36729.440274559965,1.3371900389766136,6141.104384884199,0.5036823041953499,0.8200760362636922,0.8088827670590802,0.14906213620200864,0.10957479574309696,25271.27145534202,19.518642702178116,19.36842105263158,0.17122500000000002,2366.879786974854,0.4240463972091675,914.2329798556116,0.21634762578811195,0.9326307523783561,0.8581162937539966,0.0823298574360965,0.040258063765572304,15273.913117491418,11.254406005139426,19.0,0,0,1,37,1352.5,26.9,27.2,0.5825358851674641,0.07395314423107598 +FASTAI (tuned + ensemble),7309.51755415473,18.692008190266574,1376.1098802486467,8.473426897342513,0.8036000089866165,0.7950880902890147,0.14576373524794659,0.08697141843706842,18964.092381812123,455.5897128961598,21.07894736842105,0.178125,3087.37076303694,11.787012616793314,618.8953909329178,4.7655686359255345,1.0,0.8520647097452839,0.08731644523471122,0.05446587011526682,15284.817189242676,443.8905026950689,22.5,0,1,0,37,1318.4,23.6,27.4,0.5436602870813397,0.07903561014373495 +MNCA (default),304.22019695963775,10.484658345144394,17.60828380729721,1.3061868722894623,0.8657835470756838,0.8129667435543778,0.1468437071724757,0.09623287392908153,254.62493914649235,74.63976386946672,21.68421052631579,0.18519,31.500762327512106,0.5732622504234314,14.777266169000828,0.34634581634079226,1.0,0.8873627778923099,0.092306054050666,0.051103226670839136,209.09978531409226,23.91921150117286,22.0,1,0,0,37,1305.5,26.4,26.6,0.5299043062200957,0.0813133873734871 +EBM (tuned),36729.440274559965,0.18373215324000308,6141.104384884199,0.08212434505580339,0.8777961641049835,0.8466007670201623,0.1561788693733805,0.11711714380422451,25271.27145534202,2.5019643980329747,22.36842105263158,0.17203000000000002,2366.879786974854,0.0449512971772088,914.2329798556116,0.02528246646038782,1.0,0.8902021091214286,0.08835084225135731,0.04420635043937865,15273.913117491418,1.252412448907763,23.25,0,0,0,38,1293.4,24.0,30.1,0.5143540669856459,0.05589023030409936 +EBM (default),119.68166406294058,0.19009479611937763,11.429209024387047,0.10185762188607354,0.8643875945345197,0.8554720279520243,0.1660427740444218,0.12380007201638724,109.6134925105603,3.4110931424674527,23.92105263157895,0.17447000000000001,9.92618230978648,0.06371633741590713,4.31382445805991,0.0475851422516083,1.0,0.9291636979284108,0.09554880640873625,0.03755874966541475,60.70178540099199,2.7834768100426253,23.5,1,0,2,35,1261.7,26.3,35.3,0.47906698564593303,0.08747552817069155 +XGB (default),13.116732126927516,0.5742131232518202,3.202512268619222,0.2981186068489448,0.8951319461246384,0.8517662606396875,0.14066462775166078,0.11671767617274657,31.46292861336276,13.4756369742995,24.289473684210527,0.17317,5.653352538744608,0.30113152662913,1.771208861779989,0.11707781619763814,1.0,0.9333352642247335,0.09869044613168698,0.06054798187983182,28.26053761224749,9.127662964815336,24.0,0,0,0,38,1255.0,27.6,32.0,0.4706937799043062,0.05238109869332483 +XT (tuned + ensemble),1317.4209560674533,2.9519454171085915,472.65138083581655,1.3657584923642982,0.8963117961824938,0.8549584041789441,0.15792711096900047,0.11679184021571258,4571.615940832833,75.60244638381447,24.526315789473685,0.17667500000000003,756.8230986197789,1.8136235740449693,189.76252609436062,0.7431041876698922,1.0,0.927972542444979,0.09299076955407681,0.0685642717298475,2805.66154207989,66.39288968996527,27.5,0,0,1,37,1250.3,26.7,27.4,0.465311004784689,0.059965717626812 +NN_TORCH (tuned),24331.947126566527,0.8716282456241854,3050.9102763481856,0.24396545036982029,0.903659699715494,0.844246940465121,0.1412027905314641,0.10508427961749067,56038.718047349466,12.138164397560297,24.986842105263158,0.17446499999999998,9097.789536105262,0.29952494303385413,2389.2199648500327,0.15177921475257505,1.0,0.9016439793972164,0.10379368194908523,0.06014400941853005,44480.90343297912,9.196372470124006,25.5,0,0,0,38,1238.0,27.5,26.6,0.4548444976076555,0.046571648560440736 +TABDPT (default),171.71139350780967,66.09824930987163,27.724576482795502,22.626481529214185,0.8235947911156236,0.8095518428540334,0.15484004438847293,0.11618704539567898,481.10896045076544,1338.9171702872993,25.13157894736842,0.190385,97.80311637454562,28.07416233751509,22.609050986069803,8.552450841932743,1.0,0.9546223005833383,0.1092338236572048,0.046098002717393685,400.67828468381333,1123.8959746745188,30.0,2,0,3,33,1235.6,29.1,34.6,0.451555023923445,0.12181579800048477 +REALMLP (default),478.8822499715096,0.8300057720022592,36.68387630133338,0.4476794774606417,0.9101816949003365,0.8572978290726969,0.14643998258765722,0.10337066194167087,851.5414617865436,13.994120873198558,25.223684210526315,0.17601499999999998,136.44702684879303,0.27011087603039213,35.38408676659952,0.2030042614924829,1.0,0.9266197803085297,0.12190405737642834,0.05388197306450349,578.3100219480689,11.599510934138282,26.0,0,0,0,38,1236.3,23.3,24.8,0.4494617224880383,0.04751504776941452 +FASTAI (tuned),7309.51755415473,1.0496307073977955,1376.1098802486467,0.623937267000547,0.9108193738616642,0.8627464392392367,0.1657890106274598,0.10852453147986535,18964.092381812123,32.00078545496663,25.907894736842106,0.18023499999999998,3087.37076303694,0.8054822285970051,618.8953909329178,0.2978802219128553,1.0,0.9008669648648755,0.09789642437607732,0.06498589353723835,15284.817189242676,26.536834640421894,26.0,0,0,0,38,1220.9,27.5,25.6,0.4339114832535885,0.0477144255618073 +RF (tuned + ensemble),2309.3465478268977,2.3587986137434753,541.3031953907538,1.2662572218585502,0.8931944011226663,0.8648138716362284,0.16581461227526717,0.12803671283612092,5371.163113535875,67.79453318661523,26.210526315789473,0.177925,871.1966819789675,1.9029027620951335,323.74369638605225,0.7428875097152683,1.0,0.9737920909397468,0.10359622635024984,0.07479057874905146,4278.677975908691,61.67862848692378,28.5,0,1,1,36,1212.7,25.5,22.0,0.4270334928229665,0.06970279051676614 +GBM (default),7.9087888545460165,0.5753568479889317,2.951996847158713,0.17011749256975625,0.923947709053949,0.8891434210575603,0.1537736266611291,0.11510537852691831,31.85523047906877,10.81395814590769,26.94736842105263,0.172975,5.532836645179325,0.2585195038053725,1.7913477923414471,0.12049981156984965,1.0,0.9510718998865025,0.11068698010353623,0.06257356769064609,25.045220341015206,6.549914554959342,27.0,0,0,0,38,1197.3,28.0,29.3,0.4102870813397129,0.04248028712621324 +XT (tuned),1317.4209560674533,0.3043035832762021,472.65138083581655,0.1722314796858175,0.9205594165466939,0.8874484818522351,0.17160734150139612,0.12727957224512434,4571.615940832833,8.349379059762423,27.013157894736842,0.17796,756.8230986197789,0.18769407272338867,189.76252609436062,0.07878183958882805,1.0,0.9680533870550431,0.10587648465771732,0.07275376605805199,2805.66154207989,8.013491457037578,30.0,0,1,0,37,1195.5,24.3,33.7,0.40879186602870815,0.05857289566980501 +RF (tuned),2309.3465478268977,0.23789871352457861,541.3031953907538,0.15647241398955916,0.9239206793690314,0.8911097092103105,0.17793268234950518,0.14008679114783473,5371.163113535875,7.241889916329925,28.63157894736842,0.178915,871.1966819789675,0.17230602105458576,323.74369638605225,0.07643497412773152,1.0,0.9949804676438307,0.11788864699534024,0.07710400978410702,4278.677975908691,6.216263662191708,31.5,0,1,1,36,1160.3,34.2,33.5,0.37200956937799046,0.061067339762616434 +NN_TORCH (default),48.66164081361559,0.6504810580733227,11.536659167937149,0.237589986723434,0.9794017223291384,0.9512565557783356,0.19345936517046047,0.14725825272177415,155.18773864824473,10.878820110101595,31.88157894736842,0.180355,26.916632894674937,0.2675716214709811,6.83469910157457,0.14703020953097523,1.0,0.996424410496547,0.142508632176782,0.09058250288187904,137.05069981706868,8.619821917518276,33.0,0,0,0,38,1081.0,23.8,26.2,0.29814593301435405,0.034317307850256905 +FASTAI (default),31.12571889106293,1.103970385504048,5.0919225272079345,0.5139922583887925,0.9739593024362323,0.9415260580203147,0.22059463150791997,0.17081144656301706,74.47424680202033,27.97257070113272,33.05263157894737,0.19183499999999998,12.713354892200893,0.7982388072543674,2.9120182447539116,0.36810695156439827,1.0,1.0,0.16584224786083723,0.11423976777713259,60.1261932941261,24.301698162325565,36.0,0,0,0,38,1051.9,28.5,32.3,0.2715311004784689,0.03342920484399541 +RF (default),3.928520040972191,0.1435067986187182,0.8021093004373405,0.07125612411976835,0.9894800211663467,0.9673542743999383,0.2414662065185759,0.23100095205706725,6.102631035159317,3.7416299644570024,34.75,0.21025,1.1966572999954224,0.08589340580834282,0.3813090053938437,0.03721195658349352,1.0,1.0,0.17586669886220047,0.11692775501298655,5.480722222590385,3.4579154094346745,36.5,0,0,0,38,1000.0,0.0,0.0,0.23295454545454544,0.030528745581686254 +LR (tuned + ensemble),310.9206490230839,1.856038474618343,112.43523004573552,0.6244822981818133,0.95841567098672,0.9529293340882212,0.2917618963946113,0.26394480043013563,1090.6543235756506,24.154836057241496,34.921052631578945,0.20569500000000002,172.99803659651013,0.33067578077316284,51.78500762114817,0.22385943240534312,1.0,1.0,0.23092002401472933,0.14184523965871282,696.2608974821628,13.25530093456624,38.0,0,0,1,37,997.7,25.0,36.8,0.229066985645933,0.03925268241634177 +LR (tuned),310.9206490230839,0.5155129476597434,112.43523004573552,0.17331679222553462,0.9738366106666893,0.9598346876040978,0.30105734832402986,0.27391130069194186,1090.6543235756506,6.903044838007482,36.0,0.20751,172.99803659651013,0.12874411212073433,51.78500762114817,0.07805190196778514,1.0,1.0,0.23702586233900474,0.14460038903245942,696.2608974821628,4.422167155622043,38.5,0,0,0,38,961.7,23.9,27.2,0.20454545454545456,0.03279384138465713 +LR (default),7.59960079534709,0.5285763780973111,2.68894957171299,0.18897855365485863,0.9806670457343031,0.9654351210981329,0.3109312468288118,0.29800411987362974,27.747857838408066,7.876763349331487,36.28947368421053,0.21230500000000002,5.359276652336121,0.13779839674631755,1.6116061178401027,0.09774404154712064,1.0,1.0,0.23702577885376547,0.1541261802297627,18.023625361427616,4.735441569338942,40.0,0,0,1,37,951.0,26.7,28.6,0.19796650717703348,0.03609254902053897 +XT (default),2.7359117016457675,0.18087529669031066,0.7550887156747417,0.07422230753863952,0.9898367767783371,0.975254507674389,0.26658722612579605,0.2609556022480316,5.173741811459425,4.243951698357253,37.23684210526316,0.21292,1.01790091726515,0.09051434199015299,0.24605929188859293,0.04072451222400395,1.0,1.0,0.18424442460299273,0.13780186094528035,4.456264068369281,3.7424721126192138,39.0,0,0,0,38,914.9,31.1,29.9,0.17643540669856458,0.029194371760752413 +KNN (tuned + ensemble),210.21058051676778,11.66904854404996,28.3636621606113,0.77695539036087,1.0,0.9962724899167298,0.48462822753295687,0.5970228555161268,79.33933274576572,77.9475913060204,41.671052631578945,0.318405,17.435897601975334,0.2367298404375712,3.56631606767575,0.18997109296167614,1.0,1.0,0.4262545079901014,0.6596341867589371,57.50317350047459,12.487074071232104,43.0,0,0,0,38,687.3,39.4,44.8,0.0756578947368421,0.024178278159192945 +KNN (tuned),210.21058051676778,1.8054817143936601,28.3636621606113,0.13106223823409818,1.0,0.9977118342240764,0.5033620318874256,0.6430012942836432,79.33933274576572,12.511864612142434,42.671052631578945,0.322975,17.435897601975334,0.0851174063152737,3.56631606767575,0.040417757021976156,1.0,1.0,0.4549174904387682,0.7006933550921264,57.50317350047459,2.342964973116946,44.0,0,0,0,38,604.1,48.6,43.9,0.05293062200956938,0.02351625016097909 +KNN (default),1.7449495283483762,0.22627578220869365,0.489346568018936,0.038714559202156204,1.0,1.0,0.58716374442683,0.9393138275073817,1.0055419249185589,2.3649007838074403,43.921052631578945,0.382765,0.27595198154449463,0.036337282922532826,0.07126887487893994,0.021006283652748647,1.0,1.0,0.5463314318406584,1.0,1.0,1.2165713596834893,45.0,0,0,0,38,461.6,69.5,70.7,0.02452153110047847,0.022868529201343697 diff --git a/data/full-imputed-cls/time_plot.pdf b/data/full-imputed-cls/time_plot.pdf new file mode 100644 index 0000000000000000000000000000000000000000..fb4dae9dbb8ee71eef058e44ff0e78395177e841 Binary files /dev/null and b/data/full-imputed-cls/time_plot.pdf differ diff --git a/data/full-imputed-cls/time_plot.png.zip b/data/full-imputed-cls/time_plot.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..37122cdeb1b531b39b2d2530daa036e5ebf8752f --- /dev/null +++ b/data/full-imputed-cls/time_plot.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1425f58f7556e77814ca63062d6ffbc1d9f5f0b02dfed773d91d750d54675e9 +size 338592 diff --git a/data/full-imputed-cls/tuning-impact-elo-horizontal.pdf b/data/full-imputed-cls/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..6f150df54d2835f8244092073ecb06f54e8fc962 Binary files /dev/null and b/data/full-imputed-cls/tuning-impact-elo-horizontal.pdf differ diff --git a/data/full-imputed-cls/tuning-impact-elo-horizontal.png.zip b/data/full-imputed-cls/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..b731c8016827e3f9e408a807292b2603f5ddf005 --- /dev/null +++ b/data/full-imputed-cls/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a078f7afa2868485715760399c09bfafb613d965d83f6812f69cf97fffa1bf1a +size 157281 diff --git a/data/full-imputed-cls/tuning-impact-elo.pdf b/data/full-imputed-cls/tuning-impact-elo.pdf new file mode 100644 index 0000000000000000000000000000000000000000..4e2cd4b8d4769790352e223f79112c3935dc2b61 Binary files /dev/null and b/data/full-imputed-cls/tuning-impact-elo.pdf differ diff --git a/data/full-imputed-cls/tuning-impact-elo.png.zip b/data/full-imputed-cls/tuning-impact-elo.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..4d334d34109b6667816c7fb12ee7ca4aca8b081a --- /dev/null +++ b/data/full-imputed-cls/tuning-impact-elo.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1c0d78234c9387f4f76d66f49e4864c2afceebe0a4eb87becefd80ea57425b17 +size 172675 diff --git a/data/full-imputed-reg/figures/critical-diagram.pdf b/data/full-imputed-reg/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0359158a463986d53b8fd23927d617eeb054045f Binary files /dev/null and b/data/full-imputed-reg/figures/critical-diagram.pdf differ diff --git a/data/full-imputed-reg/figures/critical-diagram.png.zip b/data/full-imputed-reg/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..74782ea5e9693d23eb730d4cf71550b7a7f199aa --- /dev/null +++ b/data/full-imputed-reg/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1927615c20fa7401bab6e3cbd5a4ebb82562901a05a1efe9ad6f00ca041d364d +size 319836 diff --git a/data/full-imputed-reg/leaderboard.tex b/data/full-imputed-reg/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..6c6fa697481b68e5dc4c4aaf6630b570b79365e6 --- /dev/null +++ b/data/full-imputed-reg/leaderboard.tex @@ -0,0 +1,53 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1806${}_{-61,+56}$}} & \textcolor{gold}{\textbf{0.735}} & \textcolor{gold}{\textbf{4.5}} & \textcolor{silver}{\textbf{2.5}} & 2 & \textcolor{silver}{\textbf{2.7\%}} & 1625.74 & 6.76 \\ +RealMLP (T+E) & \textcolor{silver}{\textbf{1747${}_{-53,+70}$}} & \textcolor{silver}{\textbf{0.734}} & \textcolor{silver}{\textbf{5.5}} & 4.1 & 0 & \textcolor{gold}{\textbf{2.0\%}} & 6567.16 & 2.48 \\ +ModernNCA (T+E) & \textcolor{bronze}{\textbf{1628${}_{-59,+57}$}} & 0.603 & \textcolor{bronze}{\textbf{8.4}} & \textcolor{bronze}{\textbf{2.8}} & \textcolor{silver}{\textbf{3}} & 3.8\% & 3779.52 & 7.69 \\ +TabDPT (D) & 1618${}_{-57,+77}$ & \textcolor{bronze}{\textbf{0.635}} & 8.8 & \textcolor{gold}{\textbf{2.3}} & \textcolor{gold}{\textbf{5}} & \textcolor{bronze}{\textbf{2.9\%}} & 22.53 & 8.55 \\ +LightGBM (T+E) & 1610${}_{-61,+57}$ & 0.531 & 9.1 & 7.1 & 0 & 5.0\% & 700.15 & 9.32 \\ +CatBoost (T+E) & 1608${}_{-50,+65}$ & 0.536 & 9.0 & 7.2 & 0 & 4.5\% & 3552.96 & 0.97 \\ +CatBoost (T) & 1576${}_{-54,+53}$ & 0.512 & 10.3 & 6.9 & 0 & 4.6\% & 3552.96 & 0.10 \\ +TabM (T+E) & 1568${}_{-56,+48}$ & 0.488 & 10.5 & 6.5 & 0 & 3.3\% & 4158.29 & 1.41 \\ +XGBoost (T+E) & 1500${}_{-46,+50}$ & 0.432 & 12.8 & 12.2 & 0 & 5.5\% & 834.93 & 2.61 \\ +LightGBM (T) & 1499${}_{-63,+57}$ & 0.432 & 12.8 & 10.8 & 0 & 5.6\% & 700.15 & 0.97 \\ +XGBoost (T) & 1481${}_{-47,+56}$ & 0.398 & 13.6 & 13.0 & 0 & 5.6\% & 834.93 & 0.39 \\ +ModernNCA (T) & 1449${}_{-60,+50}$ & 0.351 & 14.6 & 6.7 & 0 & 5.9\% & 3779.52 & 0.40 \\ +CatBoost (D) & 1438${}_{-49,+53}$ & 0.389 & 15.1 & 11.6 & 0 & 6.2\% & 10.89 & 0.09 \\ +TabM (T) & 1438${}_{-53,+55}$ & 0.380 & 15.2 & 11.7 & 0 & 4.3\% & 4158.29 & 0.17 \\ +RealMLP (T) & 1395${}_{-53,+82}$ & 0.344 & 16.5 & 13.3 & 0 & 4.6\% & 6567.16 & 0.09 \\ +TabPFNv2 (T+E) & 1378${}_{-65,+56}$ & 0.408 & 17.3 & 3.0 & \textcolor{silver}{\textbf{3}} & 5.1\% & 4223.87 & 27.54 \\ +ModernNCA (D) & 1342${}_{-40,+53}$ & 0.209 & 18.8 & 13.4 & 0 & 7.4\% & 15.50 & 0.30 \\ +TabM (D) & 1329${}_{-55,+52}$ & 0.287 & 19.4 & 15.6 & 0 & 6.0\% & 13.32 & 0.13 \\ +TorchMLP (T+E) & 1303${}_{-53,+58}$ & 0.171 & 20.5 & 14.7 & 0 & 7.6\% & 4608.59 & 1.23 \\ +TabPFNv2 (T) & 1298${}_{-61,+53}$ & 0.282 & 20.8 & 7.6 & 0 & 6.2\% & 4223.87 & 0.45 \\ +RealMLP (D) & 1284${}_{-48,+58}$ & 0.137 & 21.3 & 18.2 & 0 & 7.1\% & 33.36 & 0.08 \\ +ExtraTrees (T+E) & 1273${}_{-51,+55}$ & 0.157 & 21.7 & 14.3 & 0 & 10.0\% & 158.22 & 0.84 \\ +LightGBM (D) & 1261${}_{-50,+49}$ & 0.070 & 22.1 & 21.5 & 0 & 8.1\% & 2.11 & 0.27 \\ +ExtraTrees (T) & 1254${}_{-49,+53}$ & 0.129 & 22.5 & 16.9 & 0 & 10.3\% & 158.22 & 0.15 \\ +TabPFNv2 (D) & 1230${}_{-61,+50}$ & 0.228 & 23.5 & 10.1 & 0 & 7.6\% & 2.80 & 0.31 \\ +TorchMLP (T) & 1224${}_{-52,+55}$ & 0.123 & 23.6 & 20.3 & 0 & 8.4\% & 4608.59 & 0.10 \\ +XGBoost (D) & 1217${}_{-46,+51}$ & 0.111 & 24.1 & 21.4 & 0 & 8.8\% & 2.24 & 0.24 \\ +RandomForest (T+E) & 1206${}_{-51,+43}$ & 0.066 & 24.5 & 22.4 & 0 & 10.9\% & 515.73 & 0.77 \\ +RandomForest (T) & 1153${}_{-47,+47}$ & 0.048 & 26.4 & 24.7 & 0 & 11.4\% & 515.73 & 0.12 \\ +EBM (T+E) & 1120${}_{-47,+61}$ & 0.160 & 27.6 & 13.5 & 0 & 13.7\% & 1890.68 & 0.13 \\ +ExtraTrees (D) & 1107${}_{-56,+48}$ & 0.061 & 28.3 & 25.1 & 0 & 12.2\% & 0.47 & 0.06 \\ +EBM (T) & 1083${}_{-60,+55}$ & 0.137 & 29.2 & 17.2 & 0 & 14.2\% & 1890.68 & 0.01 \\ +FastaiMLP (T+E) & 1039${}_{-46,+56}$ & 0.023 & 30.5 & 28.4 & 0 & 12.2\% & 540.06 & 2.67 \\ +TorchMLP (D) & 1037${}_{-57,+53}$ & 0.016 & 30.6 & 28.4 & 0 & 11.9\% & 20.48 & 0.08 \\ +EBM (D) & 1019${}_{-59,+58}$ & 0.093 & 31.2 & 28.2 & 0 & 15.1\% & 6.33 & 0.04 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.000 & 31.8 & 31.2 & 0 & 12.9\% & 0.53 & 0.06 \\ +TabICL (D) & 996${}_{-54,+45}$ & 0.000 & 31.8 & 31.2 & 0 & 12.9\% & 0.53 & 0.06 \\ +FastaiMLP (T) & 984${}_{-56,+46}$ & 0.013 & 32.3 & 30.9 & 0 & 12.7\% & 540.06 & 0.32 \\ +FastaiMLP (D) & 852${}_{-69,+56}$ & 0.000 & 35.7 & 35.0 & 0 & 17.1\% & 2.60 & 0.39 \\ +KNN (T+E) & 520${}_{-88,+72}$ & 0.000 & 40.7 & 40.5 & 0 & 36.1\% & 2.43 & 0.14 \\ +Linear (T+E) & 469${}_{-88,+60}$ & 0.000 & 41.3 & 41.2 & 0 & 35.4\% & 45.74 & 0.11 \\ +KNN (T) & 423${}_{-88,+102}$ & 0.000 & 41.7 & 41.5 & 0 & 36.8\% & 2.43 & 0.03 \\ +Linear (T) & 407${}_{-75,+72}$ & 0.000 & 41.9 & 41.8 & 0 & 35.6\% & 45.74 & 0.05 \\ +Linear (D) & 280${}_{-95,+75}$ & 0.000 & 43.2 & 43.1 & 0 & 38.1\% & 1.19 & 0.09 \\ +KNN (D) & 215${}_{-120,+122}$ & 0.000 & 43.6 & 43.4 & 0 & 40.8\% & 0.04 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/full-imputed-reg/tabarena_leaderboard.csv b/data/full-imputed-reg/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..97a24489a71730e7cbbc20cb26714e85f0191da9 --- /dev/null +++ b/data/full-imputed-reg/tabarena_leaderboard.csv @@ -0,0 +1,46 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),10053.09853225386,65.70725388180496,3138.716102354025,8.201870690838728,0.26461131829744206,0.2910041255880444,0.02699587865316731,0.03762065136585477,79227.35072832079,1344.7158520024777,4.538461538461538,4.16484,12938.261539538702,10.17258334159851,1625.738447135909,6.759745919437983,0.23133371369265834,0.295652367761122,0.015883109174547605,0.016643068102528722,85166.92529682687,517.5224332581762,3.0,2,3,2,6,1806.5,55.2,60.1,0.9195804195804196,0.39930069930069934 +REALMLP (tuned + ensemble),67709.67308180964,12.011677133731354,6963.87401913588,6.973854948645224,0.26596078198184286,0.30562769026149705,0.020013239864383952,0.02890399380839934,303258.6729720159,231.34066267352048,5.538461538461538,4.12234,28517.58209461636,4.232689115736219,6567.164030631503,2.480181700638205,0.19393332805279354,0.3339396668324287,0.017045117638759044,0.024876069447861582,203710.74184331874,227.32654052633927,4.0,0,1,3,9,1746.9,69.9,52.6,0.8968531468531469,0.24096736596736598 +MNCA (tuned + ensemble),31759.101002019288,48.99773550746787,6166.048475961011,17.643273328787647,0.39661026996847265,0.37338402309071167,0.03809303342780051,0.03729025347559432,186080.1526868885,677.6872983460363,8.384615384615385,4.48697,16310.556293937894,10.227302259869045,3779.5248398651206,7.690422738079533,0.35096562881515253,0.3553220810641804,0.019057076597787104,0.02404483039332963,198038.2195884601,507.5335424076248,7.0,3,1,1,8,1628.3,56.7,58.6,0.8321678321678322,0.3570428109901794 +TABDPT (default),150.02111775732448,54.38835432835114,28.728808967170515,25.042434382939593,0.36524485788247774,0.3767809526402384,0.029011919452666072,0.03451421977602538,859.9949957462201,1916.4746059233426,8.846153846153847,4.26747,137.4807067182329,29.239719518025716,22.52690614988171,8.547073882575031,0.28825222902504594,0.3462761515522977,0.024988993973347418,0.022011234994563023,904.9754515353668,1821.0928942426492,8.0,5,0,0,8,1618.2,76.9,56.6,0.8216783216783217,0.44424741924741923 +CAT (tuned + ensemble),30302.612278386874,4.803950041991014,6846.603031359736,1.23549623448159,0.46394270088709505,0.482144954832304,0.04475905931200346,0.05586568212598891,150000.99966111325,116.73562230823504,9.0,4.20863,22090.5574801498,2.685193909539117,3552.958864906998,0.9657383741190037,0.4150140849407447,0.48332107057126966,0.021491024872657194,0.036001021424025365,164350.82166574517,80.04598787510639,9.0,0,0,1,12,1608.1,64.2,49.2,0.8181818181818182,0.1387461503858265 +GBM (tuned + ensemble),3469.6354441180188,51.50608464469258,806.792675338677,8.57889161766087,0.4687885136784251,0.48192601834426685,0.049628720183150093,0.06009435650772031,20529.34803212188,1135.938430886033,9.076923076923077,4.21165,3055.419878217909,17.84196005927192,700.1537746143185,9.321818212785747,0.41525976638432377,0.4124957657121928,0.02669651687138408,0.03428335611022812,21786.570035668443,556.5446710812895,7.0,0,0,0,13,1609.9,56.1,60.6,0.8164335664335665,0.1408062697536382 +CAT (tuned),30302.612278386874,0.5550543483505901,6846.603031359736,0.16909210735792113,0.48768370669415817,0.5006635065398762,0.04596154257880672,0.05949308403285739,150000.99966111325,13.40589862804556,10.26923076923077,4.23064,22090.5574801498,0.3789627022213406,3552.958864906998,0.10450043094654878,0.5120020111687084,0.49061208598211514,0.026006470182449948,0.03741223775007456,164350.82166574517,10.277501137463553,10.0,0,1,1,11,1575.6,52.6,53.3,0.7893356643356644,0.14471447356062742 +TABM (tuned + ensemble),42663.03407314721,6.401903104782104,7337.2216133773,2.4450322937588758,0.5116630985629478,0.5032977898254283,0.032854613342711655,0.07923986201524974,180683.6213219068,120.23686774247648,10.538461538461538,4.1458,20820.912871148852,3.0152715841929116,4158.291053548661,1.4096720886484275,0.43090407292838107,0.4272044906933773,0.031023728849073895,0.02730307361680199,166291.5239595113,107.26839105814781,6.0,0,0,1,12,1568.0,47.3,55.2,0.7832167832167832,0.15363693425074498 +XGB (tuned + ensemble),6384.208144447945,12.513759183476113,1409.0815132938305,4.086239880001324,0.567688570586379,0.5725638306726402,0.055019280156632426,0.0676716205271066,26398.72664659111,353.2622484982949,12.76923076923077,4.22219,2596.930354913076,6.618133616447449,834.9300717202715,2.614265349176195,0.6256631137377601,0.6169840519555478,0.03390767076571366,0.04797658607386891,26370.57574541831,164.97865636299682,13.0,0,0,0,13,1500.2,49.4,45.3,0.7325174825174825,0.08187865427684432 +GBM (tuned),3469.6354441180188,7.385669826034806,806.792675338677,1.273710690539978,0.5677497712335507,0.5646962660086592,0.05572937722518249,0.07273694393515719,20529.34803212188,170.85098129275468,12.846153846153847,4.23482,3055.419878217909,2.690451833936903,700.1537746143185,0.9682498776019389,0.5552965190223861,0.535641070170376,0.027972044011269404,0.03922298758674819,21786.570035668443,130.77892053638826,12.0,0,0,0,13,1498.7,57.0,62.8,0.7307692307692307,0.09274030393939896 +XGB (tuned),6384.208144447945,2.5463058025409016,1409.0815132938305,0.7958717570786976,0.6023900941776976,0.599191708641876,0.05649722265542322,0.07175664058011187,26398.72664659111,74.35901041305158,13.576923076923077,4.23559,2596.930354913076,1.7797584003872342,834.9300717202715,0.3883258596259137,0.6620816436761664,0.6377270710295441,0.03317270939304562,0.04927060675426248,26370.57574541831,37.05707269936294,12.0,0,0,0,13,1481.0,55.7,46.3,0.7141608391608392,0.07698877919466154 +MNCA (tuned),31759.101002019288,2.180037997319148,6166.048475961011,0.7699041848491119,0.6494157477338965,0.5928326047970892,0.05893528444168654,0.07225555938838021,186080.1526868885,32.66971509741366,14.615384615384615,4.74895,16310.556293937894,0.48738079600863987,3779.5248398651206,0.3958729871859153,0.686862000079896,0.6299271517750961,0.027829209062692706,0.05013010527584701,198038.2195884601,22.639832589759934,17.0,0,2,1,10,1449.4,49.9,59.3,0.6905594405594405,0.1501631920773124 +CAT (default),101.69209800418625,0.27084102752881173,23.09400681118272,0.13269262448628016,0.6107641551712003,0.6218727584243557,0.062460236998275034,0.07708318639558216,496.3567653671193,9.26273127760114,15.115384615384615,4.21395,62.699359814325966,0.2736650307973226,10.889876924735699,0.09199146861016888,0.5847419575275037,0.6170852848140443,0.030438989353365198,0.03712451320809096,412.72250349454265,8.72239577968297,13.0,0,0,0,13,1437.7,52.1,48.9,0.6791958041958042,0.08584535972840124 +TABM (tuned),42663.03407314721,0.6790118826760185,7337.2216133773,0.2826346699151617,0.6195303355392976,0.6016841017485475,0.04291459493786008,0.10009773912606208,180683.6213219068,13.142332198971314,15.153846153846153,4.26932,20820.912871148852,0.3543446593814426,4158.291053548661,0.16724776809008424,0.5658117450288995,0.5524525384273968,0.039608740417227284,0.03536573040794217,166291.5239595113,12.524825096296247,13.0,0,0,0,13,1437.7,54.8,52.3,0.6783216783216783,0.08529071847502581 +REALMLP (tuned),67709.67308180964,0.5510838443397457,6963.87401913588,0.35676328285131836,0.655870427194709,0.6176644486616331,0.04609702039794716,0.0810536599656669,303258.6729720159,12.866326813784351,16.53846153846154,4.2931,28517.58209461636,0.2134650813208686,6567.164030631503,0.09160214336768219,0.67949922992921,0.6664835865428769,0.043563415018409435,0.04951944914046118,203710.74184331874,10.089309852682138,17.0,0,0,0,13,1395.2,81.1,52.3,0.6468531468531469,0.07544811900015519 +TABPFNV2 (tuned + ensemble),9087.265872733002,78.03043969614893,3585.89762046501,56.25472027093109,0.5916274908500255,0.6393456642784099,0.050662115797136684,0.0930896683951232,105571.67881602701,4530.894138806997,17.346153846153847,4.11819,4897.1321855386095,13.798940539360046,4223.8673583405725,27.542795487744606,0.7821381201281035,0.7702280073934004,0.034084501561638336,0.03467029575770307,99491.49864758716,2157.1863969834576,19.0,3,2,0,8,1378.5,55.3,64.7,0.6284965034965035,0.33194312310617097 +MNCA (default),99.58075828980176,2.0463109099966847,18.34482305470451,0.7039566701063034,0.7905734390801098,0.7438997183128557,0.07422761695620157,0.09797180414713179,547.7971597202185,27.012858819060174,18.846153846153847,4.94033,49.107323222690155,0.41290783882141113,15.50085128260229,0.29868905742963153,0.9481480616823943,0.8801463886825595,0.03934956674595991,0.09658469319813351,536.5510136544884,15.897990578305649,18.0,0,0,0,13,1341.9,52.5,39.8,0.5944055944055944,0.07456434869101838 +TABM (default),109.08048485323914,0.5565733836247371,30.11061011517182,0.2424694191265854,0.7132806572850755,0.709857402946311,0.06035758501914987,0.13133163540831894,580.6229196238922,9.300928058329742,19.384615384615383,4.27071,65.0270922978719,0.1736939483218723,13.315938751381204,0.13061717308512005,0.6984407490367052,0.6451541074247529,0.05131334419415079,0.05222355068579704,354.8698900002396,7.016702417594013,18.0,0,0,0,13,1328.8,51.7,54.6,0.5821678321678322,0.06409168239581474 +NN_TORCH (tuned + ensemble),44960.17932087947,4.03415755573501,5714.412959426929,1.3298489659738517,0.8294336223884518,0.8020265214949197,0.07610055939510828,0.12013497439285538,220430.49479235077,97.5953017172201,20.53846153846154,4.65351,15497.21247045199,2.8239229255252414,4608.594420268999,1.2325370779691025,0.9419042422995463,0.8805586694349655,0.05961664534372901,0.0834039942118561,177697.9133637009,96.53385472639289,21.0,0,0,0,13,1302.7,57.6,52.9,0.5559440559440559,0.0679103711813622 +TABPFNV2 (tuned),9087.265872733002,3.794137261464045,3585.89762046501,1.977808999126841,0.7178550513336984,0.7399865456800228,0.0618004152523972,0.11851504476590419,105571.67881602701,193.9605838613745,20.807692307692307,4.17059,4897.1321855386095,0.8012150287628174,4223.8673583405725,0.45159866325593434,1.0,1.0,0.05688219535982175,0.06558436878663254,99491.49864758716,19.472884837488856,29.0,0,2,0,11,1298.3,52.4,60.1,0.5498251748251748,0.13197410518116387 +REALMLP (default),360.5571643448284,0.5610987406510572,36.16407680854995,0.35279691100634736,0.8625329178435776,0.8232027718817995,0.07088259095544208,0.12880228463515125,1586.4859687066598,12.051410462198454,21.307692307692307,4.76491,145.05578433142767,0.20847519238789877,33.35619963926919,0.08008880801221668,0.9645369038929024,0.8988813630790086,0.0569453348396638,0.09560361463677074,955.4745851239355,8.821199701887194,20.0,0,0,0,13,1284.1,57.6,47.7,0.5384615384615384,0.05485880857216682 +XT (tuned + ensemble),1206.5194327423715,3.367576224375994,442.36567464742467,1.069316176291616,0.8432098887288747,0.8458357989233521,0.10048529236024564,0.12077554715910149,8220.916376158188,88.72398178178156,21.692307692307693,5.04794,766.4287914435068,3.576531834072537,158.22496863160976,0.8436571643025992,1.0,0.9967778446932313,0.049883734447319306,0.1226940033261134,5379.1926370125775,93.77443700336585,25.0,0,0,0,13,1273.2,54.3,51.0,0.5297202797202797,0.06984441103315876 +GBM (default),10.696644908750159,2.245801542559241,3.3219193518156818,0.4939224454467883,0.9303753212721771,0.8715218477000459,0.08060606648427397,0.11976658014611921,80.53396018410795,58.38494586351267,22.076923076923077,4.4838,7.6057972113291425,0.9335102770063612,2.1107352135741744,0.2745991643955073,0.9919628610131755,0.8948154869292944,0.04937573951369445,0.10636019574143357,91.35379089849596,24.071571166022103,21.0,0,0,0,13,1261.4,48.3,50.0,0.5209790209790209,0.046549947604339434 +XT (tuned),1206.5194327423715,0.45855456478575357,442.36567464742467,0.18106308268317445,0.8708387105454042,0.8657402619375704,0.10266445696219874,0.12783943116298957,8220.916376158188,14.733646953552363,22.5,5.07281,766.4287914435068,0.34996385044521755,158.22496863160976,0.15116311924152026,1.0,0.9892577223175305,0.050434912392149145,0.12806468799723258,5379.1926370125775,14.85253877204164,24.0,0,0,0,13,1254.5,52.4,48.6,0.5113636363636364,0.0591073306868635 +TABPFNV2 (default),13.930354648573786,0.6212240172247602,5.213536664770012,0.4399683707018391,0.7716885347996363,0.7903463980434834,0.07567955744535014,0.13811994798594449,115.71517682132266,30.565681397453538,23.5,4.25916,9.373317972819011,0.4424108028411865,2.803336175829957,0.3130195506083406,1.0,1.0,0.06281203604096552,0.07601826785976806,120.93804022420875,33.21203399192692,29.0,0,0,2,11,1230.5,49.1,60.5,0.48863636363636365,0.09907448586802643 +NN_TORCH (tuned),44960.17932087947,0.2904610134597517,5714.412959426929,0.10578639607158029,0.8767833734464541,0.8570159424101212,0.08428128256238013,0.13884413107168087,220430.49479235077,7.008617614539204,23.615384615384617,4.7148,15497.21247045199,0.18749599986606175,4608.594420268999,0.09690652350320204,1.0,0.9430447630912011,0.0730528979706524,0.0987961550603162,177697.9133637009,6.6162587674624795,23.0,0,0,0,13,1224.1,54.4,51.3,0.486013986013986,0.04915969037435755 +XGB (default),12.109628748486186,0.9366908014330091,3.1242101566662273,0.3058622971724495,0.8890754852284094,0.8575221590560308,0.08752819632277807,0.1257282694284019,67.76669386312491,28.981192639888032,24.115384615384617,4.75458,7.79438853263855,0.5301833947499593,2.2441525977447814,0.24247013095584213,1.0,0.9599615615417688,0.053268364359151055,0.10537256419759194,75.78126190262087,16.70456778360137,24.0,0,0,0,13,1217.1,50.9,45.1,0.47465034965034963,0.0466627803252336 +RF (tuned + ensemble),2056.9684221919783,3.432567118171953,500.6828145780351,1.0841887922659657,0.9341058521704168,0.9166437826652474,0.10851512346405484,0.13041932093765915,10863.776799780928,90.24173062908258,24.46153846153846,5.10626,1088.11842862765,1.444333102968004,515.7302180242054,0.7709478321252661,1.0,0.9973408936679932,0.06747831844148078,0.13942046646077252,11909.387526812274,91.13733276245972,25.0,0,0,0,13,1206.5,42.5,50.2,0.46678321678321677,0.044566348818597695 +RF (tuned),2056.9684221919783,0.3944415463341607,500.6828145780351,0.1540690468576434,0.9521440935358421,0.937148261712957,0.11360740003197413,0.13890561971663462,10863.776799780928,12.055960583687282,26.384615384615383,5.18858,1088.11842862765,0.3136819733513726,515.7302180242054,0.12356183047078627,1.0,1.0,0.07199386349783587,0.14203697823894862,11909.387526812274,11.406268840053338,28.5,0,0,0,13,1152.7,46.9,46.7,0.4230769230769231,0.04053609545692784 +EBM (tuned + ensemble),26125.740811508127,1.1628193525167612,3543.4601988059676,0.44619286429284205,0.8401103872067855,0.8462881268223567,0.13677139620409515,0.19454117314163905,102743.72801916403,13.991542249573758,27.615384615384617,4.37076,6865.337549757957,0.3269915845659044,1890.6770917738593,0.13287644820933492,1.0,1.0,0.12484220041460736,0.13745169436097826,100996.51953892264,10.735277156769923,34.0,0,1,0,12,1120.1,60.7,47.0,0.3951048951048951,0.07386689004336064 +XT (default),4.038539225015885,0.28929371426248146,0.7743502790637721,0.08386602222003156,0.9388750296989964,0.9177393093348434,0.12238733836142465,0.17918452962557382,12.035409694284047,7.5346994725559195,28.346153846153847,5.13889,2.058894846174452,0.22108591927422416,0.46766450701756074,0.055025941487738636,1.0,1.0,0.08970042626125052,0.15512506986029978,11.279879616649893,6.868103658105229,31.0,0,0,0,13,1107.4,47.4,55.3,0.3784965034965035,0.03984525577032241 +EBM (tuned),26125.740811508127,0.09807351307991222,3543.4601988059676,0.04003457596727012,0.8629348413551933,0.8635907788274764,0.14218261253427486,0.20871356680163713,102743.72801916403,1.2806753432569877,29.153846153846153,4.42885,6865.337549757957,0.03976681497361925,1890.6770917738593,0.012767925630469206,1.0,1.0,0.1292889595673019,0.16421526338237233,100996.51953892264,1.0,35.0,0,0,1,12,1082.8,54.2,60.0,0.36013986013986016,0.057998568721787075 +FASTAI (tuned + ensemble),4642.340904501768,8.77000401672135,1248.4159649024482,5.455805855994076,0.976857214707512,0.9643156078678398,0.1220212594240265,0.19741406200182526,32660.757986061028,336.5557366220283,30.53846153846154,5.27232,3620.151651991738,7.070411682128906,540.0550122797715,2.672383567926809,1.0,1.0,0.10220001788754296,0.1756471684639439,33252.590479353305,324.2617231497696,32.0,0,0,0,13,1038.8,55.9,45.5,0.32867132867132864,0.03524942099572964 +NN_TORCH (default),151.43337847534409,0.22200924249795764,23.930992166336175,0.08705592871966639,0.9842745385355711,0.9650521294786105,0.11939774031540712,0.19547728488656121,831.2534637345914,6.296769612194,30.615384615384617,4.81722,65.27966655625238,0.14955372280544704,20.47535029226034,0.07966387341594139,1.0,1.0,0.14050249681884142,0.13614151845721162,546.0088939544379,6.398331846186531,33.0,0,0,0,13,1036.9,52.1,56.3,0.3269230769230769,0.03521064575405939 +EBM (default),61.24195487458481,0.11774356426336827,10.057660454061297,0.07098698960857609,0.9074728958123806,0.8939078911486562,0.15059436506506363,0.22072991511633744,260.56038663650384,2.782386904835081,31.23076923076923,4.44221,19.539602756500244,0.04366175333658854,6.327684720357259,0.039228283502797535,1.0,1.0,0.13598656576261503,0.17428198310764084,214.4635432482436,1.7877726532004268,36.0,0,0,0,13,1019.2,58.0,58.4,0.3129370629370629,0.03546267306388764 +RF (default),9.267833071284823,0.31600932585887426,1.1524694810969494,0.08818027662752802,1.0,0.9884213213255257,0.12896215969897823,0.19010351222094507,27.793203837891944,7.952067403429296,31.846153846153847,5.26146,5.924832847383287,0.26005201869540745,0.5271703851240811,0.062155184511682476,1.0,1.0,0.08142906388988314,0.17217694084546725,20.351375021697393,6.812325017295935,32.0,0,0,0,13,1000.0,0.0,0.0,0.29895104895104896,0.0320644207638942 +TABICL (default),9.267833071284823,0.31600932585887426,1.1524694810969494,0.08818027662752802,1.0,0.9884213213255257,0.12896215969897823,0.19010351222094507,27.793203837891944,7.952067403429296,31.846153846153847,5.26146,5.924832847383287,0.26005201869540745,0.5271703851240811,0.062155184511682476,1.0,1.0,0.08142906388988314,0.17217694084546725,20.351375021697393,6.812325017295935,32.0,0,0,0,13,996.4,44.2,53.3,0.29895104895104896,0.0320644207638942 +FASTAI (tuned),4642.340904501768,0.9693611326380673,1248.4159649024482,0.5658202849858516,0.9865659937937807,0.97994047327997,0.1268703117462002,0.21643571574719017,32660.757986061028,38.043706128121705,32.30769230769231,5.18701,3620.151651991738,1.0039918687608507,540.0550122797715,0.32495714500374384,1.0,1.0,0.11787324661401322,0.18136473255372101,33252.590479353305,36.96325797937462,34.0,0,0,0,13,983.9,45.9,55.1,0.28846153846153844,0.03236746719718856 +FASTAI (default),18.837261917856004,0.970951892168094,4.540025666114834,0.4814105255967321,1.0,0.9953660303790696,0.1713566954109247,0.27575692737746516,123.96432879696866,34.056295479841545,35.69230769230769,6.36878,15.734567880630493,1.036571078830295,2.6035035917474465,0.39168673048638547,1.0,1.0,0.11912950408368406,0.27762931758033993,138.38888737159849,36.35774230804583,36.0,0,0,0,13,851.5,55.2,68.7,0.21153846153846154,0.028551139305471058 +KNN (tuned + ensemble),114.20154888548403,2.240403470422468,27.24026724430238,0.3822937952790146,1.0,0.995259752245121,0.3613185359622549,0.6538216804001686,84.22414291151355,25.830539833366643,40.73076923076923,8.1617,12.562016169230143,0.2777775393591987,2.427842858706766,0.14225338857865483,1.0,1.0,0.41352429770943944,0.7463425738250281,66.86014550581744,15.449077396683121,42.0,0,0,0,13,520.3,72.0,87.3,0.09702797202797203,0.02466366830731113 +LR (tuned + ensemble),260.53517347665934,0.6450919024964683,90.82426667518703,0.24319307436598073,1.0,1.0,0.3542242194195658,0.6560326250593088,2016.752248654558,10.333316070921164,41.30769230769231,8.15038,155.9093332555559,0.2263851695590549,45.737119844257606,0.10677772758861735,1.0,1.0,0.3837007808160382,0.7329121013386546,1735.7243863195374,7.455644136363483,41.0,0,0,0,13,469.3,60.0,87.6,0.08391608391608392,0.024246926114257515 +KNN (tuned),114.20154888548403,0.2574708763350788,27.24026724430238,0.05410989827741383,1.0,0.9986930045171645,0.36756573727320035,0.6864360756071909,84.22414291151355,3.3744578525576268,41.73076923076923,8.25638,12.562016169230143,0.09626038869222005,2.427842858706766,0.028318042556444805,1.0,1.0,0.420237807146593,0.7514125894823619,66.86014550581744,2.1032822899149863,43.0,0,0,0,13,423.2,101.5,87.1,0.0743006993006993,0.024072712010619994 +LR (tuned),260.53517347665934,0.13893676680377404,90.82426667518703,0.0633906225067962,1.0,1.0,0.3558634144590602,0.6630785837306736,2016.752248654558,3.076869056672641,41.92307692307692,8.15927,155.9093332555559,0.05223793453640408,45.737119844257606,0.05009355954825878,1.0,1.0,0.38692302721983185,0.7430806606996537,1735.7243863195374,2.8910146707817423,42.0,0,0,0,13,406.7,71.2,74.1,0.06993006993006994,0.023909095789413432 +LR (default),4.903454369968838,0.19863318235446245,2.033074367074023,0.07563726553282422,1.0,1.0,0.38080916510999724,0.7702257900218233,45.803264898726965,4.508173919929611,43.15384615384615,8.22758,5.286295996771918,0.09712121221754286,1.191311693685635,0.0859815087197251,1.0,1.0,0.38694683448707623,0.8770019722050186,34.79737790723628,4.100500531123858,43.0,0,0,0,13,279.5,74.5,94.2,0.04195804195804196,0.023205412158900533 +KNN (default),2.0401845610039864,0.07292802802517882,0.5185761600539422,0.029646415007926124,1.0,0.9998530803391149,0.40764910544712396,0.8492569035555164,1.1215157323318734,1.57971988992164,43.57692307692308,8.55152,0.15191650390625,0.04789047771030002,0.03752343922831045,0.021942545900661376,1.0,1.0,0.4573857800165779,1.0,1.0,1.5317182357480712,45.0,0,0,0,13,214.6,121.2,119.5,0.032342657342657344,0.023064375238288283 diff --git a/data/full-imputed-reg/time_plot.pdf b/data/full-imputed-reg/time_plot.pdf new file mode 100644 index 0000000000000000000000000000000000000000..dfd971ed46ac356a5a9c8d41072da5b8d3569fdd Binary files /dev/null and b/data/full-imputed-reg/time_plot.pdf differ diff --git a/data/full-imputed-reg/time_plot.png.zip b/data/full-imputed-reg/time_plot.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..263ba6a3c2cb850a1ba7da44ac69dda4162238c3 --- /dev/null +++ b/data/full-imputed-reg/time_plot.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fe29fdec65f03c7d3bae994bc888e7c7ec498e3800a153239b08e6703208a4a +size 79911 diff --git a/data/full-imputed-reg/tuning-impact-elo-horizontal.pdf b/data/full-imputed-reg/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..fa99d13f1764f21347e4e3f84a9183689be4f8cf Binary files /dev/null and b/data/full-imputed-reg/tuning-impact-elo-horizontal.pdf differ diff --git a/data/full-imputed-reg/tuning-impact-elo-horizontal.png.zip b/data/full-imputed-reg/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..95a13229786bc6e9b4aaeb3b1925294acd57587c --- /dev/null +++ b/data/full-imputed-reg/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7763698868699976cb6bc6983d84a21cb6289f9fdc372f4c53289e4f0c5818b6 +size 155490 diff --git a/data/full-imputed-reg/tuning-impact-elo.pdf b/data/full-imputed-reg/tuning-impact-elo.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0641409dca583318a6381238781bc55a864424df Binary files /dev/null and b/data/full-imputed-reg/tuning-impact-elo.pdf differ diff --git a/data/full-imputed-reg/tuning-impact-elo.png.zip b/data/full-imputed-reg/tuning-impact-elo.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..c0a79f745731d0b1db23454ae7a3111bdc8db8dc --- /dev/null +++ b/data/full-imputed-reg/tuning-impact-elo.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5b69c34917f2c565e7487c09d55cd3b936da19993feb16027c89ce94c1aa173 +size 162911 diff --git a/data/full-imputed/figures/critical-diagram.pdf b/data/full-imputed/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..654eb1203dc767fb7ff1e50aa55cbcad632c89f9 Binary files /dev/null and b/data/full-imputed/figures/critical-diagram.pdf differ diff --git a/data/full-imputed/figures/critical-diagram.png.zip b/data/full-imputed/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..9002a5418eaf16c3ee195a7fcea6d7f783cecbf5 --- /dev/null +++ b/data/full-imputed/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f4333e1332194db0036e55bd21228c80b814ff384b793062c746bce92695b65 +size 317258 diff --git a/data/full-imputed/leaderboard.tex b/data/full-imputed/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..949ae96894dff57d96e908a45ff26566cb58d6b2 --- /dev/null +++ b/data/full-imputed/leaderboard.tex @@ -0,0 +1,53 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1588${}_{-27,+24}$}} & \textcolor{gold}{\textbf{0.608}} & \textcolor{gold}{\textbf{8.4}} & \textcolor{silver}{\textbf{3.1}} & \textcolor{silver}{\textbf{8}} & \textcolor{gold}{\textbf{5.8\%}} & 1408.78 & 3.34 \\ +RealMLP (T+E) & \textcolor{silver}{\textbf{1547${}_{-21,+26}$}} & \textcolor{silver}{\textbf{0.499}} & \textcolor{silver}{\textbf{9.7}} & 6.3 & 0 & \textcolor{bronze}{\textbf{7.4\%}} & 6044.76 & 3.80 \\ +TabM (T+E) & \textcolor{bronze}{\textbf{1540${}_{-21,+29}$}} & \textcolor{bronze}{\textbf{0.491}} & \textcolor{bronze}{\textbf{9.9}} & 4.9 & 3 & \textcolor{silver}{\textbf{7.0\%}} & 3285.87 & 1.47 \\ +LightGBM (T+E) & 1525${}_{-24,+24}$ & 0.437 & 10.6 & 6.5 & 1 & 8.6\% & 416.98 & 2.64 \\ +CatBoost (T+E) & 1482${}_{-27,+16}$ & 0.419 & 12.4 & 8.3 & 0 & 7.9\% & 1658.41 & 0.65 \\ +CatBoost (T) & 1469${}_{-23,+19}$ & 0.403 & 12.9 & 7.0 & 1 & 8.1\% & 1658.41 & 0.08 \\ +TabM (T) & 1450${}_{-26,+21}$ & 0.400 & 13.7 & 7.3 & 1 & 8.0\% & 3285.87 & 0.17 \\ +LightGBM (T) & 1447${}_{-28,+21}$ & 0.347 & 13.9 & 11.7 & 0 & 9.3\% & 416.98 & 0.33 \\ +XGBoost (T+E) & 1439${}_{-23,+19}$ & 0.349 & 14.2 & 9.9 & 0 & 9.4\% & 693.49 & 1.69 \\ +ModernNCA (T+E) & 1434${}_{-28,+24}$ & 0.432 & 14.5 & 5.3 & 3 & 8.7\% & 4621.67 & 8.15 \\ +CatBoost (D) & 1427${}_{-21,+26}$ & 0.356 & 14.8 & 7.9 & 1 & 9.3\% & 6.83 & 0.08 \\ +TabPFNv2 (T+E) & 1413${}_{-26,+27}$ & 0.466 & 15.4 & \textcolor{gold}{\textbf{3.0}} & \textcolor{gold}{\textbf{11}} & 8.5\% & 3030.15 & 21.44 \\ +XGBoost (T) & 1403${}_{-23,+21}$ & 0.299 & 15.9 & 13.2 & 0 & 9.7\% & 693.49 & 0.31 \\ +ModernNCA (T) & 1401${}_{-21,+18}$ & 0.297 & 16.0 & 8.7 & 1 & 9.3\% & 4621.67 & 0.47 \\ +TabICL (D) & 1386${}_{-23,+25}$ & 0.373 & 16.6 & \textcolor{bronze}{\textbf{4.5}} & 6 & 9.2\% & 6.63 & 1.48 \\ +RealMLP (T) & 1347${}_{-23,+24}$ & 0.238 & 18.6 & 14.7 & 0 & 10.4\% & 6044.76 & 0.18 \\ +TabPFNv2 (T) & 1346${}_{-25,+27}$ & 0.354 & 18.6 & 5.6 & 1 & 10.6\% & 3030.15 & 0.46 \\ +TabM (D) & 1345${}_{-18,+21}$ & 0.275 & 18.6 & 12.8 & 0 & 10.9\% & 10.49 & 0.13 \\ +TorchMLP (T+E) & 1327${}_{-22,+25}$ & 0.209 & 19.5 & 14.9 & 0 & 10.6\% & 2874.67 & 1.95 \\ +TabPFNv2 (D) & 1319${}_{-22,+22}$ & 0.318 & 20.0 & 5.5 & 4 & 11.6\% & 3.36 & 0.31 \\ +TabDPT (D) & 1299${}_{-28,+24}$ & 0.293 & 21.0 & 4.9 & \textcolor{bronze}{\textbf{7}} & 12.3\% & 22.53 & 8.55 \\ +ModernNCA (D) & 1298${}_{-22,+26}$ & 0.153 & 21.0 & 12.6 & 1 & 12.8\% & 14.87 & 0.31 \\ +EBM (T+E) & 1289${}_{-25,+25}$ & 0.175 & 21.5 & 13.5 & 0 & 14.6\% & 1331.68 & 0.20 \\ +FastaiMLP (T+E) & 1247${}_{-21,+24}$ & 0.152 & 23.5 & 14.7 & 0 & 14.0\% & 593.24 & 4.47 \\ +ExtraTrees (T+E) & 1241${}_{-20,+22}$ & 0.117 & 23.8 & 16.0 & 0 & 14.3\% & 183.02 & 0.76 \\ +EBM (T) & 1235${}_{-23,+18}$ & 0.126 & 24.1 & 17.7 & 0 & 15.3\% & 1331.68 & 0.02 \\ +XGBoost (D) & 1234${}_{-24,+19}$ & 0.106 & 24.2 & 19.6 & 0 & 12.7\% & 1.94 & 0.12 \\ +RealMLP (D) & 1234${}_{-22,+21}$ & 0.102 & 24.2 & 20.2 & 0 & 12.7\% & 35.36 & 0.19 \\ +TorchMLP (T) & 1224${}_{-20,+23}$ & 0.103 & 24.6 & 21.2 & 0 & 12.7\% & 2874.67 & 0.13 \\ +LightGBM (D) & 1202${}_{-23,+23}$ & 0.074 & 25.7 & 23.0 & 0 & 13.5\% & 1.96 & 0.14 \\ +RandomForest (T+E) & 1201${}_{-24,+21}$ & 0.096 & 25.8 & 15.8 & 0 & 15.1\% & 373.18 & 0.77 \\ +EBM (D) & 1200${}_{-28,+22}$ & 0.125 & 25.8 & 13.5 & 1 & 16.2\% & 4.67 & 0.04 \\ +ExtraTrees (T) & 1200${}_{-29,+17}$ & 0.092 & 25.9 & 17.0 & 0 & 15.4\% & 183.02 & 0.09 \\ +FastaiMLP (T) & 1162${}_{-20,+23}$ & 0.070 & 27.5 & 22.8 & 0 & 15.6\% & 593.24 & 0.31 \\ +RandomForest (T) & 1152${}_{-24,+20}$ & 0.069 & 28.1 & 17.9 & 0 & 16.2\% & 373.18 & 0.09 \\ +TorchMLP (D) & 1068${}_{-23,+22}$ & 0.019 & 31.6 & 28.9 & 0 & 17.5\% & 9.99 & 0.13 \\ +FastaiMLP (D) & 1010${}_{-22,+24}$ & 0.019 & 33.7 & 31.1 & 0 & 20.8\% & 2.86 & 0.37 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.008 & 34.0 & 32.3 & 0 & 21.3\% & 0.43 & 0.05 \\ +ExtraTrees (D) & 971${}_{-29,+22}$ & 0.023 & 35.0 & 31.3 & 0 & 23.0\% & 0.25 & 0.05 \\ +Linear (T+E) & 918${}_{-20,+31}$ & 0.031 & 36.5 & 28.2 & 0 & 30.8\% & 47.49 & 0.17 \\ +Linear (T) & 882${}_{-31,+31}$ & 0.019 & 37.5 & 32.8 & 0 & 31.5\% & 47.49 & 0.07 \\ +Linear (D) & 863${}_{-30,+24}$ & 0.014 & 38.0 & 30.5 & 0 & 32.9\% & 1.52 & 0.09 \\ +KNN (T+E) & 684${}_{-32,+27}$ & 0.000 & 41.4 & 41.1 & 0 & 45.3\% & 3.26 & 0.18 \\ +KNN (T) & 607${}_{-48,+31}$ & 0.000 & 42.4 & 42.3 & 0 & 46.9\% & 3.26 & 0.04 \\ +KNN (D) & 459${}_{-47,+42}$ & 0.000 & 43.8 & 43.6 & 0 & 54.1\% & 0.05 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/full-imputed/tabarena_leaderboard.csv b/data/full-imputed/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..edb4fd31b9f5f95648743dea9f89edc4bf7ed637 --- /dev/null +++ b/data/full-imputed/tabarena_leaderboard.csv @@ -0,0 +1,46 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),8462.98349033711,33.138286938542635,2912.1242917208747,4.391016265244401,0.39210856520445686,0.3952207974156523,0.05822662641732407,0.038656173275860704,43301.56973340033,539.3969074295995,8.372549019607844,0.2055,7367.614226023356,3.9490213659074573,1408.7828331379249,3.337414261487274,0.33725328327072535,0.34354944358388295,0.022224886416280176,0.016643068102528722,33121.993698706065,185.27028732280235,5.0,8,6,3,34,1588.0,23.3,27.0,0.8324420677361853,0.3253674406914166 +REALMLP (tuned + ensemble),73311.83245680422,16.208294235543228,6156.508467345906,8.023682171781228,0.5008360344149994,0.5052732642742225,0.07396107454992124,0.047039327271839126,177178.04293492812,276.8359259771498,9.745098039215685,0.20511,23874.98158947627,5.369593381881714,6044.758795122033,3.804109742244085,0.48215030896347355,0.5149467040415536,0.0363542362621595,0.029161376498297843,128255.5951822475,249.18630751354758,8.0,0,1,6,44,1546.6,25.8,20.4,0.8012477718360071,0.15830473963861558 +TABM (tuned + ensemble),36462.58337058756,8.791583087761158,5017.258422060137,3.1802055454964386,0.5087786520610308,0.5227448610128222,0.06977046907024002,0.06341802879682454,80230.4785975727,130.9039885664859,9.911764705882353,0.20535,8420.993191123009,2.7141049438052707,3285.8688373170553,1.4723486146222118,0.47020083976979304,0.5081840173827434,0.03250086230493732,0.027567784172117953,47614.825535817596,111.53030159160512,8.0,3,2,3,43,1540.3,28.9,20.2,0.7974598930481284,0.20446721945067065 +GBM (tuned + ensemble),3088.2834950947295,22.058380506500956,771.2925090199282,4.080656504886727,0.5633713731527658,0.5700094236297886,0.08557613078017116,0.05910896345836255,11617.562781863631,427.3461655076801,10.588235294117647,0.2112,1667.667911251386,3.7860276963975696,416.9832926671224,2.6387318875879693,0.5860060403960241,0.5844894225112219,0.04023364809387353,0.02358471585546149,9501.568897853978,118.5632719261967,10.0,1,1,3,46,1525.4,23.3,23.6,0.7820855614973262,0.15355447775093242 +CAT (tuned + ensemble),20021.3498029566,3.148214934334516,4130.291158755297,0.9792413821552454,0.580901607522528,0.5774297548622486,0.07932909148042525,0.05277142640286518,58497.64726501732,62.60491239485109,12.401960784313726,0.21079,7127.110804247856,1.3727677133348253,1658.412974283854,0.65278892715772,0.5700946601892775,0.5907801405339781,0.04606751035449885,0.02774155341625371,22349.13952253535,41.43902270876451,11.0,0,1,3,47,1482.5,15.4,26.2,0.7408645276292335,0.11982717117082757 +CAT (tuned),20021.3498029566,0.45918073721700764,4130.291158755297,0.13269281663977184,0.5972104481929726,0.590874976186108,0.08123456288719788,0.05345084723509436,58497.64726501732,8.058537488270943,12.92156862745098,0.21123,7127.110804247856,0.16668947537740073,1658.412974283854,0.08101450844552308,0.601029599608834,0.6338898362509008,0.05045951030687079,0.030933936753504664,22349.13952253535,5.582269457200397,12.0,1,3,2,45,1468.7,19.0,22.9,0.7290552584670231,0.14372608077734617 +TABM (tuned),36462.58337058756,0.9691277276976176,5017.258422060137,0.3543499279245272,0.5996801665884344,0.5894392156662352,0.08005397792131351,0.07918093685262247,80230.4785975727,13.69951936973594,13.696078431372548,0.21042,8420.993191123009,0.2791590425703261,3285.8688373170553,0.1728818165133111,0.5745991505979262,0.592779150360785,0.041022605221466724,0.03536573040794217,47614.825535817596,11.564361688735561,13.5,1,3,2,45,1450.3,20.2,25.4,0.7114527629233511,0.13780161432051177 +GBM (tuned),3088.2834950947295,3.280151636356362,771.2925090199282,0.7008989122586311,0.6529668965012456,0.6341023639146358,0.09282220934736846,0.06957232946152313,11617.562781863631,67.66975356224859,13.911764705882353,0.21181,1667.667911251386,0.5984517203436958,416.9832926671224,0.33384935590955944,0.6262832446301,0.6519687249021858,0.05048061869912612,0.03622173092974084,9501.568897853978,17.561755180687065,12.0,0,0,0,51,1447.2,21.0,27.9,0.7065508021390374,0.08565457797402178 +XGB (tuned + ensemble),6066.175009788823,8.000445660682546,1229.0991373608288,3.120951906510457,0.6509288335184475,0.6437619836489613,0.09352816952933034,0.06895163935820021,14800.692905333448,192.56405868832437,14.196078431372548,0.21458,2256.9276883072325,3.0598979949951173,693.4907982506384,1.6904262577061315,0.6793464009669915,0.6595270919953716,0.05552089944600691,0.03200108278665274,11769.825378159274,82.2877578838671,13.0,0,1,1,49,1439.4,18.9,23.0,0.7000891265597148,0.1013601976229367 +MNCA (tuned + ensemble),50831.51192726024,408.7024568246081,6035.4844365627405,41.810485127597225,0.568211153208397,0.5501368681201666,0.08712345362398452,0.06897277115037478,107380.48324313454,2632.7754273052897,14.490196078431373,0.21294,14486.050127214856,13.240725604693095,4621.665633563503,8.148513113458952,0.5565369335340675,0.528476133918774,0.03527643869682828,0.031199094919591976,89084.88026764008,537.8274695033359,11.0,3,3,5,40,1433.6,23.1,27.3,0.6934046345811051,0.19036002244261158 +CAT (default),190.03885268302784,0.2709793741147243,87.82759080296049,0.13562630112555335,0.643860524899134,0.6473213604262623,0.09253129668253035,0.060816531102421634,428.26912869207973,7.186903437353133,14.813725490196079,0.21142,28.268621895048355,0.1854714552561442,6.827020885706225,0.08026752106160412,0.6746502933037238,0.6439696146756235,0.04582475392187091,0.02917957826918542,113.47556087857672,6.139565341592893,16.0,1,3,1,46,1426.6,25.5,20.1,0.6860516934046346,0.12620896189325026 +TABPFNV2 (tuned + ensemble),10256.531407853461,95.46951768190513,2958.800295487964,47.682041415407696,0.5336461270994103,0.5756859433884367,0.08515145920011706,0.08426450292085469,64069.99714449898,3721.414760799794,15.392156862745098,0.22709,4578.3020022392275,13.798940539360046,3030.145221648074,21.443889733771147,0.6071274732571645,0.6069317461979158,0.03866296841083827,0.03250100481138207,29840.56658077224,861.272449686595,11.0,11,6,1,33,1412.7,26.3,25.9,0.6729055258467023,0.3278729045790513 +XGB (tuned),6066.175009788823,1.6190793797341305,1229.0991373608288,0.7004875821871327,0.7007828758839686,0.6803135064515848,0.09696298847871114,0.07331091982279783,14800.692905333448,40.17091923504828,15.852941176470589,0.21499,2256.9276883072325,0.41774741808573407,693.4907982506384,0.3083513292273418,0.7223432295573234,0.7129456514258546,0.06100086263844462,0.03462028858330468,11769.825378159274,12.136706817415504,14.0,0,0,0,51,1403.4,20.3,22.4,0.6624331550802139,0.07586490654061125 +MNCA (tuned),50831.51192726024,15.584971993005873,6035.4844365627405,1.7226505575752833,0.7032607657750378,0.6433451252379919,0.09326988821561474,0.07237812699834675,107380.48324313454,104.77227719302421,16.029411764705884,0.21328,14486.050127214856,0.585451708899604,4621.665633563503,0.4747242314576746,0.7724401098237351,0.635037134317342,0.06113391902932641,0.0493534926522164,89084.88026764008,26.6476237825924,16.0,1,2,1,47,1401.0,17.2,20.6,0.6584224598930482,0.11520033492162958 +TABICL (default),82.80334805787777,14.446106202929627,7.4655941033985815,1.6847655412525513,0.6267118101348285,0.6591483802547826,0.09205010388137871,0.09066559220446027,132.83095048510535,173.47404322746496,16.637254901960784,0.21659,20.0619904200236,1.316457470258077,6.625686376434735,1.479660415649414,0.7253160785960966,0.7300188790588114,0.05183434289459943,0.0307061823959099,110.37274820175764,91.25701004379168,14.0,6,4,1,40,1386.5,24.7,22.2,0.6446078431372549,0.22069365279362593 +TABM (default),139.62633961878052,1.0737770921524312,22.4928837749369,0.40818286049864,0.7249223512608948,0.7271977899580744,0.10904674351892713,0.10206769049189926,289.28104127940037,12.833742622244758,18.57843137254902,0.21188,40.332407061258955,0.18133597903781468,10.492230631952996,0.13221126395693922,0.8325407781946508,0.7894820610150435,0.05780353607735633,0.03944837820705307,186.77830789084513,10.724711035853641,18.0,0,0,0,51,1345.1,20.4,17.8,0.6004901960784313,0.07809522496642297 +TABPFNV2 (tuned),10256.531407853461,3.508518432739773,2958.800295487964,1.6633483351042642,0.6455033122665624,0.6621142339433628,0.10613855092878187,0.10151317103696923,64069.99714449898,134.3876978041907,18.58823529411765,0.22751,4578.3020022392275,0.5097733656565349,3030.145221648074,0.46198440414964803,0.7572752170216083,0.7071311634998629,0.07667977096051137,0.04654872982993791,29840.56658077224,22.48627830890164,15.5,1,10,1,39,1346.5,26.8,24.7,0.6002673796791443,0.17841696402639748 +REALMLP (tuned),73311.83245680422,0.765440520637695,6156.508467345906,0.43160487228962463,0.7615648419724279,0.710732530428881,0.10375821258342363,0.0827549915204941,177178.04293492812,13.697358431538635,18.607843137254903,0.215,23874.98158947627,0.26132775147755943,6044.758795122033,0.18050895994575464,0.8260416027840659,0.7411687898250225,0.06682574439935507,0.04931132619439678,128255.5951822475,11.19862181751071,17.0,0,0,0,51,1347.2,23.5,22.5,0.5998217468805704,0.06819290912989692 +NN_TORCH (tuned + ensemble),29590.123960411,13.183612702147375,3729.842332819238,3.5618312259496654,0.7913423906704912,0.7660666706649322,0.10611131869401512,0.09038128659154394,97942.50427646744,194.194370742189,19.54901960784314,0.21479,10848.660208092795,3.946354971991645,2874.6743506773596,1.9516254299583915,0.9196144626274988,0.8406520609695045,0.06624137678637454,0.05785446391646678,61441.834728019065,144.9992483814283,20.0,0,0,0,51,1327.2,24.7,21.6,0.5784313725490197,0.06731049079573719 +TABPFNV2 (default),12.045264010803372,0.825882379421741,4.478694030451919,0.4530850283232118,0.6822105535450186,0.7166511738002558,0.11639838627727908,0.1129506889897436,69.53545044274777,29.75030650326298,20.029411764705884,0.22887,9.117408725950453,0.42119165261586505,3.3572004182270923,0.3130195506083406,1.0,0.8502991289200135,0.07592598037026033,0.05279556334085833,52.331266670190345,18.512009051191257,22.0,4,1,6,40,1319.1,21.2,21.6,0.5675133689839572,0.18240480693759595 +MNCA (default),252.05720278889527,8.333707038145958,17.796029105655933,1.1526772129094414,0.8466123430768121,0.7953614233947742,0.1283337233918568,0.09667613065132963,329.3551130182264,62.49957238602975,20.96078431372549,0.21885,36.0034454398685,0.5316168732113309,14.869495839530392,0.30768591310277943,1.0,0.8801463886825595,0.07424254289688792,0.06351531911154724,236.99495784925895,20.43419277465042,22.0,1,0,0,50,1298.0,25.2,21.5,0.5463458110516934,0.07959304417991664 +TABDPT (default),166.18249968905855,63.11337411850366,27.980557312145997,23.24231264879125,0.7067604944091354,0.699237694368164,0.12276620862052218,0.09536848200243395,577.6877537613715,1486.1376930964866,20.980392156862745,0.22562,99.10453534126282,28.39870807859633,22.52690614988171,8.550738306685618,1.0,0.8691048403402053,0.05029899916003,0.0433680924463882,528.528670151851,1255.434427440434,22.0,7,0,3,41,1299.0,24.0,27.3,0.5459001782531194,0.20400425047519355 +EBM (tuned + ensemble),34026.536489860475,1.2927426090946903,5478.959788432886,0.48902813323980865,0.8251828316021278,0.8184174666065822,0.1459292024770503,0.13123289194272533,45019.152540237825,18.109773959357398,21.470588235294116,0.21615,2925.6548613442314,0.400875727335612,1331.6775166450918,0.19908260374566636,0.9668458098028044,0.8759451782285146,0.08565806627599937,0.05590448728964968,17751.99098903195,11.13922354039944,20.0,0,1,1,49,1289.1,24.6,24.7,0.5347593582887701,0.07393115786950148 +FASTAI (tuned + ensemble),6629.648996400055,16.16286987151975,1343.5604508466745,7.704229376998794,0.847763610444884,0.8382245163385191,0.1397117316065552,0.11512346483592802,22455.399300542234,425.24771894393024,23.49019607843137,0.21679,3182.895098288854,10.795994228786892,593.237788402893,4.466873745216533,1.0,0.9598194541774426,0.0932055660765102,0.07243352000599475,19851.125229101002,409.94392325616803,25.0,0,1,0,50,1247.2,23.8,21.0,0.4888591800356506,0.06787442467463556 +XT (tuned + ensemble),1289.1519403179227,3.0578905248434194,464.9314949446578,1.2901947647379284,0.8827760158511792,0.8526330342118325,0.14328507877461197,0.1178072949267725,5501.829777288316,78.94715148525705,23.80392156862745,0.21814,763.5855970117781,1.8364481396145291,183.01944048073585,0.761281055543471,1.0,0.9521932895093149,0.08603989010676916,0.07137968239257503,3537.1622158448404,68.20231667792302,27.0,0,0,1,50,1241.2,21.7,19.1,0.48172905525846704,0.06248381594607686 +EBM (tuned),34026.536489860475,0.16189759790507796,5478.959788432886,0.07139558038617727,0.8740079837962136,0.8509315543435954,0.1526111960614516,0.14046525162709442,45019.152540237825,2.1906554232861546,24.098039215686274,0.21772,2925.6548613442314,0.0440410852432251,1331.6775166450918,0.022730636596679687,1.0,0.9343933032613297,0.09714090003058773,0.06554958177928984,17751.99098903195,1.2223545537568268,24.0,0,0,1,50,1235.3,17.6,22.4,0.4750445632798574,0.05642764990076486 +REALMLP (default),448.72095363529684,0.7614608424421488,36.551378391408186,0.4234937252271941,0.8980359281995941,0.8486069321416838,0.1271802553088573,0.10985323243216587,1038.880257668142,13.498920180198532,24.225490196078432,0.21553,142.2050400045183,0.2570535076989068,35.35800008725903,0.19140706459681192,1.0,0.9219879409782441,0.10584984201426972,0.07572085053752324,719.9745480267878,11.535269383553011,24.0,0,0,0,51,1234.0,21.0,21.7,0.4721479500891266,0.04938698679756707 +XGB (default),12.860019501050314,0.6666093941607507,3.1825529067488505,0.3000924886961127,0.8935881423667761,0.8532334504320886,0.12712004719135736,0.11901449406144302,40.716829559380564,17.42803351650834,24.245098039215687,0.21667,5.82192047437032,0.3283502260843913,1.9409389396340444,0.12262328807300621,1.0,0.9468441210750017,0.09769803191981652,0.0669685923578734,34.48133232859512,10.22021081972956,24.0,0,0,0,51,1234.4,18.4,23.9,0.47170231729055256,0.050923488128909414 +NN_TORCH (tuned),29590.123960411,0.7234875786018787,3729.842332819238,0.20874333848987678,0.8968088714508368,0.8475017840981602,0.12669338653934464,0.1136897319489509,97942.50427646744,10.830632864633353,24.637254901960784,0.21784,10848.660208092795,0.2631075382232666,2874.6743506773596,0.131112832826372,1.0,0.9145996378610912,0.1030878645350336,0.06896829387646289,61441.834728019065,8.236695976463633,25.0,0,0,0,51,1224.0,22.8,19.2,0.46278966131907306,0.047231345493399926 +GBM (default),8.619418829147072,1.0011564760166576,3.0462908189340183,0.25265600996978405,0.9255861200115366,0.8846516474566253,0.13512307210624444,0.11629352795926363,44.26353412937288,22.939896191571705,25.705882352941178,0.21706,5.971681065029568,0.2858244842953152,1.9600326879612946,0.14173548842042513,1.0,0.9460283485970739,0.09851412306889507,0.07460814077084837,32.72231234173676,8.612355874421967,26.0,0,0,0,51,1201.7,22.5,22.9,0.4385026737967914,0.04351765156181404 +RF (tuned + ensemble),2245.014868743487,2.632504310950734,530.9489806737863,1.2198476221584795,0.9036228102132694,0.8780254175848018,0.15120886022535032,0.12864404431298362,6771.2411119904955,73.51636782881279,25.764705882352942,0.21966,886.9249708387587,1.8479143513573542,373.17861356387994,0.7709478321252661,1.0,0.9786075371468006,0.09165643783998545,0.08516072051847612,5833.628398157948,63.708677480395814,27.0,0,1,1,49,1200.8,20.6,23.3,0.43716577540106955,0.06329546224076242 +EBM (default),104.78526760316363,0.17165232525412033,11.079598604500092,0.09398863718788793,0.8753701223112293,0.8652694048452835,0.16210494430458539,0.14850767908108042,148.0901517975655,3.2508346897376326,25.784313725490197,0.21677,11.465454594294231,0.05977429548899333,4.6738599788414605,0.03961948198354664,1.0,0.9662629570470155,0.10271933543623024,0.06419218276989155,75.98904610132907,2.400460311914132,26.0,1,0,2,48,1200.2,21.6,27.7,0.43672014260249553,0.07421734941797684 +XT (tuned),1289.1519403179227,0.3436224609158917,464.9314949446578,0.1744826726067124,0.9078855110953847,0.8819150140308499,0.15403366505022814,0.12742228138105077,5501.829777288316,9.97674146406182,25.862745098039216,0.21915,763.5855970117781,0.19129647148980033,183.01944048073585,0.09120693679998855,1.0,0.9769882906818511,0.10343347628405763,0.07994728986866971,3537.1622158448404,8.867381424714122,29.0,0,1,0,50,1200.2,16.8,28.9,0.43493761140819964,0.05870912420356502 +FASTAI (tuned),6629.648996400055,1.0291698354002177,1343.5604508466745,0.6091231343301344,0.930127335805145,0.8926194283084432,0.15586855797145246,0.13603130394016383,22455.399300542234,33.54113778341792,27.53921568627451,0.2173,3182.895098288854,0.8112125396728516,593.237788402893,0.306391541190021,1.0,0.9788627510723802,0.0988046136347035,0.08330206169020851,19851.125229101002,31.29559841059079,28.0,0,0,0,51,1161.9,22.6,19.8,0.3968360071301248,0.04380245578259077 +RF (tuned),2245.014868743487,0.2778017885544721,530.9489806737863,0.15585979099514927,0.9311148829801792,0.9028450265149066,0.161536041758762,0.1397857082339994,6771.2411119904955,8.469005968793565,28.058823529411764,0.22016,886.9249708387587,0.17402595943874782,373.17861356387994,0.08526202343425164,1.0,0.9971929103870871,0.10456209496862212,0.09283342230804408,5833.628398157948,7.94881742200239,31.0,0,1,1,49,1152.4,19.1,23.8,0.3850267379679144,0.05583388533175464 +NN_TORCH (default),74.85835825680128,0.5412627521423472,14.695998951842784,0.19921856017345402,0.9806438127346997,0.9547730745646801,0.17458091177603513,0.15954937856770027,327.51821759182326,9.71084645377221,31.558823529411764,0.22759,34.192402362823486,0.22645958264668783,9.990997226772679,0.1258046787872722,1.0,0.9967824967824969,0.14050249681884142,0.10372091439545034,204.15320945265722,7.844934898940183,33.0,0,0,0,51,1067.7,21.6,22.3,0.3054812834224599,0.034545021433579105 +FASTAI (default),27.993367113578813,1.070063710732138,4.951242935164595,0.5056871108143457,0.980597127305428,0.9552499725431345,0.20804378505182314,0.19756225539807246,87.08936574190912,29.523324076097715,33.72549019607843,0.24127,12.973222759034899,0.8480017715030246,2.8561126039251374,0.37317813888351864,1.0,1.0,0.15801035292114685,0.14150141420291248,77.36570184770021,28.70383139894662,36.0,0,0,0,51,1010.0,23.1,21.6,0.25623885918003564,0.032185776373391164 +RF (default),5.289521401640116,0.18747803105248345,0.8914167974682212,0.07557012377860906,0.9921615843984544,0.9727243059692057,0.21278870438809022,0.2205761144517812,11.63160057311077,4.814878723410724,34.009803921568626,0.2484,1.2628253036075168,0.08775801128811306,0.43425701731008426,0.05354175385774865,1.0,1.0,0.15453679020731148,0.13517433236964935,7.010079512208331,3.9076874559468244,35.5,0,0,0,51,1000.0,0.0,0.0,0.24977718360071302,0.03092019219675887 +XT (default),3.067954011524425,0.20851136391458946,0.7599985259503769,0.07668050932017083,0.976846527522819,0.9605937708427402,0.22983039198977984,0.2401119955011306,6.922794409042171,5.082769758447109,34.970588235294116,0.24428,1.029017792807685,0.0932659043206109,0.2473449527431567,0.04980480471851431,1.0,1.0,0.17836788960429084,0.15384123117868195,5.707021421143385,4.456198846093199,38.0,0,0,0,51,971.3,21.8,28.3,0.22794117647058823,0.0319093029788781 +LR (tuned + ensemble),298.0772925112502,1.5473658189794337,106.92655310814473,0.5272909274052285,0.9690155979901051,0.9649277391245569,0.3076836650088154,0.363888755727768,1326.7185005565484,20.631703511708864,36.549019607843135,0.25172,171.24826147821216,0.28876688745286727,47.49214683366935,0.1676693138737952,1.0,1.0,0.24584819980048034,0.25856044395126276,1064.131515124883,12.175213508812917,40.0,0,0,1,50,917.6,30.2,19.9,0.19206773618538325,0.035427685711888916 +LR (tuned),298.0772925112502,0.41952294077488855,106.92655310814473,0.14529639602271893,0.9805057099085136,0.9700729044893278,0.31502752204472384,0.3731108042116186,1326.7185005565484,5.927745129039777,37.509803921568626,0.25255,171.24826147821216,0.10993631680806477,47.49214683366935,0.0665447885938415,1.0,1.0,0.2610498572359117,0.2575122541077485,1064.131515124883,4.067640041721783,41.0,0,0,0,51,882.0,31.0,30.5,0.17023172905525846,0.030529102311359715 +LR (default),6.912347784956555,0.4444732105550163,2.521765696020704,0.1600876370747322,0.9855950536843827,0.9742457765044912,0.328743265214212,0.4183743495192478,32.35021650084229,7.018103298699637,38.03921568627451,0.2546,5.298751910527547,0.12218634287516277,1.5162047581506444,0.0887949061495271,1.0,1.0,0.26947668808304803,0.29385559917332205,22.991652687961256,4.725399415979996,41.5,0,0,1,50,863.0,23.4,29.7,0.15819964349376114,0.03280759256561152 +KNN (tuned + ensemble),185.73769010095035,9.265668427242952,28.077306593709025,0.6763553759282402,1.0,0.9960143410984765,0.45319634536787595,0.6115009873493139,80.584480435074,64.66285269534394,41.431372549019606,0.34658,12.91673379474216,0.27287014325459796,3.260792818960679,0.17676389939136214,1.0,1.0,0.41352429770943944,0.6662115944591954,60.87290895584066,13.90517184260326,43.0,0,0,0,51,684.1,27.0,31.7,0.08110516934046345,0.02430200505969366 +KNN (tuned),185.73769010095035,1.4108907164571591,28.077306593709025,0.11144693589219826,1.0,0.9979619364556479,0.46874729012301525,0.6540729052092533,80.584480435074,10.182721712640424,42.431372549019606,0.35012,12.91673379474216,0.08784447775946723,3.260792818960679,0.03624739765555662,1.0,1.0,0.4456786012073394,0.740394718900499,60.87290895584066,2.2791618782574123,44.0,0,0,0,51,607.3,30.7,47.9,0.05837789661319073,0.023658093377554222 +KNN (default),1.8202055170645122,0.18718713898544476,0.4967972483415847,0.03640307146637206,1.0,0.9999625498903626,0.5414051109614146,0.9163581410098474,1.035103875827835,2.1647566343855695,43.833333333333336,0.40859,0.22567404641045463,0.036888705359564886,0.050692503527237594,0.021942545900661376,1.0,1.0,0.5109534123207524,1.0,1.0,1.2564234767690339,45.0,0,0,0,51,458.6,41.2,46.1,0.026515151515151516,0.022918450740172708 diff --git a/data/full-imputed/time_plot.pdf b/data/full-imputed/time_plot.pdf new file mode 100644 index 0000000000000000000000000000000000000000..18da66e6ac6e448a5dac1082f270ddec02d78472 Binary files /dev/null and b/data/full-imputed/time_plot.pdf differ diff --git a/data/full-imputed/time_plot.png.zip b/data/full-imputed/time_plot.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..37122cdeb1b531b39b2d2530daa036e5ebf8752f --- /dev/null +++ b/data/full-imputed/time_plot.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1425f58f7556e77814ca63062d6ffbc1d9f5f0b02dfed773d91d750d54675e9 +size 338592 diff --git a/data/full-imputed/tuning-impact-elo-horizontal.pdf b/data/full-imputed/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..36584767122541fa34b98ee6d2af6cbd911f4a74 Binary files /dev/null and b/data/full-imputed/tuning-impact-elo-horizontal.pdf differ diff --git a/data/full-imputed/tuning-impact-elo-horizontal.png.zip b/data/full-imputed/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..9bfad0537f036930805c1ae5c069683289557a01 --- /dev/null +++ b/data/full-imputed/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:40a9013b43f4ad5807a9a23a498f17e8688a0f507366819b883f193c85aa9616 +size 155996 diff --git a/data/full-imputed/tuning-impact-elo.pdf b/data/full-imputed/tuning-impact-elo.pdf new file mode 100644 index 0000000000000000000000000000000000000000..23bf8d46a5c33637a6970116d70e9a87c602f3ed Binary files /dev/null and b/data/full-imputed/tuning-impact-elo.pdf differ diff --git a/data/full-imputed/tuning-impact-elo.png.zip b/data/full-imputed/tuning-impact-elo.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..725b3a584d70c171066dc658e00a2321048bd8c7 --- /dev/null +++ b/data/full-imputed/tuning-impact-elo.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b06bb49de3089f94392fa747be2a80ad66ae2e3ab15d416956834862b277f2dd +size 168849 diff --git a/data/full-reg/figures/critical-diagram.pdf b/data/full-reg/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..05a32546ee98d768209b8dc3de52de24c8c34f4e Binary files /dev/null and b/data/full-reg/figures/critical-diagram.pdf differ diff --git a/data/full-reg/figures/critical-diagram.png.zip b/data/full-reg/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..05bf6ba23e3c05ac8a74fe91271598efb7449c5f --- /dev/null +++ b/data/full-reg/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ade5ce1686ebdc9790174260de63d2867ffd11045f93679eddae440176ce8e97 +size 294500 diff --git a/data/full-reg/leaderboard.tex b/data/full-reg/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..87e77cc0454c252631034d2f678577df40cde430 --- /dev/null +++ b/data/full-reg/leaderboard.tex @@ -0,0 +1,49 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1887${}_{-76,+77}$}} & \textcolor{gold}{\textbf{0.735}} & \textcolor{gold}{\textbf{3.7}} & \textcolor{silver}{\textbf{2.3}} & \textcolor{bronze}{\textbf{2}} & \textcolor{silver}{\textbf{2.3\%}} & 1625.74 & 6.76 \\ +RealMLP (T+E) & \textcolor{silver}{\textbf{1818${}_{-57,+65}$}} & \textcolor{silver}{\textbf{0.734}} & \textcolor{silver}{\textbf{4.8}} & \textcolor{bronze}{\textbf{2.7}} & \textcolor{bronze}{\textbf{2}} & \textcolor{gold}{\textbf{1.6\%}} & 6567.16 & 2.48 \\ +ModernNCA (T+E) & \textcolor{bronze}{\textbf{1701${}_{-72,+54}$}} & 0.603 & \textcolor{bronze}{\textbf{7.3}} & \textcolor{bronze}{\textbf{2.7}} & \textcolor{silver}{\textbf{3}} & 3.4\% & 3779.52 & 7.69 \\ +CatBoost (T+E) & 1668${}_{-56,+71}$ & 0.536 & 8.1 & 5.8 & 0 & 4.1\% & 3552.96 & 0.97 \\ +LightGBM (T+E) & 1660${}_{-65,+60}$ & 0.531 & 8.2 & 6.1 & 0 & 4.6\% & 700.15 & 9.32 \\ +TabDPT (D) & 1659${}_{-52,+60}$ & \textcolor{bronze}{\textbf{0.635}} & 8.2 & \textcolor{gold}{\textbf{2.2}} & \textcolor{gold}{\textbf{5}} & \textcolor{bronze}{\textbf{2.5\%}} & 22.53 & 8.55 \\ +CatBoost (T) & 1625${}_{-55,+40}$ & 0.512 & 9.3 & 4.9 & 1 & 4.2\% & 3552.96 & 0.10 \\ +TabM (T+E) & 1623${}_{-71,+64}$ & 0.488 & 9.5 & 5.2 & 0 & 2.8\% & 4158.29 & 1.41 \\ +LightGBM (T) & 1538${}_{-47,+51}$ & 0.432 & 11.8 & 10.3 & 0 & 5.2\% & 700.15 & 0.97 \\ +XGBoost (T+E) & 1535${}_{-47,+58}$ & 0.432 & 11.8 & 11.1 & 0 & 5.1\% & 834.93 & 2.61 \\ +XGBoost (T) & 1514${}_{-52,+47}$ & 0.398 & 12.6 & 12.1 & 0 & 5.3\% & 834.93 & 0.39 \\ +ModernNCA (T) & 1484${}_{-41,+71}$ & 0.351 & 13.5 & 6.5 & 0 & 5.5\% & 3779.52 & 0.40 \\ +TabM (T) & 1478${}_{-74,+58}$ & 0.380 & 13.9 & 10.3 & 0 & 3.9\% & 4158.29 & 0.17 \\ +CatBoost (D) & 1465${}_{-45,+61}$ & 0.389 & 14.0 & 10.2 & 0 & 5.9\% & 10.89 & 0.09 \\ +RealMLP (T) & 1436${}_{-49,+44}$ & 0.344 & 15.2 & 10.9 & 0 & 4.2\% & 6567.16 & 0.09 \\ +ModernNCA (D) & 1367${}_{-54,+55}$ & 0.209 & 17.5 & 13.0 & 0 & 7.1\% & 15.50 & 0.30 \\ +TabM (D) & 1347${}_{-54,+60}$ & 0.287 & 18.0 & 14.3 & 0 & 5.6\% & 13.32 & 0.13 \\ +TorchMLP (T+E) & 1317${}_{-48,+58}$ & 0.171 & 19.2 & 14.1 & 0 & 7.2\% & 4608.59 & 1.23 \\ +RealMLP (D) & 1297${}_{-53,+57}$ & 0.137 & 19.6 & 15.5 & 0 & 6.7\% & 33.36 & 0.08 \\ +ExtraTrees (T+E) & 1287${}_{-58,+51}$ & 0.157 & 20.2 & 11.8 & 0 & 9.7\% & 158.22 & 0.84 \\ +LightGBM (D) & 1268${}_{-65,+57}$ & 0.070 & 20.8 & 20.3 & 0 & 7.7\% & 2.11 & 0.27 \\ +ExtraTrees (T) & 1262${}_{-52,+55}$ & 0.129 & 21.0 & 14.5 & 0 & 9.9\% & 158.22 & 0.15 \\ +TorchMLP (T) & 1251${}_{-66,+43}$ & 0.123 & 21.5 & 18.8 & 0 & 8.1\% & 4608.59 & 0.10 \\ +XGBoost (D) & 1220${}_{-50,+45}$ & 0.111 & 22.6 & 20.4 & 0 & 8.4\% & 2.24 & 0.24 \\ +RandomForest (T+E) & 1211${}_{-63,+44}$ & 0.066 & 23.2 & 20.4 & 0 & 10.6\% & 515.73 & 0.77 \\ +RandomForest (T) & 1150${}_{-54,+52}$ & 0.048 & 24.8 & 22.7 & 0 & 11.1\% & 515.73 & 0.12 \\ +EBM (T+E) & 1145${}_{-51,+63}$ & 0.160 & 24.8 & 12.9 & 0 & 13.4\% & 1890.68 & 0.13 \\ +ExtraTrees (D) & 1114${}_{-58,+45}$ & 0.061 & 26.0 & 22.8 & 0 & 11.9\% & 0.47 & 0.06 \\ +EBM (T) & 1104${}_{-57,+55}$ & 0.137 & 26.4 & 16.4 & 0 & 13.9\% & 1890.68 & 0.01 \\ +FastaiMLP (T+E) & 1067${}_{-52,+51}$ & 0.023 & 27.4 & 25.8 & 0 & 11.9\% & 540.06 & 2.67 \\ +TorchMLP (D) & 1048${}_{-72,+52}$ & 0.016 & 28.1 & 26.3 & 0 & 11.6\% & 20.48 & 0.08 \\ +EBM (D) & 1025${}_{-48,+60}$ & 0.093 & 28.5 & 26.0 & 0 & 14.8\% & 6.33 & 0.04 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.000 & 29.2 & 28.6 & 0 & 12.6\% & 0.53 & 0.06 \\ +FastaiMLP (T) & 994${}_{-54,+57}$ & 0.013 & 29.2 & 28.2 & 0 & 12.4\% & 540.06 & 0.32 \\ +FastaiMLP (D) & 858${}_{-56,+52}$ & 0.000 & 32.2 & 31.8 & 0 & 16.9\% & 2.60 & 0.39 \\ +KNN (T+E) & 507${}_{-111,+65}$ & 0.000 & 37.0 & 36.8 & 0 & 36.0\% & 2.43 & 0.14 \\ +Linear (T+E) & 470${}_{-96,+65}$ & 0.000 & 37.3 & 37.2 & 0 & 35.3\% & 45.74 & 0.11 \\ +KNN (T) & 413${}_{-101,+81}$ & 0.000 & 38.0 & 37.8 & 0 & 36.6\% & 2.43 & 0.03 \\ +Linear (T) & 398${}_{-99,+88}$ & 0.000 & 37.9 & 37.8 & 0 & 35.5\% & 45.74 & 0.05 \\ +Linear (D) & 276${}_{-99,+71}$ & 0.000 & 39.2 & 39.1 & 0 & 38.0\% & 1.19 & 0.09 \\ +KNN (D) & 200${}_{-136,+122}$ & 0.000 & 39.7 & 39.6 & 0 & 40.6\% & 0.04 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/full-reg/tabarena_leaderboard.csv b/data/full-reg/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..9ea4f6b7c6ac5a4ab8b4f730d55e3b314338f980 --- /dev/null +++ b/data/full-reg/tabarena_leaderboard.csv @@ -0,0 +1,42 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),10053.09853225386,65.70725388180496,3138.716102354025,8.201870690838728,0.26461131829744206,0.27698219418045594,0.02291895685939331,0.035964926580766866,79227.35072832079,1344.7158520024777,3.6923076923076925,4.16484,12938.261539538702,10.17258334159851,1625.738447135909,6.759745919437983,0.23133371369265834,0.31069761727157824,0.015074633964319739,0.016643068102528722,85166.92529682687,517.5224332581762,3.0,2,3,4,4,1887.4,76.5,75.1,0.9326923076923077,0.4262820512820513 +REALMLP (tuned + ensemble),67709.67308180964,12.011677133731354,6963.87401913588,6.973854948645224,0.26596078198184286,0.29443527620235754,0.015584141131814132,0.027230868655267488,303258.6729720159,231.34066267352048,4.846153846153846,4.12234,28517.58209461636,4.232689115736219,6567.164030631503,2.480181700638205,0.19393332805279354,0.29742106435254645,0.008809518245537662,0.018856294653559095,203710.74184331874,227.32654052633927,4.0,2,2,2,7,1817.9,64.3,56.7,0.9038461538461539,0.3723776223776224 +MNCA (tuned + ensemble),31759.101002019288,48.99773550746787,6166.048475961011,17.643273328787647,0.39661026996847265,0.3574009968929841,0.034393187333226535,0.0356806857944739,186080.1526868885,677.6872983460363,7.3076923076923075,4.48697,16310.556293937894,10.227302259869045,3779.5248398651206,7.690422738079533,0.35096562881515253,0.35660094620644167,0.019057076597787104,0.023956472897060137,198038.2195884601,507.5335424076248,5.0,3,1,1,8,1701.4,53.3,71.3,0.8423076923076923,0.371127337574706 +CAT (tuned + ensemble),30302.612278386874,4.803950041991014,6846.603031359736,1.23549623448159,0.46394270088709505,0.4802827806871086,0.04080599575171776,0.05421914583673029,150000.99966111325,116.73562230823504,8.076923076923077,4.20863,22090.5574801498,2.685193909539117,3552.958864906998,0.9657383741190037,0.4150140849407447,0.4814904032190787,0.020503839297461468,0.036001021424025365,164350.82166574517,80.04598787510639,9.0,0,1,1,11,1668.1,70.5,55.8,0.823076923076923,0.1736409564344787 +GBM (tuned + ensemble),3469.6354441180188,51.50608464469258,806.792675338677,8.57889161766087,0.4687885136784251,0.4779542732223701,0.0459142163370746,0.05847756014580863,20529.34803212188,1135.938430886033,8.153846153846153,4.21165,3055.419878217909,17.84196005927192,700.1537746143185,9.321818212785747,0.41525976638432377,0.45795538731815566,0.021205168859483203,0.03428335611022812,21786.570035668443,556.5446710812895,6.0,0,0,1,12,1660.1,60.0,64.2,0.8211538461538461,0.16348235098235098 +TABDPT (default),150.02111775732448,54.38835432835114,28.728808967170515,25.042434382939593,0.36524485788247774,0.3757566695219451,0.0248363952945231,0.03287367328135333,859.9949957462201,1916.4746059233426,8.23076923076923,4.26747,137.4807067182329,29.239719518025716,22.52690614988171,8.547073882575031,0.28825222902504594,0.3825474634976233,0.024988993973347418,0.018753299523578005,904.9754515353668,1821.0928942426492,6.0,5,0,0,8,1659.1,59.1,52.0,0.8192307692307692,0.4508083796545335 +CAT (tuned),30302.612278386874,0.5550543483505901,6846.603031359736,0.16909210735792113,0.48768370669415817,0.49734395231843176,0.04200103064707736,0.05784545109220123,150000.99966111325,13.40589862804556,9.26923076923077,4.23064,22090.5574801498,0.3789627022213406,3552.958864906998,0.10450043094654878,0.5120020111687084,0.49257184784034574,0.02559764776763218,0.03741223775007456,164350.82166574517,10.277501137463553,8.0,1,1,0,11,1624.9,39.7,54.9,0.7932692307692307,0.20512820512820512 +TABM (tuned + ensemble),42663.03407314721,6.401903104782104,7337.2216133773,2.4450322937588758,0.5116630985629478,0.49582652586594606,0.028484933382026085,0.07757253674263247,180683.6213219068,120.23686774247648,9.538461538461538,4.1458,20820.912871148852,3.0152715841929116,4158.291053548661,1.4096720886484275,0.43090407292838107,0.45442246548589066,0.02530931453787877,0.01972389522417027,166291.5239595113,107.26839105814781,6.0,0,2,0,11,1623.4,63.2,70.8,0.7865384615384615,0.19279400424137266 +XGB (tuned + ensemble),6384.208144447945,12.513759183476113,1409.0815132938305,4.086239880001324,0.567688570586379,0.5777747629771025,0.05130276883821868,0.06605947558091682,26398.72664659111,353.2622484982949,11.76923076923077,4.22219,2596.930354913076,6.618133616447449,834.9300717202715,2.614265349176195,0.6256631137377601,0.6286907892907813,0.027928072769987877,0.04797658607386891,26370.57574541831,164.97865636299682,11.0,0,0,0,13,1534.8,57.5,46.7,0.7307692307692307,0.08990147610057113 +GBM (tuned),3469.6354441180188,7.385669826034806,806.792675338677,1.273710690539978,0.5677497712335507,0.5662287079782077,0.0520873055824581,0.07113770565213798,20529.34803212188,170.85098129275468,11.846153846153847,4.23482,3055.419878217909,2.690451833936903,700.1537746143185,0.9682498776019389,0.5552965190223861,0.5777514545736621,0.027972044011269404,0.03922298758674819,21786.570035668443,130.77892053638826,12.0,0,0,0,13,1538.0,50.6,46.2,0.7288461538461538,0.09731628351537853 +XGB (tuned),6384.208144447945,2.5463058025409016,1409.0815132938305,0.7958717570786976,0.6023900941776976,0.6057381247672692,0.052778191073056716,0.07014560263282078,26398.72664659111,74.35901041305158,12.576923076923077,4.23559,2596.930354913076,1.7797584003872342,834.9300717202715,0.3883258596259137,0.6620816436761664,0.6405605013277449,0.02927659597280985,0.04927060675426248,26370.57574541831,37.05707269936294,12.0,0,0,0,13,1514.0,46.4,51.4,0.7105769230769231,0.08262651752470757 +MNCA (tuned),31759.101002019288,2.180037997319148,6166.048475961011,0.7699041848491119,0.6494157477338965,0.5952759886308904,0.05539455073257844,0.07068306551551398,186080.1526868885,32.66971509741366,13.461538461538462,4.74895,16310.556293937894,0.48738079600863987,3779.5248398651206,0.3958729871859153,0.686862000079896,0.5700170149041335,0.027829209062692706,0.0493534926522164,198038.2195884601,22.639832589759934,14.0,0,2,1,10,1484.4,70.4,40.2,0.6884615384615385,0.1533190381368519 +TABM (tuned),42663.03407314721,0.6790118826760185,7337.2216133773,0.2826346699151617,0.6195303355392976,0.5994402285038597,0.03858566517030648,0.09844026405388778,180683.6213219068,13.142332198971314,13.923076923076923,4.26932,20820.912871148852,0.3543446593814426,4158.291053548661,0.16724776809008424,0.5658117450288995,0.5832600605513046,0.034427431733584535,0.027702057966854137,166291.5239595113,12.524825096296247,11.0,0,0,0,13,1478.2,57.9,73.1,0.676923076923077,0.09674719867027558 +CAT (default),101.69209800418625,0.27084102752881173,23.09400681118272,0.13269262448628016,0.6107641551712003,0.6248418005606562,0.05895124482581516,0.07549119565100736,496.3567653671193,9.26273127760114,13.961538461538462,4.21395,62.699359814325966,0.2736650307973226,10.889876924735699,0.09199146861016888,0.5847419575275037,0.6538434834236965,0.021738610101570965,0.03712451320809096,412.72250349454265,8.72239577968297,13.0,0,0,0,13,1464.8,60.5,45.0,0.6759615384615385,0.09783139765925522 +REALMLP (tuned),67709.67308180964,0.5510838443397457,6963.87401913588,0.35676328285131836,0.655870427194709,0.6192525352803894,0.041781108455330826,0.0794078602157222,303258.6729720159,12.866326813784351,15.23076923076923,4.2931,28517.58209461636,0.2134650813208686,6567.164030631503,0.09160214336768219,0.67949922992921,0.6789002713578278,0.0397756810178872,0.04931132619439678,203710.74184331874,10.089309852682138,16.0,0,0,1,12,1435.5,43.8,48.5,0.6442307692307693,0.09133017251832487 +MNCA (default),99.58075828980176,2.0463109099966847,18.34482305470451,0.7039566701063034,0.7905734390801098,0.758807785538453,0.07077992898843004,0.09644209551436314,547.7971597202185,27.012858819060174,17.53846153846154,4.94033,49.107323222690155,0.41290783882141113,15.50085128260229,0.29868905742963153,0.9481480616823943,0.9086181882493612,0.03934956674595991,0.09386686583837221,536.5510136544884,15.897990578305649,18.0,0,0,0,13,1367.3,54.5,53.7,0.5865384615384616,0.07718910857215008 +TABM (default),109.08048485323914,0.5565733836247371,30.11061011517182,0.2424694191265854,0.7132806572850755,0.7147282609076164,0.05630269517815623,0.12970614775630301,580.6229196238922,9.300928058329742,18.0,4.27071,65.0270922978719,0.1736939483218723,13.315938751381204,0.13061717308512005,0.6984407490367052,0.6980606107077207,0.05131334419415079,0.051190784688604524,354.8698900002396,7.016702417594013,16.0,0,0,0,13,1347.3,59.8,53.4,0.575,0.06992665938463788 +NN_TORCH (tuned + ensemble),44960.17932087947,4.03415755573501,5714.412959426929,1.3298489659738517,0.8294336223884518,0.8131262211817997,0.07233788099307822,0.11855200253632468,220430.49479235077,97.5953017172201,19.153846153846153,4.65351,15497.21247045199,2.8239229255252414,4608.594420268999,1.2325370779691025,0.9419042422995463,0.8828910186177211,0.05961664534372901,0.0834039942118561,177697.9133637009,96.53385472639289,20.0,0,0,0,13,1316.9,57.9,47.4,0.5461538461538461,0.0711002949006045 +REALMLP (default),360.5571643448284,0.5610987406510572,36.16407680854995,0.35279691100634736,0.8625329178435776,0.8317168418227403,0.06669325379717181,0.12718726997120225,1586.4859687066598,12.051410462198454,19.615384615384617,4.76491,145.05578433142767,0.20847519238789877,33.35619963926919,0.08008880801221668,0.9645369038929024,0.9167568200448138,0.0482796143974169,0.09560361463677074,955.4745851239355,8.821199701887194,20.0,0,0,0,13,1297.2,56.7,52.7,0.5346153846153846,0.06456321227379722 +XT (tuned + ensemble),1206.5194327423715,3.367576224375994,442.36567464742467,1.069316176291616,0.8432098887288747,0.8539657200625945,0.09727743600772741,0.11929865478792087,8220.916376158188,88.72398178178156,20.23076923076923,5.04794,766.4287914435068,3.576531834072537,158.22496863160976,0.8436571643025992,1.0,1.0,0.049883734447319306,0.11775243552615773,5379.1926370125775,93.77443700336585,24.0,0,0,1,12,1287.1,50.1,57.9,0.5192307692307693,0.08465359052403586 +GBM (default),10.696644908750159,2.245801542559241,3.3219193518156818,0.4939224454467883,0.9303753212721771,0.8861460110602865,0.07714163256645466,0.11821573250771258,80.53396018410795,58.38494586351267,20.76923076923077,4.4838,7.6057972113291425,0.9335102770063612,2.1107352135741744,0.2745991643955073,0.9919628610131755,0.9232692312204189,0.04937573951369445,0.09720745168389198,91.35379089849596,24.071571166022103,20.0,0,0,0,13,1267.6,57.0,64.6,0.5057692307692307,0.0493082737247497 +XT (tuned),1206.5194327423715,0.45855456478575357,442.36567464742467,0.18106308268317445,0.8708387105454042,0.8747725588350008,0.09945068180602606,0.12636366597608262,8220.916376158188,14.733646953552363,21.03846153846154,5.07281,766.4287914435068,0.34996385044521755,158.22496863160976,0.15116311924152026,1.0,0.9988849454089667,0.050434912392149145,0.1220745399591585,5379.1926370125775,14.85253877204164,23.0,0,0,0,13,1261.7,54.5,51.3,0.49903846153846154,0.0689176100929359 +NN_TORCH (tuned),44960.17932087947,0.2904610134597517,5714.412959426929,0.10578639607158029,0.8767833734464541,0.8672270215081037,0.0805298918130512,0.13726924760597434,220430.49479235077,7.008617614539204,21.53846153846154,4.7148,15497.21247045199,0.18749599986606175,4608.594420268999,0.09690652350320204,1.0,0.9256758910554999,0.0730528979706524,0.0987961550603162,177697.9133637009,6.6162587674624795,21.0,0,0,0,13,1250.6,42.6,65.5,0.48653846153846153,0.05309187244027516 +XGB (default),12.109628748486186,0.9366908014330091,3.1242101566662273,0.3058622971724495,0.8890754852284094,0.8651216835463518,0.08407275864462525,0.12418615325132822,67.76669386312491,28.981192639888032,22.576923076923077,4.75458,7.79438853263855,0.5301833947499593,2.2441525977447814,0.24247013095584213,1.0,0.9462051303684406,0.053268364359151055,0.09620970472363667,75.78126190262087,16.70456778360137,23.0,0,0,0,13,1220.0,44.6,49.8,0.46057692307692305,0.04904036202859359 +RF (tuned + ensemble),2056.9684221919783,3.432567118171953,500.6828145780351,1.0841887922659657,0.9341058521704168,0.9236706018825189,0.105513096504343,0.12898261780451312,10863.776799780928,90.24173062908258,23.153846153846153,5.10626,1088.11842862765,1.444333102968004,515.7302180242054,0.7709478321252661,1.0,1.0,0.06747831844148078,0.12984639499235218,11909.387526812274,91.13733276245972,25.0,0,0,0,13,1210.7,43.7,62.2,0.4461538461538462,0.04896049394550144 +RF (tuned),2056.9684221919783,0.3944415463341607,500.6828145780351,0.1540690468576434,0.9521440935358421,0.9437852901121029,0.11064962874343257,0.13747898342548212,10863.776799780928,12.055960583687282,24.76923076923077,5.18858,1088.11842862765,0.3136819733513726,515.7302180242054,0.12356183047078627,1.0,0.9985687166782279,0.07199386349783587,0.13249201584155068,11909.387526812274,11.406268840053338,26.5,0,0,0,13,1149.9,51.2,53.6,0.40576923076923077,0.043959898297324766 +EBM (tuned + ensemble),26125.740811508127,1.1628193525167612,3543.4601988059676,0.44619286429284205,0.8401103872067855,0.8467568705429724,0.1338965285273,0.1931502560496656,102743.72801916403,13.991542249573758,24.846153846153847,4.37076,6865.337549757957,0.3269915845659044,1890.6770917738593,0.13287644820933492,1.0,1.0,0.12484220041460736,0.13745169436097826,100996.51953892264,10.735277156769923,30.0,0,1,0,12,1145.1,62.3,50.5,0.40384615384615385,0.07744479104773222 +XT (default),4.038539225015885,0.28929371426248146,0.7743502790637721,0.08386602222003156,0.9388750296989964,0.9258564635651129,0.11937488392983725,0.17775652044709925,12.035409694284047,7.5346994725559195,25.96153846153846,5.13889,2.058894846174452,0.22108591927422416,0.46766450701756074,0.055025941487738636,1.0,1.0,0.08970042626125052,0.14572571437043855,11.279879616649893,6.868103658105229,28.0,0,0,0,13,1113.8,44.8,57.6,0.37596153846153846,0.043944583247239793 +EBM (tuned),26125.740811508127,0.09807351307991222,3543.4601988059676,0.04003457596727012,0.8629348413551933,0.8641912711828517,0.13933265319744756,0.20733107333477666,102743.72801916403,1.2806753432569877,26.384615384615383,4.42885,6865.337549757957,0.03976681497361925,1890.6770917738593,0.012767925630469206,1.0,1.0,0.1292889595673019,0.16421526338237233,100996.51953892264,1.0,31.0,0,0,1,12,1103.8,54.1,56.9,0.36538461538461536,0.06110409305380842 +FASTAI (tuned + ensemble),4642.340904501768,8.77000401672135,1248.4159649024482,5.455805855994076,0.976857214707512,0.9650087128953216,0.1187077131039122,0.1958954791648477,32660.757986061028,336.5557366220283,27.384615384615383,5.27232,3620.151651991738,7.070411682128906,540.0550122797715,2.672383567926809,1.0,1.0,0.09553883535836205,0.1756471684639439,33252.590479353305,324.2617231497696,28.0,0,0,0,13,1066.9,50.2,51.7,0.3403846153846154,0.038734526845304776 +NN_TORCH (default),151.43337847534409,0.22200924249795764,23.930992166336175,0.08705592871966639,0.9842745385355711,0.9653152924171982,0.1159311306350962,0.1939720391754568,831.2534637345914,6.296769612194,28.076923076923077,4.81722,65.27966655625238,0.14955372280544704,20.47535029226034,0.07966387341594139,1.0,1.0,0.14050249681884142,0.12787389324907794,546.0088939544379,6.398331846186531,29.0,0,0,0,13,1047.6,52.0,71.7,0.3230769230769231,0.038013218612602546 +EBM (default),61.24195487458481,0.11774356426336827,10.057660454061297,0.07098698960857609,0.9074728958123806,0.8949324780335647,0.1478441407239659,0.21939746082384276,260.56038663650384,2.782386904835081,28.46153846153846,4.44221,19.539602756500244,0.04366175333658854,6.327684720357259,0.039228283502797535,1.0,1.0,0.13598656576261503,0.17428198310764084,214.4635432482436,1.7877726532004268,32.0,0,0,0,13,1025.0,59.4,47.3,0.31346153846153846,0.038402841931956005 +FASTAI (tuned),4642.340904501768,0.9693611326380673,1248.4159649024482,0.5658202849858516,0.9865659937937807,0.9841061225078185,0.12355018947405211,0.2149265638517959,32660.757986061028,38.043706128121705,29.153846153846153,5.18701,3620.151651991738,1.0039918687608507,540.0550122797715,0.32495714500374384,1.0,1.0,0.11498630600016613,0.18136473255372101,33252.590479353305,36.96325797937462,30.0,0,0,0,13,993.6,56.6,54.0,0.29615384615384616,0.03550248539329544 +RF (default),9.267833071284823,0.31600932585887426,1.1524694810969494,0.08818027662752802,1.0,0.9863683581302479,0.12604929965195016,0.1887047479294239,27.793203837891944,7.952067403429296,29.192307692307693,5.26146,5.924832847383287,0.26005201869540745,0.5271703851240811,0.062155184511682476,1.0,1.0,0.08142906388988314,0.16952476586239196,20.351375021697393,6.812325017295935,30.0,0,0,0,13,1000.0,0.0,0.0,0.2951923076923077,0.03495782131228705 +FASTAI (default),18.837261917856004,0.970951892168094,4.540025666114834,0.4814105255967321,1.0,0.9959552618191563,0.16872453978924914,0.27439674920317614,123.96432879696866,34.056295479841545,32.23076923076923,6.36878,15.734567880630493,1.036571078830295,2.6035035917474465,0.39168673048638547,1.0,1.0,0.11912950408368406,0.2702307281941561,138.38888737159849,36.35774230804583,33.0,0,0,0,13,857.7,51.1,55.4,0.21923076923076923,0.031473026513349094 +KNN (tuned + ensemble),114.20154888548403,2.240403470422468,27.24026724430238,0.3822937952790146,1.0,0.9945031966048126,0.3598801335906333,0.6533089580186643,84.22414291151355,25.830539833366643,36.96153846153846,8.1617,12.562016169230143,0.2777775393591987,2.427842858706766,0.14225338857865483,1.0,1.0,0.40397120788143714,0.7463425738250281,66.86014550581744,15.449077396683121,38.0,0,0,0,13,507.2,64.8,110.8,0.10096153846153846,0.02715962228346129 +LR (tuned + ensemble),260.53517347665934,0.6450919024964683,90.82426667518703,0.24319307436598073,1.0,1.0,0.352892655701834,0.6557523261730988,2016.752248654558,10.333316070921164,37.30769230769231,8.15038,155.9093332555559,0.2263851695590549,45.737119844257606,0.10677772758861735,1.0,1.0,0.3837007808160382,0.7329121013386546,1735.7243863195374,7.455644136363483,37.0,0,0,0,13,470.2,64.6,95.5,0.09230769230769231,0.02685627582626868 +LR (tuned),260.53517347665934,0.13893676680377404,90.82426667518703,0.0633906225067962,1.0,1.0,0.35453305327406925,0.6627996774467568,2016.752248654558,3.076869056672641,37.92307692307692,8.15927,155.9093332555559,0.05223793453640408,45.737119844257606,0.05009355954825878,1.0,1.0,0.38692302721983185,0.7428842308683161,1735.7243863195374,2.8910146707817423,38.0,0,0,0,13,398.3,87.3,98.2,0.07692307692307693,0.02644513514958859 +KNN (tuned),114.20154888548403,0.2574708763350788,27.24026724430238,0.05410989827741383,1.0,0.9986251034032172,0.36615711155679204,0.6859659134833532,84.22414291151355,3.3744578525576268,37.96153846153846,8.25638,12.562016169230143,0.09626038869222005,2.427842858706766,0.028318042556444805,1.0,1.0,0.4107940735262967,0.7514125894823619,66.86014550581744,2.1032822899149863,39.0,0,0,0,13,413.0,80.3,100.6,0.07596153846153846,0.026437532206762978 +LR (default),4.903454369968838,0.19863318235446245,2.033074367074023,0.07563726553282422,1.0,1.0,0.3796067152688096,0.7701691922754811,45.803264898726965,4.508173919929611,39.15384615384615,8.22758,5.286295996771918,0.09712121221754286,1.191311693685635,0.0859815087197251,1.0,1.0,0.38694683448707623,0.8770019722050186,34.79737790723628,4.100500531123858,39.0,0,0,0,13,275.5,70.5,98.7,0.046153846153846156,0.02558384584157271 +KNN (default),2.0401845610039864,0.07292802802517882,0.5185761600539422,0.029646415007926124,1.0,0.9998182520639809,0.4063058194859719,0.8488883831481537,1.1215157323318734,1.57971988992164,39.73076923076923,8.55152,0.15191650390625,0.04789047771030002,0.03752343922831045,0.021942545900661376,1.0,1.0,0.4485471489101098,1.0,1.0,1.5317182357480712,41.0,0,0,0,13,199.5,121.3,135.8,0.03173076923076923,0.025270981610470106 diff --git a/data/full-reg/time_plot.pdf b/data/full-reg/time_plot.pdf new file mode 100644 index 0000000000000000000000000000000000000000..90922e6262ebb3eb971775e92bf721a1cf0a0a0a Binary files /dev/null and b/data/full-reg/time_plot.pdf differ diff --git a/data/full-reg/time_plot.png.zip b/data/full-reg/time_plot.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..263ba6a3c2cb850a1ba7da44ac69dda4162238c3 --- /dev/null +++ b/data/full-reg/time_plot.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fe29fdec65f03c7d3bae994bc888e7c7ec498e3800a153239b08e6703208a4a +size 79911 diff --git a/data/full-reg/tuning-impact-elo-horizontal.pdf b/data/full-reg/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8ab93305764ec10694e573706f10749d661d127d Binary files /dev/null and b/data/full-reg/tuning-impact-elo-horizontal.pdf differ diff --git a/data/full-reg/tuning-impact-elo-horizontal.png.zip b/data/full-reg/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..afe3867d082612c2e91501f734d1dac3b920e392 --- /dev/null +++ b/data/full-reg/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1d5f881271b5a00e3b778451ad67bace45bd811aab630061c78057f191972c3 +size 129887 diff --git a/data/full-reg/tuning-impact-elo.pdf b/data/full-reg/tuning-impact-elo.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3bee6e8cdcd324aab499ef8c9d8694cd8a1120f2 Binary files /dev/null and b/data/full-reg/tuning-impact-elo.pdf differ diff --git a/data/full-reg/tuning-impact-elo.png.zip b/data/full-reg/tuning-impact-elo.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..c20e3e0f66bfd81ed643341f4e1ced5c1d314601 --- /dev/null +++ b/data/full-reg/tuning-impact-elo.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48a53db8aa38c1ed4c8785923113986e05d6b398ff01a02150bf521dbad9d5be +size 131368 diff --git a/data/full/figures/critical-diagram.pdf b/data/full/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..d93ca7dd961de52243a3db2bbdf6d240ea15e668 Binary files /dev/null and b/data/full/figures/critical-diagram.pdf differ diff --git a/data/full/figures/critical-diagram.png.zip b/data/full/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..e9f3fa98dc88711314c65417b79f3997c3a6af49 --- /dev/null +++ b/data/full/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15ef6dc5b13370dc816b3db4e65ebfd7e1bbffda30630b5beed17cf780e6443a +size 294416 diff --git a/data/full/leaderboard.tex b/data/full/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..4c776548eb77786c5911d6c73b2d469a45e51ab2 --- /dev/null +++ b/data/full/leaderboard.tex @@ -0,0 +1,49 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1644${}_{-30,+39}$}} & \textcolor{gold}{\textbf{0.608}} & \textcolor{gold}{\textbf{6.8}} & \textcolor{gold}{\textbf{2.4}} & \textcolor{gold}{\textbf{13}} & \textcolor{gold}{\textbf{4.0\%}} & 1408.78 & 3.34 \\ +RealMLP (T+E) & \textcolor{silver}{\textbf{1600${}_{-33,+24}$}} & \textcolor{silver}{\textbf{0.499}} & \textcolor{silver}{\textbf{8.1}} & 4.2 & 3 & \textcolor{bronze}{\textbf{5.7\%}} & 6044.76 & 3.80 \\ +TabM (T+E) & \textcolor{bronze}{\textbf{1594${}_{-36,+28}$}} & \textcolor{bronze}{\textbf{0.491}} & \textcolor{bronze}{\textbf{8.3}} & \textcolor{silver}{\textbf{3.4}} & \textcolor{bronze}{\textbf{6}} & \textcolor{silver}{\textbf{5.0\%}} & 3285.87 & 1.47 \\ +LightGBM (T+E) & 1573${}_{-26,+30}$ & 0.437 & 8.9 & 5.6 & 1 & 6.9\% & 416.98 & 2.64 \\ +CatBoost (T+E) & 1521${}_{-25,+35}$ & 0.419 & 10.6 & 6.6 & 1 & 6.0\% & 1658.41 & 0.65 \\ +CatBoost (T) & 1506${}_{-27,+28}$ & 0.403 & 11.1 & 5.8 & 2 & 6.2\% & 1658.41 & 0.08 \\ +TabM (T) & 1489${}_{-30,+29}$ & 0.400 & 11.8 & 5.9 & 1 & 6.1\% & 3285.87 & 0.17 \\ +LightGBM (T) & 1483${}_{-24,+25}$ & 0.347 & 12.0 & 10.2 & 0 & 7.6\% & 416.98 & 0.33 \\ +XGBoost (T+E) & 1472${}_{-29,+30}$ & 0.349 & 12.3 & 8.6 & 0 & 7.7\% & 693.49 & 1.69 \\ +ModernNCA (T+E) & 1468${}_{-33,+30}$ & 0.432 & 12.6 & 4.2 & 5 & 6.9\% & 4621.67 & 8.15 \\ +CatBoost (D) & 1459${}_{-28,+31}$ & 0.356 & 12.9 & 6.6 & 2 & 7.6\% & 6.83 & 0.08 \\ +XGBoost (T) & 1435${}_{-31,+27}$ & 0.299 & 13.9 & 11.7 & 0 & 8.0\% & 693.49 & 0.31 \\ +ModernNCA (T) & 1429${}_{-20,+29}$ & 0.297 & 14.0 & 6.7 & 2 & 7.6\% & 4621.67 & 0.47 \\ +TabM (D) & 1373${}_{-29,+31}$ & 0.275 & 16.5 & 10.7 & 0 & 9.3\% & 10.49 & 0.13 \\ +RealMLP (T) & 1373${}_{-29,+30}$ & 0.238 & 16.4 & 11.9 & 0 & 8.7\% & 6044.76 & 0.18 \\ +TorchMLP (T+E) & 1352${}_{-29,+23}$ & 0.209 & 17.3 & 11.7 & 0 & 9.0\% & 2874.67 & 1.95 \\ +ModernNCA (D) & 1319${}_{-23,+25}$ & 0.153 & 18.7 & 10.7 & 1 & 11.2\% & 14.87 & 0.31 \\ +TabDPT (D) & 1318${}_{-26,+34}$ & 0.293 & 18.8 & \textcolor{bronze}{\textbf{4.1}} & \textcolor{silver}{\textbf{9}} & 10.8\% & 22.53 & 8.55 \\ +EBM (T+E) & 1315${}_{-23,+24}$ & 0.175 & 18.9 & 10.8 & 0 & 12.9\% & 1331.68 & 0.20 \\ +FastaiMLP (T+E) & 1270${}_{-29,+30}$ & 0.152 & 20.8 & 12.4 & 0 & 12.8\% & 593.24 & 4.47 \\ +ExtraTrees (T+E) & 1257${}_{-29,+29}$ & 0.117 & 21.4 & 12.8 & 0 & 13.0\% & 183.02 & 0.76 \\ +EBM (T) & 1256${}_{-26,+24}$ & 0.126 & 21.4 & 14.9 & 0 & 13.6\% & 1331.68 & 0.02 \\ +RealMLP (D) & 1249${}_{-26,+27}$ & 0.102 & 21.7 & 17.6 & 0 & 11.1\% & 35.36 & 0.19 \\ +XGBoost (D) & 1247${}_{-31,+25}$ & 0.106 & 21.8 & 17.4 & 0 & 11.1\% & 1.94 & 0.12 \\ +TorchMLP (T) & 1240${}_{-25,+30}$ & 0.103 & 22.0 & 18.2 & 0 & 11.2\% & 2874.67 & 0.13 \\ +EBM (D) & 1220${}_{-27,+26}$ & 0.125 & 23.0 & 9.5 & 3 & 14.6\% & 4.67 & 0.04 \\ +LightGBM (D) & 1213${}_{-26,+27}$ & 0.074 & 23.2 & 20.9 & 0 & 12.1\% & 1.96 & 0.14 \\ +ExtraTrees (T) & 1210${}_{-28,+28}$ & 0.092 & 23.3 & 14.8 & 0 & 14.1\% & 183.02 & 0.09 \\ +RandomForest (T+E) & 1208${}_{-25,+25}$ & 0.096 & 23.4 & 14.0 & 0 & 13.6\% & 373.18 & 0.77 \\ +FastaiMLP (T) & 1179${}_{-28,+20}$ & 0.070 & 24.6 & 19.7 & 0 & 14.5\% & 593.24 & 0.31 \\ +RandomForest (T) & 1153${}_{-28,+28}$ & 0.069 & 25.5 & 16.6 & 0 & 14.7\% & 373.18 & 0.09 \\ +TorchMLP (D) & 1070${}_{-25,+23}$ & 0.019 & 28.7 & 26.3 & 0 & 16.2\% & 9.99 & 0.13 \\ +FastaiMLP (D) & 1014${}_{-28,+26}$ & 0.019 & 30.5 & 27.9 & 0 & 19.8\% & 2.86 & 0.37 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.008 & 30.9 & 29.1 & 0 & 20.1\% & 0.43 & 0.05 \\ +ExtraTrees (D) & 977${}_{-29,+23}$ & 0.023 & 31.5 & 27.8 & 0 & 22.0\% & 0.25 & 0.05 \\ +Linear (T+E) & 924${}_{-26,+29}$ & 0.031 & 33.0 & 25.1 & 0 & 29.6\% & 47.49 & 0.17 \\ +Linear (T) & 886${}_{-29,+26}$ & 0.019 & 33.9 & 27.1 & 0 & 30.4\% & 47.49 & 0.07 \\ +Linear (D) & 864${}_{-30,+31}$ & 0.014 & 34.5 & 20.5 & 1 & 32.0\% & 1.52 & 0.09 \\ +KNN (T+E) & 687${}_{-31,+39}$ & 0.000 & 37.5 & 37.2 & 0 & 44.7\% & 3.26 & 0.18 \\ +KNN (T) & 608${}_{-44,+38}$ & 0.000 & 38.5 & 38.4 & 0 & 46.3\% & 3.26 & 0.04 \\ +KNN (D) & 449${}_{-72,+46}$ & 0.000 & 40.0 & 39.8 & 0 & 53.7\% & 0.05 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/full/tabarena_leaderboard.csv b/data/full/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..5d3fd1ed6fc3802f6ed16693b2a442b49a8d4405 --- /dev/null +++ b/data/full/tabarena_leaderboard.csv @@ -0,0 +1,42 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),8462.98349033711,33.138286938542635,2912.1242917208747,4.391016265244401,0.39210856520445686,0.34904137217357156,0.03975956193049951,0.029706646727682955,43301.56973340033,539.3969074295995,6.784313725490196,0.2055,7367.614226023356,3.9490213659074573,1408.7828331379249,3.337414261487274,0.33725328327072535,0.31069761727157824,0.015883109174547605,0.0137838373861104,33121.993698706065,185.27028732280235,4.0,13,4,8,26,1643.5,38.7,29.1,0.8553921568627451,0.41392781688875935 +REALMLP (tuned + ensemble),73311.83245680422,16.208294235543228,6156.508467345906,8.023682171781228,0.5008360344149994,0.46214916919286664,0.05665914931491045,0.03807847400553225,177178.04293492812,276.8359259771498,8.058823529411764,0.20511,23874.98158947627,5.369593381881714,6044.758795122033,3.804109742244085,0.48215030896347355,0.45450333260642206,0.021990739446628438,0.018856294653559095,128255.5951822475,249.18630751354758,6.0,3,5,6,37,1600.4,23.8,33.0,0.8235294117647058,0.23789224905961384 +TABM (tuned + ensemble),36462.58337058756,8.791583087761158,5017.258422060137,3.1802055454964386,0.5087786520610308,0.47533274202313947,0.04982932238357392,0.054412466910024854,80230.4785975727,130.9039885664859,8.264705882352942,0.20535,8420.993191123009,2.7141049438052707,3285.8688373170553,1.4723486146222118,0.47020083976979304,0.4531573745622899,0.02530931453787877,0.014678650112589856,47614.825535817596,111.53030159160512,6.0,6,9,2,34,1594.0,27.1,35.9,0.8183823529411764,0.29738848927703415 +GBM (tuned + ensemble),3088.2834950947295,22.058380506500956,771.2925090199282,4.080656504886727,0.5633713731527658,0.5348599223259493,0.06897871778751817,0.0503304665001517,11617.562781863631,427.3461655076801,8.882352941176471,0.2112,1667.667911251386,3.7860276963975696,416.9832926671224,2.6387318875879693,0.5860060403960241,0.548121690931643,0.02669651687138408,0.020199925754737696,9501.568897853978,118.5632719261967,7.0,1,3,2,45,1573.0,29.1,25.1,0.8029411764705883,0.1791687775251789 +CAT (tuned + ensemble),20021.3498029566,3.148214934334516,4130.291158755297,0.9792413821552454,0.580901607522528,0.5441016791405096,0.06042198162454047,0.04401299938732732,58497.64726501732,62.60491239485109,10.617647058823529,0.21079,7127.110804247856,1.3727677133348253,1658.412974283854,0.65278892715772,0.5700946601892775,0.5474283264159685,0.02741677546323218,0.025036082174699086,22349.13952253535,41.43902270876451,10.0,1,2,2,46,1521.3,34.8,24.5,0.7595588235294117,0.15165092074534797 +CAT (tuned),20021.3498029566,0.45918073721700764,4130.291158755297,0.13269281663977184,0.5972104481929726,0.5574797218986534,0.0623947409741932,0.044677601016314815,58497.64726501732,8.058537488270943,11.117647058823529,0.21123,7127.110804247856,0.16668947537740073,1658.412974283854,0.08101450844552308,0.601029599608834,0.5872284062129579,0.033769165522777866,0.02645273717109452,22349.13952253535,5.582269457200397,10.0,2,4,2,43,1506.4,27.1,26.7,0.7470588235294118,0.1738821747274908 +TABM (tuned),36462.58337058756,0.9691277276976176,5017.258422060137,0.3543499279245272,0.5996801665884344,0.5469831569788877,0.06058069094813254,0.07033713181811814,80230.4785975727,13.69951936973594,11.77450980392157,0.21042,8420.993191123009,0.2791590425703261,3285.8688373170553,0.1728818165133111,0.5745991505979262,0.5819755629743474,0.03376668052514742,0.02597899958681764,47614.825535817596,11.564361688735561,11.0,1,5,3,42,1488.7,28.9,29.5,0.7306372549019607,0.16832488064973897 +GBM (tuned),3088.2834950947295,3.280151636356362,771.2925090199282,0.7008989122586311,0.6529668965012456,0.6042462614423542,0.0762989584456341,0.060843556700722376,11617.562781863631,67.66975356224859,12.029411764705882,0.21181,1667.667911251386,0.5984517203436958,416.9832926671224,0.33384935590955944,0.6262832446301,0.6276669488795971,0.0328575001678707,0.027189476852237368,9501.568897853978,17.561755180687065,12.0,0,0,0,51,1483.0,24.3,23.7,0.7242647058823529,0.0977555613690435 +XGB (tuned + ensemble),6066.175009788823,8.000445660682546,1229.0991373608288,3.120951906510457,0.6509288335184475,0.6177441419082373,0.07677541704493614,0.060419782876437525,14800.692905333448,192.56405868832437,12.323529411764707,0.21458,2256.9276883072325,3.0598979949951173,693.4907982506384,1.6904262577061315,0.6793464009669915,0.6380210093138985,0.034214063223028224,0.029912916021147633,11769.825378159274,82.2877578838671,11.0,0,2,0,49,1471.8,29.4,28.2,0.7169117647058824,0.11652490624158723 +MNCA (tuned + ensemble),50831.51192726024,408.7024568246081,6035.4844365627405,41.810485127597225,0.568211153208397,0.5149849170519787,0.0688274981267202,0.0605520811121876,107380.48324313454,2632.7754273052897,12.607843137254902,0.21294,14486.050127214856,13.240725604693095,4621.665633563503,8.148513113458952,0.5565369335340675,0.5118196340084307,0.024506273144865753,0.021283852056777703,89084.88026764008,537.8274695033359,10.0,5,4,4,38,1467.9,29.1,32.4,0.7098039215686275,0.23742574183295365 +CAT (default),190.03885268302784,0.2709793741147243,87.82759080296049,0.13562630112555335,0.643860524899134,0.6196674016579227,0.07608253429058649,0.052193395777049297,428.26912869207973,7.186903437353133,12.911764705882353,0.21142,28.268621895048355,0.1854714552561442,6.827020885706225,0.08026752106160412,0.6746502933037238,0.6538434834236965,0.040283724715528546,0.024042640881410785,113.47556087857672,6.139565341592893,13.0,2,2,2,45,1458.7,30.1,27.7,0.7022058823529411,0.15094934971503085 +XGB (tuned),6066.175009788823,1.6190793797341305,1229.0991373608288,0.7004875821871327,0.7007828758839686,0.656471544613785,0.08021422165682039,0.06483692415176416,14800.692905333448,40.17091923504828,13.852941176470589,0.21499,2256.9276883072325,0.41774741808573407,693.4907982506384,0.3083513292273418,0.7223432295573234,0.6699580711692178,0.039851558091928974,0.03359463160650185,11769.825378159274,12.136706817415504,13.0,0,0,0,51,1435.4,26.7,30.2,0.6786764705882353,0.08553630288392573 +MNCA (tuned),50831.51192726024,15.584971993005873,6035.4844365627405,1.7226505575752833,0.7032607657750378,0.616689223742635,0.07567762156992885,0.06400824126147407,107380.48324313454,104.77227719302421,14.009803921568627,0.21328,14486.050127214856,0.585451708899604,4621.665633563503,0.4747242314576746,0.7724401098237351,0.6447177773449706,0.044069196843860126,0.04711164679001383,89084.88026764008,26.6476237825924,14.0,2,3,2,44,1428.6,28.3,19.8,0.6747549019607844,0.148986454489082 +REALMLP (tuned),73311.83245680422,0.765440520637695,6156.508467345906,0.43160487228962463,0.7615648419724279,0.688207302114276,0.08722617712339523,0.074188574366866,177178.04293492812,13.697358431538635,16.372549019607842,0.215,23874.98158947627,0.26132775147755943,6044.758795122033,0.18050895994575464,0.8260416027840659,0.7037015717498039,0.049718079859661035,0.03476200489314026,128255.5951822475,11.19862181751071,15.0,0,0,2,49,1372.7,29.4,28.2,0.615686274509804,0.08404632995980613 +TABM (default),139.62633961878052,1.0737770921524312,22.4928837749369,0.40818286049864,0.7249223512608948,0.7011689052436787,0.09327453272880078,0.09342522224307015,289.28104127940037,12.833742622244758,16.5,0.21188,40.332407061258955,0.18133597903781468,10.492230631952996,0.13221126395693922,0.8325407781946508,0.7101145169048528,0.04495971412156341,0.03430523183690445,186.77830789084513,10.724711035853641,16.0,0,1,0,50,1373.2,30.3,28.3,0.6125,0.09325166078098776 +NN_TORCH (tuned + ensemble),29590.123960411,13.183612702147375,3729.842332819238,3.5618312259496654,0.7913423906704912,0.7497717151970622,0.09005636083671732,0.08177089153065253,97942.50427646744,194.194370742189,17.294117647058822,0.21479,10848.660208092795,3.946354971991645,2874.6743506773596,1.9516254299583915,0.9196144626274988,0.8091881095168137,0.057918188893721245,0.04800420908274404,61441.834728019065,144.9992483814283,17.0,0,1,0,50,1351.7,22.5,28.1,0.5926470588235294,0.08512462431139158 +MNCA (default),252.05720278889527,8.333707038145958,17.796029105655933,1.1526772129094414,0.8466123430768121,0.7782944440883417,0.11219494516011082,0.0884997718557035,329.3551130182264,62.49957238602975,18.705882352941178,0.21885,36.0034454398685,0.5316168732113309,14.869495839530392,0.30768591310277943,1.0,0.8560745564571833,0.05667068743276982,0.05396804733520216,236.99495784925895,20.43419277465042,20.0,1,0,0,50,1319.3,25.0,22.6,0.5573529411764706,0.09306806365699362 +TABDPT (default),166.18249968905855,63.11337411850366,27.980557312145997,23.24231264879125,0.7067604944091354,0.6787597108860185,0.1077142126086124,0.08676756731636276,577.6877537613715,1486.1376930964866,18.764705882352942,0.22562,99.10453534126282,28.39870807859633,22.52690614988171,8.550738306685618,1.0,0.8430700103282259,0.0458913996174477,0.031816577640631366,528.528670151851,1255.434427440434,22.0,9,1,2,39,1317.6,34.0,25.7,0.5558823529411765,0.24600748397065467 +EBM (tuned + ensemble),34026.536489860475,1.2927426090946903,5478.959788432886,0.48902813323980865,0.8251828316021278,0.8030509938441766,0.12900348188485525,0.12360180683081272,45019.152540237825,18.109773959357398,18.88235294117647,0.21615,2925.6548613442314,0.400875727335612,1331.6775166450918,0.19908260374566636,0.9668458098028044,0.8779163732028434,0.08037070273754277,0.04181933336668213,17751.99098903195,11.13922354039944,18.0,0,2,2,47,1315.1,23.9,22.8,0.5529411764705883,0.09221874270375138 +FASTAI (tuned + ensemble),6629.648996400055,16.16286987151975,1343.5604508466745,7.704229376998794,0.847763610444884,0.8192378884582732,0.1280772560437908,0.10682866114025558,22455.399300542234,425.24771894393024,20.84313725490196,0.21679,3182.895098288854,10.795994228786892,593.237788402893,4.466873745216533,1.0,0.9481484787714531,0.07214417888217062,0.059212630303949465,19851.125229101002,409.94392325616803,22.5,0,1,2,48,1269.7,29.8,28.3,0.503921568627451,0.08034631734787774 +XT (tuned + ensemble),1289.1519403179227,3.0578905248434194,464.9314949446578,1.2901947647379284,0.8827760158511792,0.8396909000564725,0.12954079138309096,0.10995331462599778,5501.829777288316,78.94715148525705,21.372549019607842,0.21814,763.5855970117781,1.8364481396145291,183.01944048073585,0.761281055543471,1.0,0.9539534608972561,0.07457413317133521,0.06267735565102701,3537.1622158448404,68.20231667792302,24.0,0,0,3,48,1257.1,28.1,28.1,0.4906862745098039,0.07834090101915143 +EBM (tuned),34026.536489860475,0.16189759790507796,5478.959788432886,0.07139558038617727,0.8740079837962136,0.8379891966210511,0.13616014326308232,0.1329545911800612,45019.152540237825,2.1906554232861546,21.431372549019606,0.21772,2925.6548613442314,0.0440410852432251,1331.6775166450918,0.022730636596679687,1.0,0.9131323197096801,0.08806871275364736,0.05883998273382683,17751.99098903195,1.2223545537568268,22.0,0,0,1,50,1255.6,23.1,25.4,0.4892156862745098,0.06726792471193889 +REALMLP (default),448.72095363529684,0.7614608424421488,36.551378391408186,0.4234937252271941,0.8980359281995941,0.8395383044225884,0.11120114953337384,0.10170779666511945,1038.880257668142,13.498920180198532,21.735294117647058,0.21553,142.2050400045183,0.2570535076989068,35.35800008725903,0.19140706459681192,1.0,0.9145611503199415,0.08431809961962611,0.06278250719469083,719.9745480267878,11.535269383553011,23.0,0,0,0,51,1249.3,26.3,25.9,0.48161764705882354,0.05674906895653699 +XGB (default),12.860019501050314,0.6666093941607507,3.1825529067488505,0.3000924886961127,0.8935881423667761,0.8416378104789131,0.11133944893771364,0.11101893700190776,40.716829559380564,17.42803351650834,21.833333333333332,0.21667,5.82192047437032,0.3283502260843913,1.9409389396340444,0.12262328807300621,1.0,0.9309644533653846,0.0851641387449773,0.05741943426265329,34.48133232859512,10.22021081972956,21.5,0,0,1,50,1246.8,24.3,31.0,0.4791666666666667,0.057339775655101725 +NN_TORCH (tuned),29590.123960411,0.7234875786018787,3729.842332819238,0.20874333848987678,0.8968088714508368,0.8408346389591811,0.11185498046304537,0.10553076909476344,97942.50427646744,10.830632864633353,22.029411764705884,0.21784,10848.660208092795,0.2631075382232666,2874.6743506773596,0.131112832826372,1.0,0.8983629507892926,0.07707637065160389,0.06734944583074608,61441.834728019065,8.236695976463633,23.0,0,0,0,51,1240.5,29.7,24.1,0.4742647058823529,0.0548648405384145 +EBM (default),104.78526760316363,0.17165232525412033,11.079598604500092,0.09398863718788793,0.8753701223112293,0.8534095921423823,0.14635460063694122,0.1411111056948659,148.0901517975655,3.2508346897376326,23.0,0.21677,11.465454594294231,0.05977429548899333,4.6738599788414605,0.03961948198354664,1.0,0.9668495675431852,0.09424073545770384,0.05947657617617017,75.98904610132907,2.400460311914132,24.0,3,0,0,48,1219.5,25.1,26.2,0.45,0.10564503521194404 +GBM (default),8.619418829147072,1.0011564760166576,3.0462908189340183,0.25265600996978405,0.9255861200115366,0.8769150749775005,0.12054395704677265,0.10827112348591665,44.26353412937288,22.939896191571705,23.176470588235293,0.21706,5.971681065029568,0.2858244842953152,1.9600326879612946,0.14173548842042513,1.0,0.9228303733189597,0.09113184479689307,0.06431302282850372,32.72231234173676,8.612355874421967,23.0,0,0,0,51,1213.4,26.2,25.9,0.4455882352941177,0.047813328248814776 +XT (tuned),1289.1519403179227,0.3436224609158917,464.9314949446578,0.1744826726067124,0.9078855110953847,0.8739049195200678,0.14066728588722457,0.11970990910282847,5501.829777288316,9.97674146406182,23.294117647058822,0.21915,763.5855970117781,0.19129647148980033,183.01944048073585,0.09120693679998855,1.0,0.9787971295350696,0.08494158558463882,0.07201944622991324,3537.1622158448404,8.867381424714122,27.0,0,1,0,50,1209.6,27.2,27.3,0.4426470588235294,0.06759055036499491 +RF (tuned + ensemble),2245.014868743487,2.632504310950734,530.9489806737863,1.2198476221584795,0.9036228102132694,0.8731694684128604,0.13567527813989935,0.12125867838552555,6771.2411119904955,73.51636782881279,23.431372549019606,0.21966,886.9249708387587,1.8479143513573542,373.17861356387994,0.7709478321252661,1.0,0.9878551972335748,0.07975685370779195,0.07527555727694124,5833.628398157948,63.708677480395814,25.0,0,1,2,48,1207.6,24.3,24.6,0.4392156862745098,0.07162992146969836 +FASTAI (tuned),6629.648996400055,1.0291698354002177,1343.5604508466745,0.6091231343301344,0.930127335805145,0.8792501704158417,0.14464636411393333,0.12803909045611062,22455.399300542234,33.54113778341792,24.598039215686274,0.2173,3182.895098288854,0.8112125396728516,593.237788402893,0.306391541190021,1.0,0.9592036547779735,0.08788217007513122,0.07457639304675989,19851.125229101002,31.29559841059079,25.0,0,0,0,51,1178.9,19.3,27.7,0.4100490196078431,0.05064188074717983 +RF (tuned),2245.014868743487,0.2778017885544721,530.9489806737863,0.15585979099514927,0.9311148829801792,0.8984741658212836,0.14718016493069716,0.13254577101689308,6771.2411119904955,8.469005968793565,25.519607843137255,0.22016,886.9249708387587,0.17402595943874782,373.17861356387994,0.08526202343425164,1.0,0.997425996963559,0.08945470542596179,0.09283342230804408,5833.628398157948,7.94881742200239,28.0,0,1,1,49,1152.6,27.7,27.7,0.38700980392156864,0.060398062899333166 +NN_TORCH (default),74.85835825680128,0.5412627521423472,14.695998951842784,0.19921856017345402,0.9806438127346997,0.9515909665803707,0.1620847222292403,0.15205391708922938,327.51821759182326,9.71084645377221,28.65686274509804,0.22759,34.192402362823486,0.22645958264668783,9.990997226772679,0.1258046787872722,1.0,1.0,0.12442133778369147,0.10251098454201864,204.15320945265722,7.844934898940183,29.0,0,0,0,51,1070.4,23.0,24.8,0.30857843137254903,0.03807980911291169 +FASTAI (default),27.993367113578813,1.070063710732138,4.951242935164595,0.5056871108143457,0.980597127305428,0.953088543036155,0.19826288690738417,0.1907306783039593,87.08936574190912,29.523324076097715,30.470588235294116,0.24127,12.973222759034899,0.8480017715030246,2.8561126039251374,0.37317813888351864,1.0,1.0,0.15050538424095006,0.14150141420291248,77.36570184770021,28.70383139894662,33.0,0,0,0,51,1014.0,25.6,27.4,0.26323529411764707,0.03579216187562658 +RF (default),5.289521401640116,0.18747803105248345,0.8914167974682212,0.07557012377860906,0.9921615843984544,0.9665274322030536,0.20072931494064547,0.21489407007492525,11.63160057311077,4.814878723410724,30.862745098039216,0.2484,1.2628253036075168,0.08775801128811306,0.43425701731008426,0.05354175385774865,1.0,1.0,0.14295610983302143,0.12316470752268181,7.010079512208331,3.9076874559468244,32.0,0,0,0,51,1000.0,0.0,0.0,0.2534313725490196,0.03431062861608254 +XT (default),3.067954011524425,0.20851136391458946,0.7599985259503769,0.07668050932017083,0.976846527522819,0.95762955963915,0.21975063089427654,0.2346925990623719,6.922794409042171,5.082769758447109,31.53921568627451,0.24428,1.029017792807685,0.0932659043206109,0.2473449527431567,0.04980480471851431,1.0,1.0,0.17351668887004712,0.13674733154934707,5.707021421143385,4.456198846093199,34.0,0,0,0,51,977.3,22.3,29.0,0.23651960784313725,0.03600270779735502 +LR (tuned + ensemble),298.0772925112502,1.5473658189794337,106.92655310814473,0.5272909274052285,0.9690155979901051,0.9604101018326597,0.2964561838358268,0.358017663742451,1326.7185005565484,20.631703511708864,33.01960784313726,0.25172,171.24826147821216,0.28876688745286727,47.49214683366935,0.1676693138737952,1.0,1.0,0.23206264161510082,0.2393082389556497,1064.131515124883,12.175213508812917,36.0,0,0,1,50,923.5,28.4,25.8,0.19950980392156864,0.0397840394514003 +LR (tuned),298.0772925112502,0.41952294077488855,106.92655310814473,0.14529639602271893,0.9805057099085136,0.9657886847720463,0.3043853512990743,0.367315025593031,1326.7185005565484,5.927745129039777,33.94117647058823,0.25255,171.24826147821216,0.10993631680806477,47.49214683366935,0.0665447885938415,1.0,1.0,0.24354532900807213,0.2575122541077485,1064.131515124883,4.067640041721783,37.0,0,0,1,50,885.6,25.8,28.3,0.17647058823529413,0.03685974160525651 +LR (default),6.912347784956555,0.4444732105550163,2.521765696020704,0.1600876370747322,0.9855950536843827,0.9704218334420531,0.31958653451500185,0.4128363986558877,32.35021650084229,7.018103298699637,34.450980392156865,0.2546,5.298751910527547,0.12218634287516277,1.5162047581506444,0.0887949061495271,1.0,1.0,0.26933687033179554,0.2880686130933437,22.991652687961256,4.725399415979996,37.5,1,0,0,50,864.4,30.3,29.5,0.16372549019607843,0.04873304319530605 +KNN (tuned + ensemble),185.73769010095035,9.265668427242952,28.077306593709025,0.6763553759282402,1.0,0.9949115115995666,0.447040608541432,0.6075041958882814,80.584480435074,64.66285269534394,37.53921568627451,0.34658,12.91673379474216,0.27287014325459796,3.260792818960679,0.17676389939136214,1.0,1.0,0.4055189293595234,0.6658931347136859,60.87290895584066,13.90517184260326,39.0,0,0,0,51,686.9,38.4,30.9,0.08651960784313725,0.02684707640182819 +KNN (tuned),185.73769010095035,1.4108907164571591,28.077306593709025,0.11144693589219826,1.0,0.9978177275394109,0.46275440863389483,0.6502357234318229,80.584480435074,10.182721712640424,38.53921568627451,0.35012,12.91673379474216,0.08784447775946723,3.260792818960679,0.03624739765555662,1.0,1.0,0.420693335539855,0.740394718900499,60.87290895584066,2.2791618782574123,40.0,0,0,0,51,608.2,37.7,43.3,0.06151960784313725,0.026046267610479377 +KNN (default),1.8202055170645122,0.18718713898544476,0.4967972483415847,0.03640307146637206,1.0,0.9999536720947403,0.5365866131716529,0.9156894938911053,1.035103875827835,2.1647566343855695,39.96078431372549,0.40859,0.22567404641045463,0.036888705359564886,0.050692503527237594,0.021942545900661376,1.0,1.0,0.5109534123207524,1.0,1.0,1.2564234767690339,41.0,0,0,0,51,449.2,45.1,71.3,0.025980392156862746,0.025126134267953678 diff --git a/data/full/time_plot.pdf b/data/full/time_plot.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c861a227698ca3f02bb107756bdde8c1852197b6 Binary files /dev/null and b/data/full/time_plot.pdf differ diff --git a/data/full/time_plot.png.zip b/data/full/time_plot.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..37122cdeb1b531b39b2d2530daa036e5ebf8752f --- /dev/null +++ b/data/full/time_plot.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1425f58f7556e77814ca63062d6ffbc1d9f5f0b02dfed773d91d750d54675e9 +size 338592 diff --git a/data/full/tuning-impact-elo-horizontal.pdf b/data/full/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0e1072b99e29499c07680838b06d84e029b29788 Binary files /dev/null and b/data/full/tuning-impact-elo-horizontal.pdf differ diff --git a/data/full/tuning-impact-elo-horizontal.png.zip b/data/full/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..c433d9669a016f04e47cd229da721e21f36d6163 --- /dev/null +++ b/data/full/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e8ca383234264edb2f91b7ceb983dbec4663cf7525f4cf1fc75a12cfef8b89dd +size 129065 diff --git a/data/full/tuning-impact-elo.pdf b/data/full/tuning-impact-elo.pdf new file mode 100644 index 0000000000000000000000000000000000000000..fe71f5b2d86e8b7712e7104eec0e385ef17aff02 Binary files /dev/null and b/data/full/tuning-impact-elo.pdf differ diff --git a/data/full/tuning-impact-elo.png.zip b/data/full/tuning-impact-elo.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..fb1a0019a4401584b3f58c50723737e5b6d23601 --- /dev/null +++ b/data/full/tuning-impact-elo.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8e85834f7ee7086c3ffdd269028fb734dab4ae9488a5e4600a610c3bd5390736 +size 126040 diff --git a/data/lite/full-cls/figures/critical-diagram.pdf b/data/lite/full-cls/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..fcba36cf5f60298f6cc49fa20f954977818ca780 Binary files /dev/null and b/data/lite/full-cls/figures/critical-diagram.pdf differ diff --git a/data/lite/full-cls/figures/critical-diagram.png.zip b/data/lite/full-cls/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..fb1c72a0f47700992a4bbc21723ed521c2ed270c --- /dev/null +++ b/data/lite/full-cls/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a4f696da207039843ab87b576f7f02cf853c814c3b21cac719bfaf87a11d8558 +size 295197 diff --git a/data/lite/full-cls/leaderboard.tex b/data/lite/full-cls/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..ca4d53e34776dd65bdc5f3d0ea7d5cc133f0ebb0 --- /dev/null +++ b/data/lite/full-cls/leaderboard.tex @@ -0,0 +1,49 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1537${}_{-39,+35}$}} & \textcolor{gold}{\textbf{0.564}} & \textcolor{gold}{\textbf{7.2}} & \textcolor{gold}{\textbf{2.9}} & \textcolor{gold}{\textbf{7}} & \textcolor{gold}{\textbf{6.2\%}} & 1355.34 & 1.98 \\ +TabM (T+E) & \textcolor{silver}{\textbf{1435${}_{-28,+34}$}} & \textcolor{silver}{\textbf{0.492}} & \textcolor{silver}{\textbf{10.6}} & \textcolor{silver}{\textbf{4.4}} & \textcolor{bronze}{\textbf{3}} & \textcolor{silver}{\textbf{9.3\%}} & 2461.81 & 1.28 \\ +RealMLP (T+E) & \textcolor{bronze}{\textbf{1427${}_{-29,+34}$}} & \textcolor{bronze}{\textbf{0.419}} & \textcolor{bronze}{\textbf{10.7}} & 6.1 & 1 & 9.9\% & 5463.43 & 3.35 \\ +LightGBM (T+E) & 1420${}_{-29,+33}$ & 0.404 & 10.9 & 7.9 & 0 & 10.8\% & 391.23 & 1.24 \\ +XGBoost (T+E) & 1392${}_{-30,+30}$ & 0.321 & 12.2 & 5.9 & 2 & 11.3\% & 682.62 & 0.97 \\ +CatBoost (T+E) & 1387${}_{-19,+29}$ & 0.379 & 12.3 & 7.2 & 0 & \textcolor{bronze}{\textbf{9.8\%}} & 1326.58 & 0.53 \\ +CatBoost (T) & 1371${}_{-29,+28}$ & 0.365 & 13.0 & 8.4 & 0 & 10.4\% & 1326.58 & 0.05 \\ +CatBoost (D) & 1367${}_{-27,+32}$ & 0.345 & 13.2 & 7.4 & 2 & 10.8\% & 5.74 & 0.11 \\ +TabM (T) & 1366${}_{-22,+29}$ & 0.407 & 13.1 & 5.4 & 2 & 10.3\% & 2461.81 & 0.14 \\ +ModernNCA (T) & 1360${}_{-25,+38}$ & 0.278 & 13.4 & 5.9 & 2 & 10.3\% & 4853.07 & 0.47 \\ +ModernNCA (T+E) & 1355${}_{-22,+34}$ & 0.373 & 13.7 & \textcolor{bronze}{\textbf{5.1}} & 2 & 10.2\% & 4853.07 & 9.39 \\ +LightGBM (T) & 1352${}_{-24,+30}$ & 0.318 & 13.8 & 9.4 & 0 & 11.8\% & 391.23 & 0.19 \\ +XGBoost (T) & 1351${}_{-23,+32}$ & 0.266 & 13.7 & 7.5 & 1 & 11.6\% & 682.62 & 0.18 \\ +TabM (D) & 1296${}_{-36,+30}$ & 0.271 & 16.3 & 7.7 & 1 & 13.9\% & 10.31 & 0.14 \\ +RealMLP (T) & 1278${}_{-25,+33}$ & 0.202 & 17.0 & 11.4 & 0 & 13.8\% & 5463.43 & 0.19 \\ +TorchMLP (T+E) & 1276${}_{-27,+26}$ & 0.222 & 17.2 & 9.6 & 1 & 13.0\% & 2480.32 & 2.18 \\ +EBM (T+E) & 1276${}_{-24,+28}$ & 0.180 & 17.1 & 7.2 & 2 & 15.3\% & 1011.59 & 0.22 \\ +EBM (T) & 1236${}_{-23,+32}$ & 0.122 & 18.9 & 9.2 & 1 & 16.0\% & 1011.59 & 0.02 \\ +EBM (D) & 1206${}_{-29,+29}$ & 0.136 & 20.5 & 9.8 & 1 & 17.0\% & 5.44 & 0.06 \\ +FastaiMLP (T+E) & 1202${}_{-21,+29}$ & 0.196 & 20.5 & 11.5 & 0 & 17.0\% & 622.79 & 5.04 \\ +XGBoost (D) & 1200${}_{-29,+28}$ & 0.105 & 20.6 & 11.4 & 1 & 14.8\% & 1.88 & 0.12 \\ +ExtraTrees (T+E) & 1192${}_{-25,+32}$ & 0.104 & 21.0 & 11.8 & 0 & 16.7\% & 197.16 & 0.72 \\ +TorchMLP (T) & 1190${}_{-25,+32}$ & 0.096 & 21.0 & 15.2 & 0 & 15.4\% & 2480.32 & 0.12 \\ +RealMLP (D) & 1181${}_{-21,+34}$ & 0.090 & 21.4 & 11.8 & 1 & 15.5\% & 34.76 & 0.19 \\ +ModernNCA (D) & 1176${}_{-29,+28}$ & 0.134 & 21.8 & 11.2 & 1 & 17.2\% & 13.35 & 0.35 \\ +RandomForest (T+E) & 1164${}_{-28,+31}$ & 0.107 & 22.3 & 10.6 & 1 & 17.0\% & 327.31 & 0.71 \\ +ExtraTrees (T) & 1155${}_{-26,+33}$ & 0.079 & 22.6 & 10.3 & 0 & 17.8\% & 197.16 & 0.08 \\ +LightGBM (D) & 1150${}_{-33,+27}$ & 0.076 & 23.1 & 19.0 & 0 & 16.5\% & 2.25 & 0.16 \\ +TabDPT (D) & 1137${}_{-29,+35}$ & 0.176 & 23.4 & 6.5 & \textcolor{silver}{\textbf{4}} & 17.6\% & 22.39 & 8.59 \\ +FastaiMLP (T) & 1128${}_{-26,+30}$ & 0.089 & 24.0 & 14.4 & 0 & 19.0\% & 622.79 & 0.35 \\ +RandomForest (T) & 1116${}_{-24,+34}$ & 0.076 & 24.4 & 16.8 & 0 & 17.9\% & 327.31 & 0.08 \\ +TorchMLP (D) & 1020${}_{-28,+34}$ & 0.021 & 28.5 & 24.9 & 0 & 21.5\% & 5.73 & 0.15 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.011 & 29.2 & 22.8 & 0 & 24.9\% & 0.37 & 0.04 \\ +FastaiMLP (D) & 990${}_{-30,+32}$ & 0.026 & 29.5 & 25.7 & 0 & 23.2\% & 3.12 & 0.33 \\ +ExtraTrees (D) & 928${}_{-38,+31}$ & 0.010 & 31.6 & 27.6 & 0 & 27.9\% & 0.24 & 0.04 \\ +Linear (T+E) & 927${}_{-25,+35}$ & 0.042 & 31.7 & 17.3 & 1 & 31.3\% & 51.96 & 0.17 \\ +Linear (T) & 893${}_{-27,+31}$ & 0.026 & 32.6 & 25.7 & 0 & 32.1\% & 51.96 & 0.06 \\ +Linear (D) & 869${}_{-35,+29}$ & 0.019 & 33.4 & 31.1 & 0 & 33.0\% & 1.62 & 0.09 \\ +KNN (T+E) & 745${}_{-33,+34}$ & 0.000 & 36.3 & 29.9 & 0 & 48.4\% & 3.51 & 0.16 \\ +KNN (T) & 678${}_{-48,+33}$ & 0.000 & 37.5 & 25.8 & 0 & 50.5\% & 3.51 & 0.04 \\ +KNN (D) & 467${}_{-52,+55}$ & 0.000 & 39.8 & 39.4 & 0 & 58.9\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/lite/full-cls/tabarena_leaderboard.csv b/data/lite/full-cls/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..67b9caa6ae8057b8e1a4e394cd2a562815cb1f8e --- /dev/null +++ b/data/lite/full-cls/tabarena_leaderboard.csv @@ -0,0 +1,42 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),7774.991528053033,19.875839289865997,2811.0795427294815,3.0481623340146933,0.4357260444094882,0.3637085860567527,0.06245100760478425,0.03653837673944927,29846.24559964093,265.3700814985468,7.197368421052632,0.157955,7014.845859646797,3.7256141901016235,1355.3359335244184,1.9841131308583402,0.3593825434326649,0.3030096923956198,0.03219178172950288,0.012606181703571726,21384.37692728419,133.53161235412554,6.0,7,5,2,24,1536.6,34.7,38.4,0.8450657894736842,0.3447152162656293 +TABM (tuned + ensemble),34310.62852196317,8.826874965115598,4300.923574958667,3.470904002174896,0.5077918677314276,0.4928011834178911,0.09320926958157053,0.07730800937457659,45131.79918122951,135.34700738700144,10.578947368421053,0.174405,8260.626611948013,3.27972948551178,2461.8116026347016,1.282862647131695,0.47136645123500964,0.48985326453336486,0.043573580667432665,0.023331148793904504,37327.19260107514,113.73588992793104,8.0,3,2,5,28,1434.7,33.1,27.1,0.7605263157894737,0.22504512519953232 +REALMLP (tuned + ensemble),75157.8044012283,16.487929639063385,5934.025599797772,7.762023296294149,0.5811880944579212,0.5319261312902436,0.09944852534202508,0.06176461994817447,134658.591064252,301.4009053371881,10.736842105263158,0.17028,21574.945648550987,6.0496333837509155,5463.431997434496,3.3505522191847916,0.5453504649932683,0.5231839760881247,0.057529385085511586,0.024185595487775234,90262.54064674574,233.90844576360837,10.5,1,1,3,33,1426.7,33.1,29.0,0.756578947368421,0.16368475192391602 +GBM (tuned + ensemble),2920.3429353990055,11.208824527891059,755.2524337234863,2.3454352129897833,0.5957286671834613,0.5737983496916242,0.1078370952464348,0.07600581388469646,8248.756938263266,205.60882328189754,10.947368421052632,0.169945,1594.5504041910172,3.0867438316345215,391.23230441146256,1.2433237336531138,0.6251845339504845,0.6067647774155833,0.06327985294726352,0.02460541630128259,6721.07699522549,94.37802750757459,11.5,0,0,4,34,1420.2,32.5,28.9,0.7513157894736842,0.12643804209206685 +XGB (tuned + ensemble),5799.34833753109,5.967870285636501,1147.820903083116,2.46393255534894,0.6794057655741552,0.6067073014576646,0.1127775838071689,0.08327000158924112,10132.57248871615,131.35779311288925,12.197368421052632,0.17348,1593.032392859459,2.3060801029205322,682.6224605815719,0.9713359475135803,0.7126419091535116,0.6013161780727305,0.0776581625864774,0.030772307456733206,7276.753308183146,76.46662354595179,10.0,2,2,1,33,1391.7,29.8,29.4,0.7200657894736842,0.16847782936771857 +CAT (tuned + ensemble),16302.859700416264,2.6250121342508415,3325.427412489986,0.969863744839929,0.6209138650557025,0.5577668345741698,0.09766402859128138,0.06197009345110731,25676.604134391517,50.871466139342466,12.263157894736842,0.16073500000000002,4791.775886774063,1.3303372859954834,1326.5813740920516,0.5313769242211359,0.6153632608570143,0.508710826413508,0.047928271484256546,0.027465195510299092,19895.263297992577,42.031537823549,11.25,0,1,5,32,1387.1,28.6,18.6,0.718421052631579,0.13853532024112644 +CAT (tuned),16302.859700416264,0.5146497425280119,3325.427412489986,0.11950448243427039,0.6346801229162512,0.607823142068742,0.10418042872521101,0.06643409939079102,25676.604134391517,8.18618115589542,12.986842105263158,0.16238999999999998,4791.775886774063,0.08605468273162842,1326.5813740920516,0.051041753580035654,0.6314852452199684,0.6752515912811781,0.05890693710569872,0.03050184583052966,19895.263297992577,3.2560454821039713,12.5,0,2,0,36,1371.1,27.1,28.8,0.7003289473684211,0.11939509746789889 +TABM (tuned),34310.62852196317,1.0356075324510272,4300.923574958667,0.38276682295841274,0.5928893193157706,0.5607706108532287,0.1027377007459753,0.09206173915144808,45131.79918122951,14.823042284101287,13.118421052631579,0.175585,8260.626611948013,0.24344730377197266,2461.8116026347016,0.13835550204254063,0.5877464635288101,0.5378454940167339,0.05088886416034427,0.03460069818708005,37327.19260107514,9.329646701651438,10.5,2,3,1,32,1365.6,28.2,21.7,0.6970394736842105,0.18517248003295747 +CAT (default),218.94677375492296,0.43937695026397705,108.24085958784511,0.1671626095074056,0.6551829671744798,0.6279082904310321,0.10840002368024312,0.06735018596604794,424.08805051709817,10.569617726030572,13.197368421052632,0.161695,18.44840371608734,0.411457896232605,5.736216485552211,0.10729215986107409,0.6872314742824219,0.7060350943661016,0.06287271652427706,0.027531961871926348,91.81803735744086,6.75924675825947,13.0,2,0,0,36,1366.9,31.6,26.6,0.6950657894736842,0.13481246242743797 +MNCA (tuned),56939.09770153698,19.318985493559587,6212.23493998028,1.9628894017927039,0.721681429841744,0.5940816418122864,0.10280356014466159,0.08704490836977338,79415.11574896757,126.43602273733568,13.43421052631579,0.175715,14329.4857878685,0.6404991149902344,4853.0743321238315,0.4731015689224996,0.7744612480983963,0.5643618086641096,0.08252221274713517,0.030308265873669055,67721.7850080898,32.352773289608024,11.75,2,1,3,32,1360.5,37.6,24.5,0.6891447368421053,0.16812302923874708 +MNCA (tuned + ensemble),56939.09770153698,351.628879396539,6212.23493998028,38.90413717422724,0.6269167185273185,0.5596391671494632,0.10228440690238981,0.08788652793549975,79415.11574896757,2408.3540246144216,13.657894736842104,0.18282500000000002,14329.4857878685,13.981043934822083,4853.0743321238315,9.386068319220197,0.6265388929939845,0.5102224133177858,0.05971605848744582,0.03645962776745639,67721.7850080898,659.6644218615952,12.0,2,3,4,29,1354.7,33.2,21.2,0.6835526315789474,0.19607459138590716 +XGB (tuned),5799.34833753109,1.27206594065616,1147.820903083116,0.6531124582306008,0.7344435643624297,0.6512402505751366,0.11578814590786622,0.08914600718423207,10132.57248871615,32.25362424926425,13.736842105263158,0.176915,1593.032392859459,0.36533260345458984,682.6224605815719,0.18295137338644407,0.7444722954009122,0.6575011741041673,0.08274352833208848,0.03384177972942684,7276.753308183146,12.253516308447622,12.5,1,2,0,35,1351.4,32.0,22.1,0.6815789473684211,0.13421755507229133 +GBM (tuned),2920.3429353990055,1.8159009845633256,755.2524337234863,0.5236582654503446,0.6821201235665096,0.6376584779749269,0.11758507354539502,0.09099571025788182,8248.756938263266,42.22279271596955,13.763157894736842,0.16955,1594.5504041910172,0.5379931926727295,391.23230441146256,0.18570879781926358,0.7168421399910502,0.6786419470829066,0.06739368355222075,0.03541051328529879,6721.07699522549,11.394220763848338,14.0,0,1,0,37,1351.8,30.0,23.9,0.680921052631579,0.10643616210187982 +TABM (default),151.95178728354605,1.2483709925099422,21.512717277386358,0.45562303344686134,0.7289050360420961,0.6786453203068225,0.13877047460946834,0.11611812197565075,207.28000362311982,15.083359148300596,16.289473684210527,0.17709,33.152939319610596,0.19442057609558105,10.309423174430956,0.13586267957403653,0.8404844924976476,0.7288272190296358,0.09071877675178053,0.04657437619358792,135.49680849639785,10.343117540463151,15.0,1,2,1,34,1296.5,29.4,35.8,0.6177631578947368,0.13010935542286323 +REALMLP (tuned),75157.8044012283,0.8101433327323512,5934.025599797772,0.42781608188450077,0.7977234575542793,0.7533802047762741,0.1379713743754094,0.10230348368091405,134658.591064252,14.869687448366182,16.973684210526315,0.16913499999999998,21574.945648550987,0.25513756275177,5463.431997434496,0.18623669559057404,0.8787741747777617,0.8403133538788359,0.1099830751870754,0.052454140783326594,90262.54064674574,11.894743904291648,16.0,0,1,0,37,1277.6,32.3,24.3,0.6006578947368421,0.08780857466817892 +EBM (tuned + ensemble),36486.59858077451,1.3923115416576988,7650.518716752531,0.5440226127306762,0.8200760362636922,0.7495142261116479,0.1532911737872781,0.12272176924427085,21740.48293152139,22.192423380997223,17.06578947368421,0.177435,2273.2198663949966,0.35086679458618164,1011.590888307647,0.2249400302454423,0.9326307523783561,0.8863269928616002,0.06694552802938192,0.040508730984801566,14796.589958532411,12.4343949892923,18.0,2,1,1,34,1276.0,27.8,23.2,0.5983552631578948,0.1395908790473925 +NN_TORCH (tuned + ensemble),24155.06064368549,16.430658089487178,3018.869066392961,4.171284511368181,0.77831117981961,0.7704711043097339,0.12989579450651284,0.09221372497935572,54165.934988934685,250.8131490307278,17.210526315789473,0.174655,9535.178918361664,4.419585108757019,2480.3150361602966,2.175346818589723,0.9031626632034823,0.8933582647718805,0.07336033272937209,0.051810123946366915,43976.90578112008,186.31979424186875,17.0,1,0,1,36,1276.3,25.4,27.0,0.5947368421052631,0.10467542399999485 +EBM (tuned),36486.59858077451,0.1713965378309551,7650.518716752531,0.07548087023444203,0.8777961641049835,0.8019073020584209,0.15956901825815256,0.1281310505772345,21740.48293152139,2.6567815809050077,18.907894736842106,0.176925,2273.2198663949966,0.040004611015319824,1011.590888307647,0.024237136044788353,1.0,0.9934554973822004,0.08520888190451481,0.038628140113403854,14796.589958532411,1.267048812472469,20.5,1,1,1,35,1236.3,31.4,22.2,0.5523026315789473,0.10908364611485215 +EBM (default),91.60202875890229,0.20198602425424675,10.86984880564699,0.10636637956102298,0.8643875945345197,0.8073053583195938,0.1703079614773141,0.13614965004123059,91.71524858307365,4.255062874198898,20.460526315789473,0.17785499999999999,9.752015590667725,0.06734633445739746,5.438852341621796,0.06019285478263442,1.0,1.0,0.09629003634838529,0.04426810265651604,55.554769096641394,3.385160223476314,21.75,1,1,0,36,1206.1,28.2,29.0,0.5134868421052632,0.10243386370325318 +FASTAI (tuned + ensemble),7277.4807652172285,19.036358243540715,1381.3381577435234,8.418367504420003,0.8036000089866165,0.8046280351855666,0.16981138145223337,0.10386737138608466,18515.61407347416,513.5182779565009,20.460526315789473,0.17479,3045.3940094709396,12.611860513687134,622.7909689289006,5.036877045866174,1.0,1.0,0.09538503729238873,0.06507626741615394,12734.142743886789,430.3713585782513,21.0,0,1,0,37,1202.4,28.7,20.9,0.5134868421052632,0.0870412065035782 +XGB (default),13.091729760169983,0.5841734095623619,3.2452155420082907,0.31884078802225646,0.8951319461246384,0.8118822894959987,0.14781292995222475,0.1356243199914764,30.28280029264466,15.70638363121535,20.644736842105264,0.174975,6.213568687438965,0.346899151802063,1.8757318201110196,0.1209323363837647,1.0,0.9614805112978229,0.10791165646668927,0.05042512699967597,23.70039322753218,9.284870866458713,20.0,1,0,0,37,1200.5,27.1,28.8,0.5088815789473684,0.08744467155181224 +XT (tuned + ensemble),1323.525801639808,3.044984867698268,475.9687294303007,1.5051105818150412,0.8963117961824938,0.8311319166848262,0.16669130455950426,0.13258535149758138,4483.031571431591,90.45918158334129,21.026315789473685,0.17722,771.266873717308,2.041845917701721,197.16355296740528,0.7207715417972143,1.0,1.0,0.11915769010932858,0.06415374880272719,2772.464639120071,76.33017880937523,23.25,0,1,1,36,1191.6,32.0,24.4,0.4993421052631579,0.08509267909766356 +NN_TORCH (tuned),24155.06064368549,0.7120582053535863,3018.869066392961,0.21113726214708323,0.903659699715494,0.8168365246349634,0.15378028282956727,0.11501492969091702,54165.934988934685,11.64610038179527,21.026315789473685,0.170275,9535.178918361664,0.22691142559051514,2480.3150361602966,0.11665950752975736,1.0,1.0,0.10185992118567738,0.061748729900024175,43976.90578112008,8.64392602078908,22.5,0,0,0,38,1190.0,31.4,24.6,0.4993421052631579,0.06600302000506919 +REALMLP (default),476.9483523494319,0.8381578796788266,36.30909370748047,0.4350989512151319,0.9101816949003365,0.8316771371273155,0.15535502377419658,0.11182970543082638,843.0795281741405,15.31190920332024,21.43421052631579,0.17567,137.81327652931213,0.256045937538147,34.76442448369694,0.19322861602793034,1.0,1.0,0.10615244914625077,0.07415873564674558,579.0193625765228,12.615190387657702,24.75,1,0,1,36,1181.1,33.5,21.0,0.48914473684210524,0.08497921116275692 +MNCA (default),295.14466257471787,10.148893080259624,18.71986495055019,1.275643167953242,0.8657835470756838,0.8379326033536787,0.1716779557726991,0.1157135550371373,239.59173183900742,74.09701288388523,21.82894736842105,0.190215,30.572218418121338,0.5992807149887085,13.346578494858026,0.35250805744744207,1.0,1.0,0.146823748577877,0.07898980482307513,166.7604765821143,25.21201207586398,23.0,1,1,0,36,1176.5,28.0,28.5,0.47927631578947366,0.08898925215551719 +RF (tuned + ensemble),2283.87174061725,2.1008308561224687,539.2243606533888,1.2856745856516028,0.8931944011226663,0.8652413779454514,0.17017473101932454,0.14889965091379367,5202.357004362883,79.46832861690719,22.342105263157894,0.17431000000000002,864.6687717437744,1.8380836248397827,327.30532028870516,0.7083122934636021,1.0,1.0,0.09997743095832684,0.07317646474552662,4181.556008332933,65.49937121638476,25.5,1,1,0,36,1164.2,30.1,27.3,0.4664473684210526,0.09469255569936558 +XT (tuned),1323.525801639808,0.31659403600190816,475.9687294303007,0.1798597078129874,0.9205594165466939,0.8385252535333505,0.17787864194713782,0.13964170457893255,4483.031571431591,9.660217818491914,22.644736842105264,0.18019000000000002,771.266873717308,0.16697990894317627,197.16355296740528,0.08311920181240409,1.0,1.0,0.12426195121937145,0.07682782754093201,2772.464639120071,7.999561250297406,26.0,0,3,1,34,1155.3,33.0,25.4,0.4588815789473684,0.09727770972230496 +GBM (default),8.557482913920754,0.7352644769768966,2.999430927452211,0.212636163127602,0.923947709053949,0.8704582853978946,0.16461350212197798,0.1421467372098976,33.42403528282587,16.15506317327496,23.06578947368421,0.17846,6.559425115585327,0.615138053894043,2.2516979473740024,0.15858503371213617,1.0,1.0,0.1406276401064807,0.0550717820779026,20.494723229907102,10.269446661494042,23.5,0,0,0,38,1149.8,26.5,32.6,0.44835526315789476,0.052676848571788386 +TABDPT (default),170.86559940639296,65.69096382668144,26.71478740353263,21.258106281632944,0.8235947911156236,0.8149153057596086,0.17618316229008854,0.1334188158755202,467.1692516605129,1539.0473534455273,23.38157894736842,0.191185,97.51883804798126,28.29759955406189,22.386440735738876,8.590693574547014,1.0,1.0,0.11828777342192798,0.07097771501831898,386.2317213587028,1143.1582415263433,26.5,4,0,0,34,1137.0,34.1,28.1,0.44046052631578947,0.15425691326056737 +FASTAI (tuned),7277.4807652172285,1.090680348245721,1381.3381577435234,0.6851415707626749,0.9108193738616642,0.8688531957576351,0.19028587001438002,0.1353761891515429,18515.61407347416,41.17026917318611,24.026315789473685,0.175545,3045.3940094709396,0.8673808574676514,622.7909689289006,0.34554260532273706,1.0,1.0,0.1546465989062637,0.07157514101067176,12734.142743886789,28.552621407710575,25.5,0,1,0,37,1127.5,29.2,25.3,0.4243421052631579,0.06959555459599899 +RF (tuned),2283.87174061725,0.22623545872537712,539.2243606533888,0.14783605660070437,0.9239206793690314,0.9198075650475217,0.17941201693431327,0.16044862859190603,5202.357004362883,7.803307308994161,24.355263157894736,0.17563499999999999,864.6687717437744,0.15117239952087402,327.30532028870516,0.08452519779888767,1.0,1.0,0.11191460592454666,0.09657799136409392,4181.556008332933,6.996946602769185,25.5,0,1,0,37,1115.8,33.8,23.1,0.4161184210526316,0.059694214378136094 +NN_TORCH (default),47.78759841542495,0.6881854220440513,11.127495716627488,0.2952049566930359,0.9794017223291384,0.954809605073461,0.21542278792412453,0.16915538129196842,136.11141421046577,15.027014536020943,28.460526315789473,0.180125,24.229564905166626,0.31156718730926514,5.726755660627104,0.14843028917594223,1.0,1.0,0.14433311546503363,0.09575304821587584,131.897186986728,9.730207537012728,29.5,0,0,0,38,1019.5,33.1,27.6,0.3134868421052632,0.04010552355542409 +RF (default),4.080644030320017,0.13749531068299947,0.8126703096595244,0.06965281933938666,0.9894800211663467,0.9674716972377848,0.24916098049052274,0.23883179708413727,6.74726637377606,4.2459113559317,29.157894736842106,0.20493,1.552122712135315,0.09239459037780762,0.37380590277783665,0.037675282187559014,1.0,1.0,0.22032371855138488,0.13407694191427183,5.06536582970867,3.354866759305418,31.75,0,0,1,37,1000.0,0.0,0.0,0.29605263157894735,0.043926004795501065 +FASTAI (default),30.567647507316188,1.0886577556007786,5.142705410747499,0.514750514881177,0.9739593024362323,0.9433285405953568,0.23208065955007007,0.1822497405417556,72.5941463445461,31.969331116236056,29.473684210526315,0.187165,14.332883596420288,0.6891474723815918,3.1247662358290373,0.33408521032303745,1.0,1.0,0.16181477069756522,0.12140229652210177,51.9702290983138,26.158132978083096,31.0,0,0,0,38,990.5,32.0,29.2,0.2881578947368421,0.03886887345261015 +XT (default),2.8151443882992395,0.17746856965516744,0.7494419232366839,0.0764609601145254,0.9898367767783371,0.9716632164859568,0.2793547475052598,0.2725382456849271,4.797654893597667,4.664132317562142,31.63157894736842,0.20700000000000002,0.9193735122680664,0.08453762531280518,0.24344244584214264,0.037069730458521875,1.0,1.0,0.2174961946642111,0.15156553071339784,3.5050999047562326,4.1617612391934,34.5,0,0,0,38,927.6,30.2,37.2,0.23421052631578948,0.036229169693596776 +LR (tuned + ensemble),310.8921214467601,1.6511000143854242,112.04387665947259,0.6061180299813982,0.95841567098672,0.9708437365552002,0.31310270156620607,0.28149705038959444,1051.5597600046694,27.942607605611126,31.657894736842106,0.20664,172.39098751544952,0.3049362897872925,51.96211560156496,0.1713220944305931,1.0,1.0,0.2560382503294761,0.1682426614669617,652.0216614396725,8.851961834833686,35.0,1,0,0,37,927.0,34.9,24.7,0.23355263157894737,0.057640312921943146 +LR (tuned),310.8921214467601,0.5151690432899877,112.04387665947259,0.17414856714912683,0.9738366106666893,0.97980647335986,0.32065351674271636,0.2951636582555988,1051.5597600046694,7.373474320542152,32.63157894736842,0.206755,172.39098751544952,0.14061975479125977,51.96211560156496,0.06357160024321462,1.0,1.0,0.2713987757406089,0.1825065086765385,652.0216614396725,5.062717123136234,35.5,0,0,1,37,893.4,30.7,26.1,0.20921052631578949,0.03888425407679143 +LR (default),7.7264530345013265,0.5259399602287694,2.6686237618793442,0.19925514469367503,0.9806670457343031,0.9860341079029044,0.33047877847380847,0.3220913721066172,27.432135439923183,8.872452564577243,33.36842105263158,0.21439,5.452793717384338,0.1415010690689087,1.6174976146916054,0.09387864407044556,1.0,1.0,0.27139753987310355,0.1831054339424733,15.972794808418932,5.719143257720329,36.0,0,0,0,38,869.4,28.8,34.9,0.19078947368421054,0.03212487482338037 +KNN (tuned + ensemble),212.22012870562705,8.11043459490726,28.850708739248347,0.6076026284446762,1.0,0.9828353904168845,0.48361351363956845,0.5911588168624139,79.58293822496161,51.24023018425664,36.31578947368421,0.320445,18.52320122718811,0.22250914573669434,3.5145795148857952,0.1617064055034519,1.0,1.0,0.4081435434925469,0.6872102333351288,56.97784214902664,12.39378247919743,39.0,0,0,0,38,744.6,34.0,32.3,0.11710526315789474,0.0334880126619938 +KNN (tuned),212.22012870562705,1.883336155038131,28.850708739248347,0.13054448296641816,1.0,0.9845999552108321,0.5047438562255071,0.6412798413840507,79.58293822496161,11.122407039906847,37.51315789473684,0.32193499999999997,18.52320122718811,0.07584631443023682,3.5145795148857952,0.03818510847996826,1.0,1.0,0.48517378670484473,0.7165338507811103,56.97784214902664,2.1231636318644824,40.0,0,1,0,37,677.7,32.4,47.2,0.08717105263157894,0.038768005188542266 +KNN (default),1.74290458779586,0.21126952296809146,0.5001806322977429,0.036905984979792666,1.0,1.0,0.5890502564791692,0.9411577191145609,1.0024208997554083,2.3365493404640363,39.828947368421055,0.37738,0.3047138452529907,0.03128814697265625,0.06855464474199147,0.019133871606833427,1.0,1.0,0.5282941392408126,1.0,1.0,1.2278781340930796,41.0,0,0,0,38,467.1,55.0,51.4,0.029276315789473685,0.02541007993534458 diff --git a/data/lite/full-cls/time_plot.pdf b/data/lite/full-cls/time_plot.pdf new file mode 100644 index 0000000000000000000000000000000000000000..19364a1a41aca9a0e5c3d5172acf719e33a175a7 Binary files /dev/null and b/data/lite/full-cls/time_plot.pdf differ diff --git a/data/lite/full-cls/time_plot.png.zip b/data/lite/full-cls/time_plot.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..8b42c4b6e2232a76fd9e3d1f5ee7ee2f360f2d09 --- /dev/null +++ b/data/lite/full-cls/time_plot.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f0389b7f203eb4d39d6b6a3da311337a46c01621e12fc206d904eff8ce58527 +size 337899 diff --git a/data/lite/full-cls/tuning-impact-elo-horizontal.pdf b/data/lite/full-cls/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..bd2bbd6c2d88e3c17f415c27abdbd1920b5aa370 Binary files /dev/null and b/data/lite/full-cls/tuning-impact-elo-horizontal.pdf differ diff --git a/data/lite/full-cls/tuning-impact-elo-horizontal.png.zip b/data/lite/full-cls/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..fe0ff9a7960de4149eaee744b8c19171d2bd2ff5 --- /dev/null +++ b/data/lite/full-cls/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4a737dcd6d82a8040b8eb82b9d90664185427b808cdef322a20c51796c88f15c +size 128533 diff --git a/data/lite/full-cls/tuning-impact-elo.pdf b/data/lite/full-cls/tuning-impact-elo.pdf new file mode 100644 index 0000000000000000000000000000000000000000..628097f297eb220c9f6f25adced9cfdf98a316f9 Binary files /dev/null and b/data/lite/full-cls/tuning-impact-elo.pdf differ diff --git a/data/lite/full-cls/tuning-impact-elo.png.zip b/data/lite/full-cls/tuning-impact-elo.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..548c5b62d0aa0ac9670259f66fb36291c4cef5ed --- /dev/null +++ b/data/lite/full-cls/tuning-impact-elo.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2b7f6bd0146921670f8452f5a59e6970208f4dbb60786b6ec8b535284d5fb72e +size 124422 diff --git a/data/lite/full-reg/figures/critical-diagram.pdf b/data/lite/full-reg/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8c8c71bb64483b45ec7d27bc8e8d7f74f516d4dc Binary files /dev/null and b/data/lite/full-reg/figures/critical-diagram.pdf differ diff --git a/data/lite/full-reg/figures/critical-diagram.png.zip b/data/lite/full-reg/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..806ac9e814e9ff517048964639c9a84a47722c59 --- /dev/null +++ b/data/lite/full-reg/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c820f0dacaec7458f05c9bf9b281cd3eefcc5514fb205adfe75336408a7e14a2 +size 291039 diff --git a/data/lite/full-reg/leaderboard.tex b/data/lite/full-reg/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..155cc043844ee2e04bb19ccfde7e858c639feca1 --- /dev/null +++ b/data/lite/full-reg/leaderboard.tex @@ -0,0 +1,49 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1881${}_{-87,+73}$}} & \textcolor{gold}{\textbf{0.735}} & \textcolor{gold}{\textbf{3.6}} & \textcolor{gold}{\textbf{2.1}} & \textcolor{silver}{\textbf{3}} & \textcolor{silver}{\textbf{2.4\%}} & 1734.20 & 7.06 \\ +RealMLP (T+E) & \textcolor{silver}{\textbf{1758${}_{-56,+70}$}} & \textcolor{silver}{\textbf{0.734}} & \textcolor{silver}{\textbf{5.6}} & 3.4 & 1 & \textcolor{gold}{\textbf{2.1\%}} & 6534.34 & 2.30 \\ +ModernNCA (T+E) & \textcolor{bronze}{\textbf{1675${}_{-55,+60}$}} & 0.603 & \textcolor{bronze}{\textbf{7.5}} & \textcolor{silver}{\textbf{2.3}} & \textcolor{gold}{\textbf{4}} & 4.3\% & 3811.43 & 7.58 \\ +LightGBM (T+E) & 1657${}_{-49,+62}$ & 0.531 & 7.8 & 5.9 & 0 & 4.9\% & 686.46 & 5.48 \\ +TabDPT (D) & 1632${}_{-52,+57}$ & \textcolor{bronze}{\textbf{0.635}} & 8.6 & \textcolor{bronze}{\textbf{2.9}} & \textcolor{silver}{\textbf{3}} & \textcolor{bronze}{\textbf{2.7\%}} & 16.97 & 8.70 \\ +CatBoost (T+E) & 1615${}_{-48,+53}$ & 0.536 & 9.2 & 7.0 & 0 & 4.6\% & 2895.38 & 1.32 \\ +TabM (T+E) & 1583${}_{-57,+59}$ & 0.488 & 10.2 & 5.2 & 1 & 3.1\% & 4228.53 & 1.19 \\ +CatBoost (T) & 1569${}_{-51,+52}$ & 0.512 & 10.7 & 5.6 & 0 & 4.7\% & 2895.38 & 0.07 \\ +LightGBM (T) & 1542${}_{-49,+40}$ & 0.432 & 11.5 & 9.1 & 0 & 5.7\% & 686.46 & 0.74 \\ +XGBoost (T) & 1504${}_{-54,+53}$ & 0.398 & 12.7 & 10.9 & 0 & 5.6\% & 848.99 & 0.47 \\ +XGBoost (T+E) & 1496${}_{-50,+62}$ & 0.432 & 12.8 & 11.6 & 0 & 5.6\% & 848.99 & 2.38 \\ +ModernNCA (D) & 1490${}_{-45,+61}$ & 0.209 & 13.0 & 9.3 & 0 & 7.1\% & 16.07 & 0.29 \\ +TabM (T) & 1484${}_{-56,+48}$ & 0.380 & 13.3 & 8.9 & 0 & 3.9\% & 4228.53 & 0.13 \\ +RealMLP (T) & 1452${}_{-50,+48}$ & 0.344 & 14.5 & 11.0 & 0 & 4.9\% & 6534.34 & 0.07 \\ +CatBoost (D) & 1442${}_{-53,+63}$ & 0.389 & 14.6 & 10.6 & 0 & 6.9\% & 8.35 & 0.09 \\ +ModernNCA (T) & 1424${}_{-41,+60}$ & 0.351 & 15.3 & 6.4 & 0 & 7.0\% & 3811.43 & 0.45 \\ +TabM (D) & 1366${}_{-44,+56}$ & 0.287 & 17.2 & 14.0 & 0 & 5.7\% & 13.90 & 0.12 \\ +TorchMLP (T+E) & 1324${}_{-45,+43}$ & 0.171 & 18.9 & 16.0 & 0 & 7.0\% & 4452.11 & 0.85 \\ +RealMLP (D) & 1306${}_{-52,+53}$ & 0.137 & 19.4 & 11.2 & 0 & 7.1\% & 32.24 & 0.08 \\ +ExtraTrees (T+E) & 1301${}_{-51,+55}$ & 0.157 & 19.6 & 12.3 & 0 & 10.1\% & 161.73 & 0.78 \\ +ExtraTrees (T) & 1278${}_{-53,+54}$ & 0.129 & 20.5 & 11.6 & 0 & 10.5\% & 161.73 & 0.12 \\ +TorchMLP (T) & 1262${}_{-48,+58}$ & 0.123 & 21.1 & 17.2 & 0 & 7.6\% & 4452.11 & 0.09 \\ +LightGBM (D) & 1224${}_{-45,+47}$ & 0.070 & 22.2 & 21.2 & 0 & 8.6\% & 2.03 & 0.30 \\ +RandomForest (T+E) & 1196${}_{-52,+45}$ & 0.066 & 23.5 & 20.5 & 0 & 11.2\% & 526.17 & 0.77 \\ +XGBoost (D) & 1188${}_{-54,+61}$ & 0.111 & 23.7 & 22.0 & 0 & 9.4\% & 2.15 & 0.18 \\ +RandomForest (T) & 1137${}_{-52,+53}$ & 0.048 & 25.2 & 22.7 & 0 & 12.0\% & 526.17 & 0.12 \\ +EBM (T+E) & 1135${}_{-51,+69}$ & 0.160 & 25.2 & 15.5 & 0 & 13.8\% & 2124.78 & 0.12 \\ +ExtraTrees (D) & 1112${}_{-46,+55}$ & 0.061 & 26.0 & 22.7 & 0 & 12.1\% & 0.42 & 0.06 \\ +FastaiMLP (T+E) & 1093${}_{-40,+60}$ & 0.023 & 26.5 & 25.6 & 0 & 12.1\% & 527.21 & 2.83 \\ +EBM (T) & 1085${}_{-66,+59}$ & 0.137 & 27.1 & 9.0 & 1 & 14.4\% & 2124.78 & 0.01 \\ +TorchMLP (D) & 1077${}_{-59,+60}$ & 0.016 & 27.4 & 25.4 & 0 & 11.9\% & 20.50 & 0.08 \\ +FastaiMLP (T) & 1048${}_{-54,+48}$ & 0.013 & 27.9 & 27.3 & 0 & 12.6\% & 527.21 & 0.31 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.000 & 29.3 & 28.7 & 0 & 13.3\% & 0.63 & 0.06 \\ +EBM (D) & 999${}_{-56,+50}$ & 0.093 & 29.4 & 27.0 & 0 & 15.1\% & 7.25 & 0.04 \\ +FastaiMLP (D) & 919${}_{-60,+50}$ & 0.000 & 31.1 & 30.6 & 0 & 17.0\% & 3.08 & 0.29 \\ +KNN (T+E) & 488${}_{-125,+81}$ & 0.000 & 37.1 & 36.9 & 0 & 36.8\% & 2.25 & 0.15 \\ +Linear (T+E) & 467${}_{-81,+69}$ & 0.000 & 37.3 & 37.2 & 0 & 34.9\% & 46.50 & 0.14 \\ +Linear (T) & 397${}_{-92,+86}$ & 0.000 & 37.9 & 37.8 & 0 & 35.1\% & 46.50 & 0.04 \\ +KNN (T) & 369${}_{-79,+103}$ & 0.000 & 38.2 & 38.1 & 0 & 37.4\% & 2.25 & 0.03 \\ +Linear (D) & 284${}_{-112,+94}$ & 0.000 & 39.0 & 39.0 & 0 & 37.6\% & 1.16 & 0.08 \\ +KNN (D) & 204${}_{-107,+107}$ & 0.000 & 39.7 & 39.6 & 0 & 41.1\% & 0.04 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/lite/full-reg/tabarena_leaderboard.csv b/data/lite/full-reg/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..3127f0f93cd53b0fe3d96efbd385640ea1ae6ab2 --- /dev/null +++ b/data/lite/full-reg/tabarena_leaderboard.csv @@ -0,0 +1,42 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),10051.83785378016,70.68019696382376,3153.0175193360524,8.747021690702612,0.26461131829744206,0.23246284524554242,0.023916771631798464,0.02739153218756116,77389.85490429384,1642.993680810643,3.6153846153846154,4.10013,12458.540483474731,10.641454219818115,1734.201033855079,7.062674608341483,0.23133371369265832,0.15318450222486296,0.009595256724279078,0.010747053668057768,76123.2860686676,448.5777143640419,3.0,3,3,2,5,1881.3,72.9,86.1,0.9346153846153846,0.46789044289044285 +REALMLP (tuned + ensemble),67720.63958248726,12.596411228179932,7060.621324893922,7.808561874535024,0.26596078198184286,0.28211068496756564,0.02079798687061351,0.0318369877671686,300508.62324682355,250.14082076713328,5.615384615384615,4.1572,28985.167714595795,4.047658205032349,6534.342759080719,2.2990216834440518,0.1939333280527935,0.22514913688996746,0.013728568811321784,0.014835698760319415,215284.61589654352,243.1358386691401,6.0,1,2,2,8,1758.0,69.5,55.9,0.8846153846153846,0.2927655677655677 +MNCA (tuned + ensemble),32095.7095044026,50.336935831950264,6167.970103891367,17.67280087259302,0.39661026996847265,0.32801771819090547,0.04309650922972409,0.03908136048505314,183828.00911751532,735.9936636946994,7.461538461538462,5.17584,16410.6605322361,9.919520378112793,3811.431210311487,7.580550796782489,0.35096562881515253,0.15722206903461022,0.013723617424411305,0.015091117740655323,215831.1755427445,518.4548592505826,4.0,4,1,0,8,1674.9,59.2,54.3,0.8384615384615385,0.43316990701606084 +GBM (tuned + ensemble),3436.114175393031,58.23533050830547,795.9694443599602,8.418828059060083,0.4687885136784251,0.4435140155179781,0.04872699145877461,0.048825649620474074,19731.156544048128,1434.220040412032,7.846153846153846,4.23184,3069.166275501251,10.259183883666992,686.4649311851701,5.476648655048637,0.41525976638432377,0.43203539999740836,0.025882078477524217,0.03357473090357181,19944.502036932092,444.7715795428446,6.0,0,0,2,11,1656.8,61.7,48.4,0.8288461538461539,0.1695818070818071 +TABDPT (default),148.7485225384052,53.04140243163476,27.664670515034977,22.751733349883246,0.36524485788247774,0.398418313824876,0.02707452489314851,0.042379631052937836,812.9269128730391,1906.3906090967916,8.615384615384615,4.24006,138.3429434299469,23.635958433151245,16.972548083255166,8.69507121915186,0.28825222902504594,0.2790724690964867,0.028230517433738833,0.02927377278034004,862.9680595038594,1818.2713070390416,6.0,3,0,1,9,1631.9,56.3,51.3,0.8096153846153846,0.34416985729202926 +CAT (tuned + ensemble),28869.75754873569,4.897776438639714,6008.917386152554,1.2106725402691727,0.46394270088709505,0.48689248463883505,0.0458947416071935,0.05367012205676516,133179.3418491014,149.6309053684063,9.153846153846153,4.34931,19659.746299266815,2.423489809036255,2895.38206857098,1.3169555135467037,0.4150140849407447,0.4887362232633289,0.020782158167854115,0.04340637912663692,136008.98155550173,63.945909864284914,8.0,0,0,2,11,1615.3,52.7,47.8,0.7961538461538461,0.14267800912537754 +TABM (tuned + ensemble),43863.87365627289,7.196907648673425,7630.0436857309,2.7973145627637854,0.5116630985629478,0.4854321382759927,0.030867223542042906,0.0665660489753135,178405.92798162796,122.01127915897794,10.153846153846153,4.37433,20695.145789146423,2.30704665184021,4228.525932170142,1.1878318177487799,0.43090407292838107,0.5177608028715915,0.028846208921784244,0.04776607732739337,167683.34522914552,99.64653279572039,8.0,1,0,1,11,1583.0,58.1,57.0,0.7711538461538462,0.19352813852813855 +CAT (tuned),28869.75754873569,0.37941277944124663,6008.917386152554,0.13317195365011947,0.48768370669415817,0.5245466944361494,0.04712689907826864,0.06084325206537784,133179.3418491014,12.25994177013758,10.73076923076923,4.33359,19659.746299266815,0.3871791362762451,2895.38206857098,0.07259850925684806,0.5120020111687084,0.4994950774152178,0.02654000853753824,0.04066776406296266,136008.98155550173,9.36829586948511,10.0,0,3,0,10,1568.7,51.8,50.9,0.7567307692307692,0.17980469965764084 +GBM (tuned),3436.114175393031,7.708258518805871,795.9694443599602,1.1322853845142717,0.5677497712335507,0.5971788709429797,0.057205566292941865,0.06444142112740564,19731.156544048128,203.98211974063437,11.461538461538462,4.36733,3069.166275501251,2.4075584411621094,686.4649311851701,0.7428209858877407,0.5552965190223861,0.5775907044637494,0.03596218031983012,0.046546503764294436,19944.502036932092,125.83374247654176,11.0,0,0,0,13,1541.9,39.6,48.5,0.7384615384615385,0.11020062139971643 +XGB (tuned),6347.206248815243,2.662817588219276,1403.828603839992,0.830905023108991,0.6023900941776976,0.6115387620417754,0.055949938636663046,0.06011214144598406,25057.523759861193,88.40573298970912,12.653846153846153,4.27126,2595.2154359817505,1.905684232711792,848.994191986563,0.4748247737108275,0.6620816436761664,0.59435546752595,0.03917366894901786,0.04545778114458191,28391.44761906215,33.71919899313387,13.0,0,0,0,13,1504.2,52.9,53.6,0.7086538461538462,0.09171668075514229 +XGB (tuned + ensemble),6347.206248815243,13.31823913867657,1403.828603839992,4.96016839745584,0.567688570586379,0.6088979541996152,0.0561655691809741,0.061030527298114604,25057.523759861193,462.0201618770655,12.846153846153847,4.28931,2595.2154359817505,11.448588609695435,848.994191986563,2.377886790782213,0.6256631137377601,0.6320251511646616,0.04155329070129643,0.0444118848326924,28391.44761906215,165.90428541165622,14.0,0,0,0,13,1495.6,61.7,49.9,0.7038461538461539,0.0858849483849484 +MNCA (default),99.03008703085092,2.04661226272583,17.95183203493229,0.696529515302073,0.7905734390801098,0.6465871709608848,0.07064833607782565,0.07253476019625761,536.9851811460794,28.619994682567945,13.0,5.11399,53.54817509651184,0.404191255569458,16.074996445495383,0.28976407064988385,0.9481480616823943,0.5815078512793247,0.03066701733950883,0.07837929327865152,512.2293777300066,19.36698671755419,11.0,0,0,0,13,1490.1,60.3,44.3,0.7,0.1079631153160565 +TABM (tuned),43863.87365627289,0.6320198682638315,7630.0436857309,0.26286272507788355,0.6195303355392976,0.6302074899164011,0.039435120144479426,0.08010589208339923,178405.92798162796,12.46052452929135,13.307692307692308,4.38922,20695.145789146423,0.3208010196685791,4228.525932170142,0.1251857412799802,0.5658117450288995,0.6078612660255207,0.03878632440402585,0.05035859553249802,167683.34522914552,9.015021272319185,12.0,0,1,0,12,1484.1,48.0,55.6,0.6923076923076923,0.11253776138391522 +REALMLP (tuned),67720.63958248726,0.544457747386052,7060.621324893922,0.3613057143261696,0.655870427194709,0.6779837137329545,0.04902952080276177,0.07208539599966252,300508.62324682355,13.279279128405507,14.461538461538462,4.49659,28985.167714595795,0.20874404907226562,6534.342759080719,0.07393883227200762,0.67949922992921,0.7776222579387733,0.04525760113385646,0.051582123834098645,215284.61589654352,8.366633852135228,15.0,0,0,0,13,1452.4,47.3,49.3,0.6634615384615384,0.09079086448995952 +CAT (default),91.84739507161655,0.4293701098515437,19.94101966337271,0.1772958974443293,0.6107641551712003,0.6735486897823615,0.06850437211216202,0.08021242812847439,430.2934190313639,15.190031737648651,14.576923076923077,4.47999,56.422000885009766,0.48146557807922363,8.349037958570749,0.08723198972790447,0.5847419575275037,0.6846448015241358,0.04026143373457458,0.06617086571419911,318.4902885443583,10.727539533685711,14.0,0,0,0,13,1442.5,63.0,52.9,0.6605769230769231,0.09408568484655441 +MNCA (tuned),32095.7095044026,2.2542913326850305,6167.970103891367,0.8125346038667304,0.6494157477338965,0.6213858052348565,0.06960543626953819,0.08827562471076143,183828.00911751532,39.103427878901556,15.307692307692308,5.75005,16410.6605322361,0.6251811981201172,3811.431210311487,0.45193767318844,0.686862000079896,0.5534523234870296,0.029675454824982017,0.05466681384890431,215831.1755427445,27.98327705018131,14.0,0,2,1,10,1423.8,59.5,40.9,0.6423076923076924,0.15579826806227182 +TABM (default),115.03358120184679,0.551762947669396,30.805250424733348,0.23944113770930792,0.7132806572850755,0.7348728947837725,0.05720318447316548,0.11018510191639486,591.7176035490969,9.427562415305996,17.153846153846153,4.49975,77.85998892784119,0.17319130897521973,13.902122931630087,0.12286842671210456,0.6984407490367052,0.7838146351744427,0.05244844314354469,0.06961332841715165,376.39915054227686,7.849330306811178,16.0,0,0,0,13,1366.2,55.8,43.9,0.5961538461538461,0.0713211076672615 +NN_TORCH (tuned + ensemble),44566.48122985546,3.9406251907348633,5669.080821343483,1.3428149317173896,0.8294336223884518,0.8268743771551563,0.07006690226317708,0.10115654213067894,212623.11870515408,100.05462140638757,18.923076923076923,4.45415,15782.205469369888,3.293855905532837,4452.108570637395,0.8515826242412635,0.9419042422995464,0.9217443353117214,0.057496871027315266,0.06589276319353497,180041.55346239166,102.16386050718569,18.0,0,0,0,13,1323.8,42.3,44.2,0.551923076923077,0.06249820684660957 +REALMLP (default),351.8482951934521,0.5515201458564172,35.503559222074486,0.3453931510429455,0.8625329178435776,0.8284360727102305,0.07054687457234544,0.11745960984498935,1536.8505495640943,11.94254014053474,19.384615384615383,4.72117,142.23176908493042,0.1978776454925537,32.236338542328866,0.07855930724815739,0.9645369038929025,0.9665659615721934,0.06218045116441895,0.07552484491529415,969.8530395782191,8.889469027651435,20.0,0,1,0,12,1306.2,52.3,51.8,0.5403846153846154,0.08915212095713312 +XT (tuned + ensemble),1203.8377161942994,3.407132093723004,436.75693766012125,1.1793024261894192,0.8432098887288747,0.7953106698979718,0.10134862406569099,0.11587952405107362,7927.278685066689,97.16356713108524,19.615384615384617,5.19051,775.9451651573181,4.3725128173828125,161.7254597218303,0.7839402487111646,1.0,1.0,0.05459719178375644,0.11727451534539897,4965.162637285519,114.56582435147743,24.0,0,0,0,13,1300.8,54.3,50.9,0.5346153846153846,0.08114086393438621 +XT (tuned),1203.8377161942994,0.42836998059199405,436.75693766012125,0.15974479700708394,0.8708387105454042,0.7895102019411862,0.104759905314283,0.1186252146376152,7927.278685066689,12.996585928646535,20.46153846153846,5.21222,775.9451651573181,0.3841361999511719,161.7254597218303,0.1196965961875078,1.0,1.0,0.059483366316838704,0.12062350958135247,4965.162637285519,11.747884708843724,23.0,0,0,1,12,1278.3,53.5,52.7,0.5134615384615384,0.085924146502842 +NN_TORCH (tuned),44566.48122985546,0.300307035446167,5669.080821343483,0.10884087554478102,0.8767833734464541,0.846121875980796,0.0762277788365098,0.12251784835575029,212623.11870515408,7.435699914505718,21.076923076923077,4.35043,15782.205469369888,0.15927791595458984,4452.108570637395,0.08851260312676379,1.0,0.9982781675836314,0.058485958619429845,0.06737150510700278,180041.55346239166,7.021581615200423,20.0,0,0,0,13,1262.1,57.5,47.6,0.4980769230769231,0.0582967115822335 +GBM (default),11.557473989633413,2.3389387864332933,3.7830693017108263,0.6131180352644026,0.9303753212721771,0.9288976301921357,0.08645002304906553,0.11558395217934979,86.42318767453573,78.28800186182016,22.153846153846153,4.60997,7.728894472122192,1.0601136684417725,2.0295221938774763,0.2962542098282052,0.9919628610131755,1.0,0.05561299294970734,0.11425457724875789,79.9077479005168,21.84399881745161,23.0,0,0,0,13,1224.2,46.3,44.4,0.47115384615384615,0.047103298394815536 +RF (tuned + ensemble),2061.32346032216,3.516887463056124,498.80047788723755,1.0331010904881306,0.9341058521704168,0.9179679725245055,0.11172340178275471,0.12349380141849947,10568.524251535091,92.6089977104098,23.46153846153846,5.35575,1100.1833860874176,1.5810585021972656,526.1674899414485,0.7701151896148807,1.0,1.0,0.07253345631610286,0.1057055634465074,11284.229205513133,73.76895458081503,25.0,0,0,0,13,1195.6,44.5,51.4,0.43846153846153846,0.048839326438349505 +XGB (default),12.744810801285963,0.8711951879354624,3.3672477352464436,0.269627728742662,0.8890754852284094,0.8921844375943818,0.09393275581156563,0.12952227276204129,67.4237557433486,29.92998850138266,23.653846153846153,5.03782,8.503239631652832,0.5015158653259277,2.1458947295259945,0.17973603002282956,1.0,1.0,0.05502551213589513,0.10113109363104307,79.4223078814837,14.470859449961994,24.0,0,0,0,13,1187.7,60.8,53.9,0.43365384615384617,0.04539055909542838 +EBM (tuned + ensemble),26006.099437456865,0.9856858620276818,3356.515573705329,0.34153237266067143,0.8401103872067855,0.8672182272049187,0.13753898517152402,0.18536743008861836,98028.229877245,12.716029695527471,25.153846153846153,4.4992,6071.96945476532,0.23789739608764648,2124.784934448033,0.12211837692413026,1.0,1.0,0.11314433759685272,0.12050352778728698,95277.20874636172,10.244440895844145,30.0,0,0,1,12,1135.3,68.9,50.8,0.39615384615384613,0.0646128755099593 +RF (tuned),2061.32346032216,0.35773491859436035,498.80047788723755,0.13352674389299507,0.9521440935358421,0.9352822429434912,0.11956176374689925,0.13468679853825555,10568.524251535091,11.403218373596392,25.23076923076923,5.50373,1100.1833860874176,0.2583794593811035,526.1674899414485,0.11934386114600624,1.0,1.0,0.07200067617043104,0.13515078861195354,11284.229205513133,12.018495346547116,26.5,0,0,0,13,1136.6,52.5,51.8,0.3942307692307692,0.04407023061475774 +XT (default),4.47036992586576,0.27958295895503116,0.8320963666478268,0.08259457701463509,0.9388750296989964,0.9297943914194666,0.12092195952722153,0.1710098499377586,12.3176265493243,7.870904574627616,26.0,5.21222,1.8384346961975098,0.15966486930847168,0.424311872114215,0.060998774854278015,1.0,1.0,0.08211867366377534,0.14076583918709085,10.170662473104239,6.482341208432202,28.0,0,0,0,13,1112.1,54.4,45.5,0.375,0.04397062618178138 +FASTAI (tuned + ensemble),4587.4864926521595,8.088074555763832,1228.3496620568533,4.774504314817022,0.976857214707512,0.9827882276027652,0.12139036764490176,0.18646371498536363,31416.410399557783,300.5500778279919,26.53846153846154,5.22937,3495.172525405884,6.275083303451538,527.2149841541068,2.8305834490487536,1.0,1.0,0.0883144170495136,0.17798536123127906,32094.70665313662,284.14659267835043,27.0,0,0,0,13,1092.9,59.3,39.5,0.36153846153846153,0.03907784066226461 +EBM (tuned),26006.099437456865,0.08786681982187125,3356.515573705329,0.03542955498326605,0.8629348413551933,0.8583097442453099,0.14432622400906528,0.21501047801625847,98028.229877245,1.247331364408003,27.076923076923077,4.53788,6071.96945476532,0.03220105171203613,2124.784934448033,0.011920453069690697,1.0,1.0,0.11467541102152934,0.1392477172116637,95277.20874636172,1.0,32.0,1,0,0,12,1084.8,58.8,65.8,0.34807692307692306,0.11070171034879414 +NN_TORCH (default),149.93631285887497,0.22603647525493914,24.53274626856709,0.08820389410472865,0.9842745385355711,0.9733305251076131,0.11891120219929098,0.19124345982678712,789.2682924687753,6.761490179412745,27.384615384615383,4.48918,79.02296853065491,0.1783456802368164,20.501721600246,0.08139732073405338,1.0,1.0,0.08808381765426243,0.127190652207085,464.3796175172265,6.690027290059689,26.0,0,0,0,13,1076.6,59.6,58.2,0.3403846153846154,0.03933724171166253 +FASTAI (tuned),4587.4864926521595,0.8590086423433744,1228.3496620568533,0.4517426116430779,0.9865659937937807,1.0,0.12622793993539416,0.2078593727254878,31416.410399557783,33.24632209827138,27.923076923076923,5.20475,3495.172525405884,0.7054669857025146,527.2149841541068,0.31148362904787064,1.0,1.0,0.10136144633642485,0.18602614231405568,32094.70665313662,33.069549879991385,27.0,0,0,0,13,1048.0,47.5,53.1,0.3269230769230769,0.036688546381474424 +RF (default),9.625078421372633,0.3119058792407696,1.3105120203011107,0.08833919573142503,1.0,1.0,0.13282962694876124,0.1819546904345078,28.977692368539365,8.6618037567804,29.307692307692307,5.50373,5.83245849609375,0.15849757194519043,0.6330948821924387,0.06385546519532179,1.0,1.0,0.08318481827952429,0.18396643424995313,24.89266133507776,6.5528732074693075,30.0,0,0,0,13,1000.0,0.0,0.0,0.2923076923076923,0.034810294563395565 +EBM (default),59.33740012462322,0.1286466121673584,9.101757338940997,0.07531792942152694,0.9074728958123806,0.9350652433056991,0.1512971478563086,0.2195610478948347,235.3944936698285,3.0829910156446747,29.384615384615383,4.59891,16.830075979232788,0.058573246002197266,7.250631064699407,0.04116066154478904,1.0,1.0,0.1212644465105922,0.14578267826230468,199.27951947649242,2.3994193074057,34.0,0,0,0,13,998.6,50.0,55.6,0.2903846153846154,0.03704085101456019 +FASTAI (default),19.02570968407851,0.9372756664569561,4.5150492334878205,0.3838557095432917,1.0,1.0,0.16983706046763158,0.2622596508606666,122.59124416864653,31.37127570550211,31.076923076923077,6.27121,14.524733304977417,1.090883731842041,3.0836050750366097,0.29293149190497314,1.0,1.0,0.11103713173333196,0.29927743293530584,144.4018162786092,35.82998039822552,31.0,0,0,0,13,919.4,49.3,59.2,0.24807692307692308,0.03271323890093992 +KNN (tuned + ensemble),130.0447974755214,2.3221586850973277,31.413578991094717,0.3839981934688547,1.0,1.0,0.36756485806297295,0.6734558517564605,86.62047407939465,27.45502773232453,37.07692307692308,8.16815,12.739671230316162,0.2766997814178467,2.253836358499806,0.14740763433303566,1.0,1.0,0.4085005377667177,0.797964481423868,68.17976708247701,13.891874242000187,38.0,0,0,0,13,488.5,80.2,124.7,0.09807692307692308,0.027092595293357377 +LR (tuned + ensemble),259.4397970529703,0.5564901095170242,90.58997126979446,0.25975101191673244,1.0,1.0,0.3493121067236557,0.6282022661768178,1967.1950270790398,11.602054705435593,37.26923076923077,7.90301,156.73520398139954,0.17154169082641602,46.49516183285896,0.14113267629856877,1.0,1.0,0.3712901272900253,0.6624491377852132,1676.6799766486058,9.252055102805237,36.0,0,0,0,13,467.2,68.9,80.1,0.09326923076923077,0.026875828301800728 +LR (tuned),259.4397970529703,0.12883753042954665,90.58997126979446,0.058721919597199,1.0,1.0,0.35117627377232247,0.637664314344675,1967.1950270790398,2.9663690915463836,37.92307692307692,7.95008,156.73520398139954,0.04031491279602051,46.49516183285896,0.04248926416039467,1.0,1.0,0.3720471503072048,0.6706483938757745,1676.6799766486058,3.0754592783196695,38.0,0,0,0,13,397.0,85.2,91.5,0.07692307692307693,0.026438318069682023 +KNN (tuned),130.0447974755214,0.2600700121659499,31.413578991094717,0.054814694748458494,1.0,1.0,0.3742426273573644,0.7065181173770695,86.62047407939465,3.596722365416154,38.23076923076923,8.28075,12.739671230316162,0.11144423484802246,2.253836358499806,0.029449351131916046,1.0,1.0,0.41308029193402596,0.8249023799133182,68.17976708247701,2.325030564010288,39.0,0,0,0,13,369.3,102.6,78.8,0.06923076923076923,0.02623730508345893 +LR (default),4.9569381567148065,0.18706706854013297,2.04240300519074,0.0952681059723661,1.0,1.0,0.3762906108037262,0.7449242295439796,46.37076890234856,6.575927550408862,39.03846153846154,8.07907,5.157731294631958,0.10304999351501465,1.1643471259929907,0.0800294014177239,1.0,1.0,0.3720269301046141,0.7403523158287414,40.21502356299305,3.589990624937666,38.0,0,0,0,13,284.2,93.6,111.7,0.04903846153846154,0.025668142221415648 +KNN (default),2.3537445801955004,0.07271528244018555,0.6011442901693675,0.029873606599490884,1.0,1.0,0.4112787475186339,0.8492654887800806,1.1881523781374654,1.663589934493969,39.69230769230769,8.5423,0.15627789497375488,0.04499936103820801,0.03787513578449361,0.02182507289554364,1.0,1.0,0.4499088247272143,1.0,1.0,1.6403093647746019,41.0,0,0,0,13,203.6,107.0,106.1,0.032692307692307694,0.025282024351343992 diff --git a/data/lite/full-reg/time_plot.pdf b/data/lite/full-reg/time_plot.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b4802f771861d719bdf5520effd5bbac1b5146f7 Binary files /dev/null and b/data/lite/full-reg/time_plot.pdf differ diff --git a/data/lite/full-reg/time_plot.png.zip b/data/lite/full-reg/time_plot.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..263ba6a3c2cb850a1ba7da44ac69dda4162238c3 --- /dev/null +++ b/data/lite/full-reg/time_plot.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3fe29fdec65f03c7d3bae994bc888e7c7ec498e3800a153239b08e6703208a4a +size 79911 diff --git a/data/lite/full-reg/tuning-impact-elo-horizontal.pdf b/data/lite/full-reg/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8427817b6fb94a00fb9d33028da42717551415ac Binary files /dev/null and b/data/lite/full-reg/tuning-impact-elo-horizontal.pdf differ diff --git a/data/lite/full-reg/tuning-impact-elo-horizontal.png.zip b/data/lite/full-reg/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..1e01391e7e232af509f3fb9d964758fb24628af6 --- /dev/null +++ b/data/lite/full-reg/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c6c1fe19f28c3138bf15b5a4b8ba0daa68f702c17056dc4cb9ec570d9e18105 +size 129444 diff --git a/data/lite/full-reg/tuning-impact-elo.pdf b/data/lite/full-reg/tuning-impact-elo.pdf new file mode 100644 index 0000000000000000000000000000000000000000..a780a95e469545c019f40be2b37631144e2e54cc Binary files /dev/null and b/data/lite/full-reg/tuning-impact-elo.pdf differ diff --git a/data/lite/full-reg/tuning-impact-elo.png.zip b/data/lite/full-reg/tuning-impact-elo.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..78690e07dfc9a688eac9bdf64ef6e234840e997c --- /dev/null +++ b/data/lite/full-reg/tuning-impact-elo.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48e44fb1167bb8a9344c3a7b3bf8f4b48876308965ed7c1d23cae687e763f871 +size 129280 diff --git a/data/lite/full/figures/critical-diagram.pdf b/data/lite/full/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..603a4f6d89b2f1615ca5dee7bdb3f9eb18f8c611 Binary files /dev/null and b/data/lite/full/figures/critical-diagram.pdf differ diff --git a/data/lite/full/figures/critical-diagram.png.zip b/data/lite/full/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..7c6b7f6cf494dea9d0918f3fa500b978623344b4 --- /dev/null +++ b/data/lite/full/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8ff960f4ae97033ab8ca8cd56ce1f4d54e7f49be4b3f0cf96b17e2cb81321c5a +size 296265 diff --git a/data/lite/full/leaderboard.tex b/data/lite/full/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..ecba59b431c65cfd55adcb50fc1875f6e65610c3 --- /dev/null +++ b/data/lite/full/leaderboard.tex @@ -0,0 +1,49 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1582${}_{-29,+36}$}} & \textcolor{gold}{\textbf{0.608}} & \textcolor{gold}{\textbf{6.3}} & \textcolor{gold}{\textbf{2.7}} & \textcolor{gold}{\textbf{10}} & \textcolor{gold}{\textbf{5.3\%}} & 1453.27 & 3.15 \\ +RealMLP (T+E) & \textcolor{silver}{\textbf{1479${}_{-34,+27}$}} & \textcolor{silver}{\textbf{0.499}} & \textcolor{silver}{\textbf{9.4}} & 5.1 & 2 & \textcolor{bronze}{\textbf{7.9\%}} & 6026.47 & 3.13 \\ +LightGBM (T+E) & \textcolor{bronze}{\textbf{1457${}_{-27,+30}$}} & 0.437 & \textcolor{bronze}{\textbf{10.2}} & 7.3 & 0 & 9.3\% & 416.56 & 2.24 \\ +TabM (T+E) & 1449${}_{-34,+24}$ & \textcolor{bronze}{\textbf{0.491}} & 10.5 & \textcolor{bronze}{\textbf{4.6}} & 4 & \textcolor{silver}{\textbf{7.7\%}} & 3133.91 & 1.27 \\ +CatBoost (T+E) & 1422${}_{-25,+30}$ & 0.419 & 11.5 & 7.2 & 0 & 8.4\% & 1665.53 & 0.56 \\ +ModernNCA (T+E) & 1406${}_{-32,+29}$ & 0.432 & 12.1 & \textcolor{silver}{\textbf{3.9}} & \textcolor{bronze}{\textbf{6}} & 8.7\% & 4618.50 & 7.74 \\ +CatBoost (T) & 1400${}_{-34,+20}$ & 0.403 & 12.4 & 7.4 & 0 & 9.0\% & 1665.53 & 0.07 \\ +XGBoost (T+E) & 1398${}_{-23,+28}$ & 0.349 & 12.4 & 6.8 & 2 & 9.8\% & 700.96 & 1.44 \\ +TabM (T) & 1381${}_{-32,+21}$ & 0.400 & 13.2 & 6.0 & 2 & 8.7\% & 3133.91 & 0.13 \\ +LightGBM (T) & 1381${}_{-20,+24}$ & 0.347 & 13.2 & 9.3 & 0 & 10.2\% & 416.56 & 0.38 \\ +XGBoost (T) & 1372${}_{-23,+28}$ & 0.299 & 13.5 & 8.1 & 1 & 10.1\% & 700.96 & 0.21 \\ +CatBoost (D) & 1370${}_{-32,+29}$ & 0.356 & 13.5 & 8.0 & 2 & 9.8\% & 6.70 & 0.09 \\ +ModernNCA (T) & 1363${}_{-27,+21}$ & 0.297 & 13.9 & 6.1 & 2 & 9.4\% & 4618.50 & 0.47 \\ +RealMLP (T) & 1308${}_{-29,+31}$ & 0.238 & 16.3 & 11.3 & 0 & 11.5\% & 6026.47 & 0.17 \\ +TabM (D) & 1301${}_{-19,+27}$ & 0.275 & 16.5 & 8.7 & 1 & 11.8\% & 11.56 & 0.13 \\ +TorchMLP (T+E) & 1277${}_{-26,+26}$ & 0.209 & 17.6 & 10.6 & 1 & 11.5\% & 2832.80 & 1.80 \\ +EBM (T+E) & 1243${}_{-24,+23}$ & 0.175 & 19.1 & 8.3 & 2 & 14.9\% & 1323.39 & 0.18 \\ +ModernNCA (D) & 1237${}_{-29,+21}$ & 0.153 & 19.6 & 10.7 & 1 & 14.6\% & 13.74 & 0.32 \\ +TabDPT (D) & 1233${}_{-27,+25}$ & 0.293 & 19.6 & 4.9 & \textcolor{silver}{\textbf{7}} & 13.8\% & 20.56 & 8.62 \\ +ExtraTrees (T+E) & 1209${}_{-27,+24}$ & 0.117 & 20.7 & 11.9 & 0 & 15.0\% & 191.44 & 0.76 \\ +RealMLP (D) & 1204${}_{-23,+23}$ & 0.102 & 20.9 & 11.6 & 1 & 13.4\% & 34.70 & 0.18 \\ +EBM (T) & 1201${}_{-30,+24}$ & 0.126 & 21.0 & 9.1 & 2 & 15.6\% & 1323.39 & 0.02 \\ +TorchMLP (T) & 1201${}_{-25,+25}$ & 0.103 & 21.0 & 15.6 & 0 & 13.4\% & 2832.80 & 0.11 \\ +XGBoost (D) & 1191${}_{-25,+32}$ & 0.106 & 21.4 & 13.0 & 1 & 13.4\% & 2.06 & 0.12 \\ +FastaiMLP (T+E) & 1179${}_{-27,+27}$ & 0.152 & 22.0 & 13.4 & 0 & 15.7\% & 594.95 & 4.65 \\ +ExtraTrees (T) & 1178${}_{-24,+22}$ & 0.092 & 22.1 & 10.6 & 0 & 15.9\% & 191.44 & 0.10 \\ +RandomForest (T+E) & 1167${}_{-27,+20}$ & 0.096 & 22.6 & 12.0 & 1 & 15.5\% & 377.08 & 0.75 \\ +EBM (D) & 1164${}_{-24,+24}$ & 0.125 & 22.7 & 11.7 & 1 & 16.5\% & 5.48 & 0.06 \\ +LightGBM (D) & 1162${}_{-33,+25}$ & 0.074 & 22.8 & 19.5 & 0 & 14.5\% & 2.20 & 0.17 \\ +RandomForest (T) & 1120${}_{-24,+21}$ & 0.069 & 24.6 & 17.9 & 0 & 16.4\% & 377.08 & 0.09 \\ +FastaiMLP (T) & 1109${}_{-22,+25}$ & 0.070 & 25.0 & 16.3 & 0 & 17.4\% & 594.95 & 0.34 \\ +TorchMLP (D) & 1029${}_{-27,+26}$ & 0.019 & 28.2 & 25.1 & 0 & 19.1\% & 8.96 & 0.13 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.008 & 29.2 & 24.0 & 0 & 22.0\% & 0.43 & 0.05 \\ +FastaiMLP (D) & 983${}_{-28,+23}$ & 0.019 & 29.9 & 26.8 & 0 & 21.6\% & 3.12 & 0.31 \\ +ExtraTrees (D) & 971${}_{-28,+28}$ & 0.023 & 30.2 & 26.2 & 0 & 23.9\% & 0.26 & 0.05 \\ +Linear (T+E) & 878${}_{-25,+30}$ & 0.031 & 33.1 & 20.1 & 1 & 32.2\% & 47.11 & 0.16 \\ +Linear (T) & 842${}_{-29,+31}$ & 0.019 & 34.0 & 28.0 & 0 & 32.8\% & 47.11 & 0.06 \\ +Linear (D) & 810${}_{-31,+29}$ & 0.014 & 34.8 & 32.8 & 0 & 34.2\% & 1.53 & 0.09 \\ +KNN (T+E) & 724${}_{-30,+42}$ & 0.000 & 36.5 & 31.4 & 0 & 45.4\% & 3.13 & 0.16 \\ +KNN (T) & 651${}_{-34,+35}$ & 0.000 & 37.7 & 28.1 & 0 & 47.1\% & 3.13 & 0.03 \\ +KNN (D) & 462${}_{-59,+47}$ & 0.000 & 39.8 & 39.4 & 0 & 54.4\% & 0.05 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/lite/full/tabarena_leaderboard.csv b/data/lite/full/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..371d69e0973c4710e9432400b4a80989bddba4e1 --- /dev/null +++ b/data/lite/full/tabarena_leaderboard.csv @@ -0,0 +1,42 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),8355.364120885437,32.82596967734543,2898.240203433117,4.500812758268476,0.39210856520445686,0.3302537893793854,0.05262855529794473,0.034206828128183674,41965.2048341603,616.5290381859439,6.284313725490196,0.20731,7964.962425470352,4.336968660354614,1453.2679445956664,3.1473262800156863,0.33725328327072535,0.2650602409638539,0.023462982365024088,0.012475884365151627,27194.492186888157,175.7696866507318,5.0,10,8,4,29,1582.5,35.5,28.9,0.8678921568627451,0.37611282305234645 +REALMLP (tuned + ensemble),73262.05650625509,15.495973965700935,6221.197059136006,7.773886071139863,0.5008360344149994,0.46824768418838447,0.07940034886892017,0.054136007823604344,176934.089463731,288.3346092703114,9.431372549019608,0.20743,24379.27822804451,5.209537744522095,6026.466890881138,3.134808014069114,0.48215030896347355,0.4879313729393421,0.03337765485241673,0.021116734936897676,123158.45385063179,235.70660025917869,9.0,2,3,5,41,1479.4,26.5,33.2,0.7892156862745098,0.19658770498159192 +GBM (tuned + ensemble),3051.8140357896395,23.19597311113395,765.6312795719994,3.893554958066526,0.5633713731527658,0.5405886174512831,0.09276981388879593,0.06907753671930644,11175.64311228686,518.7842315699711,10.156862745098039,0.2094,1670.2684843540192,3.933397054672241,416.5640684155986,2.2357814013957977,0.5860060403960241,0.5529709435194232,0.05121532164333675,0.02690185765083355,9062.280137688022,129.84563236620332,10.5,0,0,6,45,1457.3,29.1,26.5,0.7710784313725491,0.13743547238356926 +TABM (tuned + ensemble),36745.76943855192,8.411393100140142,5149.522818881001,3.299204733305397,0.5087786520610308,0.4909227993621131,0.07731815980678898,0.07456986260613699,79103.63593427226,131.94770411319155,10.470588235294118,0.20702,8818.62617468834,3.054008960723877,3133.9115732644937,1.2727714166408632,0.47020083976979304,0.5017082979395318,0.03947783977260777,0.02626206444228642,49987.707956643724,112.26280083003438,8.0,4,2,6,39,1449.2,23.6,33.3,0.763235294117647,0.21701138349898094 +CAT (tuned + ensemble),19506.186602929058,3.204344211840162,4009.454268521621,1.0312463789689519,0.580901607522528,0.5397008238063394,0.08446793583063153,0.05985441446823578,53079.26276755286,76.04544084479011,11.470588235294118,0.20994,7158.277770042419,1.5355591773986816,1665.5315644683697,0.5594151152585728,0.5700946601892775,0.4887362232633289,0.04436795044877406,0.03206777169603318,22918.764355268395,47.71552750484114,10.0,0,1,7,43,1422.4,29.3,24.2,0.7382352941176471,0.1395912997606414 +MNCA (tuned + ensemble),50606.46933756155,274.8289722134085,6200.95174646742,33.492227920869496,0.568211153208397,0.500598405650223,0.087197295730926,0.07544599505597414,106030.16699977385,1982.0660894780217,12.07843137254902,0.2069,14787.927460432053,13.568246603012085,4618.502341732761,7.737494511886756,0.5565369335340675,0.42525853964274546,0.028958734104604367,0.01997226346851291,92410.46095689958,607.7040280210158,9.0,6,4,4,37,1405.8,28.6,32.0,0.7230392156862745,0.2565106522328091 +XGB (tuned + ensemble),5938.998393348619,7.841493718764362,1213.0777679819278,3.1002279660820715,0.6509288335184475,0.6072657031369854,0.0983470702750016,0.07760111598562064,13936.971832341356,215.6442792684636,12.362745098039216,0.20933,2063.2708826065063,2.946662425994873,700.9607998746845,1.4386570273653,0.6793464009669915,0.6107024069682365,0.04410663224092293,0.036526131727220496,11430.712273277264,98.30128646762262,11.0,2,2,1,46,1398.1,27.5,22.8,0.7159313725490196,0.147424742058385 +CAT (tuned),19506.186602929058,0.4801775754666796,4009.454268521621,0.12298834764615349,0.5972104481929726,0.5865958122800419,0.08963737214853942,0.06500898144509747,53079.26276755286,9.224590724231657,12.411764705882353,0.2099,7158.277770042419,0.14083647727966309,1665.5315644683697,0.06528969211261772,0.601029599608834,0.5971428571428661,0.04664890240719677,0.03347381396493121,22918.764355268395,4.080719588963731,12.5,0,5,0,46,1399.5,19.5,33.9,0.7147058823529412,0.13479362351626448 +TABM (tuned),36745.76943855192,0.9327322455013499,5149.522818881001,0.3522030333025916,0.5996801665884344,0.5784702074771745,0.08660174882794694,0.08901417029096503,79103.63593427226,14.22083187601248,13.166666666666666,0.20774,8818.62617468834,0.25295114517211914,3133.9115732644937,0.13006170213303378,0.5745991505979262,0.565324316015965,0.04427102570326069,0.03586860769423643,49987.707956643724,9.039602142310635,12.0,2,4,1,44,1380.9,20.3,31.6,0.6958333333333333,0.16665774782829965 +GBM (tuned),3051.8140357896395,3.3178744736839745,765.6312795719994,0.6787985114862476,0.6529668965012456,0.6273401467707052,0.10219421875555401,0.08422696989128986,11175.64311228686,83.45556234970765,13.176470588235293,0.20979,1670.2684843540192,0.7316880226135254,416.5640684155986,0.3811195826851199,0.6262832446301,0.6457292820689768,0.0528253340173378,0.037227060560013035,9062.280137688022,21.854692683376335,12.0,0,1,0,50,1380.6,23.9,19.4,0.6955882352941176,0.10739573015819111 +XGB (tuned),5938.998393348619,1.6265712625840132,1213.0777679819278,0.6984321316309747,0.7007828758839686,0.6411202633019268,0.10053526954461836,0.08174521787840415,13936.971832341356,46.56690686937765,13.46078431372549,0.20927,2063.2708826065063,0.5078930854797363,700.9607998746845,0.21305428930075773,0.7223432295573234,0.651749877408692,0.050694852221570774,0.03966653110949333,11430.712273277264,15.141274536165636,13.0,1,2,0,48,1371.7,27.3,22.3,0.6884803921568627,0.12338399887380234 +CAT (default),186.54889291408014,0.4368261870215921,85.73305725415607,0.1697456044717195,0.643860524899134,0.6395421177166652,0.09823054386877146,0.07062879671333311,425.6698111187737,11.747370317227338,13.549019607843137,0.20726,25.447056531906128,0.4332277774810791,6.700347814449044,0.08827101352602937,0.6746502933037238,0.7011272074768353,0.050052492511389324,0.028281581362960058,120.27803545982526,7.036160265837067,13.5,2,0,0,49,1370.3,28.8,32.0,0.6862745098039216,0.12443112696564412 +MNCA (tuned),50606.46933756155,14.969161491768032,6200.95174646742,1.6696617082037302,0.7032607657750378,0.6010415266062749,0.0943412932745321,0.08735862037826053,106030.16699977385,104.17477306753874,13.911764705882353,0.20447,14787.927460432053,0.6251811981201172,4618.502341732761,0.46952933073043823,0.7724401098237351,0.5534523234870296,0.04798611670957431,0.039621112495213905,92410.46095689958,31.933384765725453,13.0,2,3,4,42,1363.3,21.0,26.2,0.6772058823529412,0.16498142344866515 +REALMLP (tuned),73262.05650625509,0.7424195560754514,6221.197059136006,0.41086245878139677,0.7615648419724279,0.7341614913730751,0.1152999215039502,0.09460083387981072,176934.089463731,14.464289249160522,16.333333333333332,0.21789,24379.27822804451,0.25364184379577637,6026.466890881138,0.1692739561746046,0.8260416027840659,0.7952574950381255,0.07487670278496006,0.051842729070846964,123158.45385063179,11.197261335744736,16.0,0,1,0,50,1307.5,30.1,28.9,0.6166666666666667,0.08856876619137789 +TABM (default),142.54126416468154,1.0708042359819598,23.88140219729834,0.4005178443372889,0.7249223512608948,0.6929778392911431,0.11797881241786173,0.11460578352917376,305.2739016434669,13.641685471262758,16.50980392156863,0.2124,38.16493201255798,0.17726969718933105,11.555005609392405,0.12732556548192603,0.8325407781946508,0.7373154550084271,0.05541802374036453,0.04944187412792811,191.5780246731349,9.78619298588067,15.0,1,2,1,47,1301.0,26.3,18.7,0.6122549019607844,0.1151241157988863 +NN_TORCH (tuned + ensemble),29357.971773493522,13.246924213334626,3694.4132392234865,3.45030206949641,0.7913423906704912,0.7848484091526847,0.11464529256213314,0.0944932666061636,94556.98181855923,212.38450473432735,17.647058823529413,0.21375,10620.000384569168,3.6842262744903564,2832.7960851387297,1.800873875617981,0.9196144626274988,0.8952879581151866,0.06973898858075078,0.061555288264305126,57294.450258986595,164.18272158737435,18.0,1,0,1,49,1277.4,25.5,25.2,0.5838235294117647,0.09392436864717116 +EBM (tuned + ensemble),33815.09879914452,1.2886618586147534,6555.968895975794,0.4924074534971456,0.8251828316021278,0.7795172067824816,0.14927591002247803,0.13869027102812415,41186.37921180388,19.776872049406894,19.127450980392158,0.21048,2774.86767077446,0.33999180793762207,1323.3940540554784,0.18426990509033203,0.9668458098028044,0.9579617052614268,0.09999810015670285,0.04670632297862963,17200.38772644666,11.089686715499157,19.5,2,1,2,46,1242.7,22.3,23.9,0.546813725490196,0.12047883893000756 +MNCA (default),245.15467273020278,8.083605813045128,18.524091854412294,1.1280259623754931,0.8466123430768121,0.7891582774496332,0.14592530761518233,0.10470719556789347,315.39790519179047,62.504831773745536,19.57843137254902,0.21297,40.294944524765015,0.5322227478027344,13.739717303881024,0.31576028319217697,1.0,0.9486847305783144,0.07931711197987557,0.07837929327865152,220.32243700895648,24.025524470708156,19.0,1,1,0,49,1236.8,20.3,28.4,0.5355392156862745,0.09382572707879192 +TABDPT (default),165.2279131459255,62.46656582402248,26.956914471170485,21.63883475001047,0.7067604944091354,0.7087494058546768,0.13817507824773126,0.11021274915603842,555.3035574597842,1632.683869591928,19.61764705882353,0.21757,99.63741898536682,27.656949520111084,20.559618245438234,8.61670085798515,1.0,1.0,0.06312122430720712,0.043924126087546374,445.8805538455051,1247.4102886322382,23.0,7,0,1,43,1233.0,24.7,26.9,0.5345588235294118,0.20266609507250863 +XT (tuned + ensemble),1293.0170739772273,3.137296905704573,465.97356682221573,1.4220614441065493,0.8827760158511792,0.8220010106411182,0.15003532717872833,0.12832700332494215,5360.97691412289,92.16814260531523,20.666666666666668,0.21292,775.4602122306824,2.0695900917053223,191.43562446750065,0.7604422990939085,1.0,1.0,0.10450618878352169,0.06621678695786776,3382.254534218774,83.98689534392173,23.5,0,1,1,49,1208.6,23.2,26.7,0.5083333333333333,0.08408535366388699 +REALMLP (default),445.0601024861429,0.7650933592927223,36.1037613876711,0.41223276685751575,0.8980359281995941,0.8308509834523723,0.13373726025215607,0.11326477910502476,1019.9231218617758,14.453050422610211,20.91176470588235,0.22192,142.23176908493042,0.21722769737243652,34.70485605272871,0.17513688577887485,1.0,1.0,0.09472456897469028,0.07471453513004697,689.4249262469413,12.24685971375083,22.0,1,1,1,48,1204.2,22.3,22.3,0.5022058823529412,0.08604289405151948 +EBM (tuned),33815.09879914452,0.15010464892667882,6555.968895975794,0.06527171144492658,0.8740079837962136,0.8162843951648827,0.15568360011622834,0.15027678698326022,41186.37921180388,2.297509957092046,20.99019607843137,0.21528,2774.86767077446,0.03941011428833008,1323.3940540554784,0.01867789003336745,1.0,1.0,0.10542648840521152,0.07625505458415748,17200.38772644666,1.1641602772099668,21.0,2,1,1,47,1201.2,23.5,29.3,0.5002450980392157,0.10949609386075893 +NN_TORCH (tuned),29357.971773493522,0.60710202478895,3694.4132392234865,0.18506171262100615,0.8968088714508368,0.8243014181152737,0.13401199749800358,0.11692743837018824,94556.98181855923,10.572861046995973,21.03921568627451,0.21109,10620.000384569168,0.22349023818969727,2832.7960851387297,0.11152923107147217,1.0,1.0,0.09275938505548831,0.0645493473133498,57294.450258986595,8.23840338809871,22.0,0,0,0,51,1200.8,24.8,24.3,0.49901960784313726,0.0640386668776797 +XGB (default),13.003299437317194,0.6573358236574659,3.2763217873435058,0.3062962827156932,0.8935881423667761,0.832351464501469,0.13407876791637047,0.1340688961878949,39.750102662431935,19.332008402042312,21.41176470588235,0.21746,6.575343370437622,0.39388179779052734,2.0589472404367264,0.12183857766160212,1.0,1.0,0.09289181987806672,0.06166036749443532,32.87694416794321,9.490148793740056,21.0,1,0,0,50,1191.2,31.9,24.1,0.48970588235294116,0.07672499582763596 +FASTAI (tuned + ensemble),6591.795950641819,16.245619264303468,1342.3410902155488,7.489539632560419,0.847763610444884,0.850041417566421,0.1574687700895802,0.12492134132315576,21804.052352671944,459.2322661590378,22.00980392156863,0.22665,3142.961499929428,10.290910243988037,594.9528585230638,4.650812904660155,1.0,1.0,0.09536664926417637,0.083798254174272,17514.935036133815,369.5983579935041,25.0,0,1,0,50,1179.3,26.1,26.1,0.4747549019607843,0.074815250504812 +XT (tuned),1293.0170739772273,0.34508594344643984,465.97356682221573,0.17473237760756105,0.9078855110953847,0.8260312207745636,0.1592405326093513,0.13428456008408696,5360.97691412289,10.510664591668581,22.08823529411765,0.21281,775.4602122306824,0.17961668968200684,191.43562446750065,0.10098353169828128,1.0,1.0,0.09380491800248536,0.08021624259716303,3382.254534218774,9.053845119655822,26.0,0,3,2,46,1178.1,21.3,23.8,0.4727941176470588,0.09438366419577518 +RF (tuned + ensemble),2227.1437476008546,2.46178646181144,528.9202336737816,1.2212931064922863,0.9036228102132694,0.8786814902891319,0.1552753725864734,0.14242365006205202,6570.203557563642,82.81791093485884,22.627450980392158,0.21651,876.37784075737,1.7210140228271484,377.08301133934634,0.7469802601991967,1.0,1.0,0.0779021735596892,0.0805366812460619,5196.472002023166,66.34473572972412,25.0,1,1,0,49,1166.7,19.1,26.2,0.45931372549019606,0.0830044776524399 +EBM (default),83.37771165604686,0.18329166431053012,10.41915882393762,0.09845206874115144,0.8753701223112293,0.8398716035121305,0.16546206780921466,0.15741137890587475,128.33936987969741,3.956299459273312,22.735294117647058,0.21098,11.009112119674683,0.06519913673400879,5.48102419993599,0.05915899401478989,1.0,1.0,0.11849000724321845,0.05913846723637446,66.69318849626488,3.0422808378588053,22.0,1,1,0,49,1164.5,23.6,24.0,0.4566176470588235,0.08576505654731183 +GBM (default),9.322186521455354,1.1440442029167623,3.199181885596564,0.3147197775938453,0.9255861200115366,0.8853545889728972,0.14468947804456891,0.13537583122171876,46.93362314737936,31.99287107427668,22.833333333333332,0.21883,6.97411584854126,0.6232860088348389,2.2021467310523017,0.17114277689525664,1.0,1.0,0.10001855631842682,0.07199602620901613,26.977037743860404,12.72044936701149,23.0,0,0,0,51,1161.6,24.3,33.0,0.45416666666666666,0.05125613970314824 +RF (tuned),2227.1437476008546,0.2597549288880591,528.9202336737816,0.14418858473403337,0.9311148829801792,0.92375209078571,0.1641560700434038,0.15388188759783827,6570.203557563642,8.720931698010416,24.57843137254902,0.21762,876.37784075737,0.15849757194519043,377.08301133934634,0.09141294871228287,1.0,1.0,0.10216101280237799,0.10332282318699906,5196.472002023166,7.735832448019426,26.0,0,1,0,50,1120.1,20.5,23.9,0.4105392156862745,0.05571163028158867 +FASTAI (tuned),6591.795950641819,1.031626776152966,1342.3410902155488,0.6256477184380718,0.930127335805145,0.9022827733096105,0.1739573780334621,0.15385229476843082,21804.052352671944,39.15043952663921,25.019607843137255,0.23007,3142.961499929428,0.8062961101531982,594.9528585230638,0.33651872811068495,1.0,1.0,0.11504721620397895,0.09939086593924433,17514.935036133815,29.379474918801876,27.0,0,1,0,50,1108.9,24.3,21.9,0.39950980392156865,0.06120749367857115 +NN_TORCH (default),73.82550601865731,0.5703827493331012,14.544520367121898,0.24243997995483998,0.9806438127346997,0.9595306239056958,0.19082179548446107,0.1747856758204516,302.60238317827014,12.920116170611008,28.186274509803923,0.21872,30.76021695137024,0.2736239433288574,8.95763915486452,0.12885630130767822,1.0,1.0,0.12094953272557762,0.11098259528530495,159.50005812712556,8.385666110468058,29.0,0,0,0,51,1029.3,26.0,26.1,0.320343137254902,0.03990968700701428 +RF (default),5.493931228039312,0.18195290658988206,0.9395711378622816,0.07441601332167096,0.9921615843984544,0.9757632253928593,0.21950789037203453,0.22433371107540817,12.413845548911803,5.3715309875205834,29.19607843137255,0.23701,1.763962745666504,0.09250092506408691,0.4316611380184793,0.05251745318464858,1.0,1.0,0.16975626521036435,0.14994321754718537,7.440479734614894,4.454035040702154,31.0,0,0,1,50,1000.0,0.0,0.0,0.2950980392156863,0.041602392383395734 +FASTAI (default),27.62558492492227,1.0500701642503925,4.982714620465621,0.48138517234406897,0.980597127305428,0.957774206718109,0.21621464409768376,0.2026444235642231,85.33850461343444,31.816885619382305,29.88235294117647,0.23658,14.524733304977417,0.6894059181213379,3.1153293528650394,0.31195542895805195,1.0,1.0,0.14919440786218285,0.12871609012091897,80.02261780548187,26.24796931235529,31.0,0,0,0,51,982.7,22.9,27.1,0.27794117647058825,0.037299790135517735 +XT (default),3.237064623365215,0.20349772771199545,0.7705107029297205,0.07802443108906316,0.976846527522819,0.960990770880773,0.23896991919713237,0.2466584585336881,6.714510413684847,5.481544853676871,30.19607843137255,0.24149,0.9253263473510742,0.08609175682067871,0.25976606260770435,0.0542029349444295,1.0,1.0,0.17303432896286253,0.14892207194946744,5.482206453649765,4.486837961877324,34.0,0,0,0,51,970.8,27.3,27.4,0.27009803921568626,0.03820248213176148 +LR (tuned + ensemble),297.7768230718725,1.372081803340538,106.57523410916248,0.517828397925699,0.9690155979901051,0.9782757252764236,0.32233254993967364,0.36987288970790627,1284.9569849451952,23.77736863105658,33.088235294117645,0.24535,167.935129404068,0.30171775817871094,47.11049555587997,0.15632276260139144,1.0,1.0,0.26503228118443345,0.25374607764754514,878.5162567525001,9.250054132082282,36.0,1,0,0,50,878.4,29.7,24.9,0.19779411764705881,0.04979838546974998 +LR (tuned),297.7768230718725,0.41669238314909096,106.57523410916248,0.1447260883613805,0.9805057099085136,0.984953842895582,0.3284338273581061,0.3824677470626182,1284.9569849451952,6.250094556288328,33.98039215686274,0.25329,167.935129404068,0.11377191543579102,47.11049555587997,0.0626428060263515,1.0,1.0,0.2768258727778956,0.2536050260691558,878.5162567525001,4.008480462038967,36.5,0,0,1,50,842.1,30.3,28.7,0.17549019607843136,0.03571176058478315 +LR (default),7.020498261732214,0.4395605956806856,2.5089988631155826,0.17274864462745904,0.9855950536843827,0.9895940411825562,0.3421563043618267,0.42987229655143505,32.25963024407083,8.287063835475498,34.81372549019608,0.25334,5.345212697982788,0.13631606101989746,1.534023110424607,0.08993275118189932,1.0,1.0,0.2768258727778956,0.31807101339614474,17.290213667932342,5.167879066497201,37.0,0,0,0,51,810.4,28.3,31.0,0.15465686274509804,0.030479041022879558 +KNN (tuned + ensemble),191.27347564697266,6.6349917159361,29.50398939167977,0.5506054195292707,1.0,0.987210683055718,0.4540324837867108,0.6121364924236414,81.3768199133465,45.17733544160728,36.509803921568626,0.34601,13.195080518722534,0.23362517356872559,3.133583813905716,0.15550157631257452,1.0,1.0,0.4085005377667177,0.6874431886605319,62.764963980484396,12.85103075181078,39.0,0,0,0,51,723.5,41.5,30.0,0.11225490196078432,0.031857808234694314 +KNN (tuned),191.27347564697266,1.469562432345222,29.50398939167977,0.11124081145987941,1.0,0.9885254568237573,0.47147883710225513,0.6579092058528593,81.3768199133465,9.204095260134709,37.69607843137255,0.34503,13.195080518722534,0.07799005508422852,3.133583813905716,0.030069828033447266,1.0,1.0,0.456326948689309,0.7494456544265635,62.764963980484396,2.2192320534223704,40.0,0,1,0,50,650.8,34.4,33.8,0.08259803921568627,0.03557390516175632 +KNN (default),1.8986088995840036,0.1759517753825468,0.5259164666571766,0.03511341794167653,1.0,1.0,0.5437359502735426,0.9177342094214581,1.049764217774364,2.1650106683540193,39.794117647058826,0.42464,0.2273859977722168,0.03137516975402832,0.0509044812778989,0.01926569938659668,1.0,1.0,0.5148544563369014,1.0,1.0,1.4495271718572689,41.0,0,0,0,51,462.3,46.1,58.9,0.030147058823529412,0.02537743831589345 diff --git a/data/lite/full/time_plot.pdf b/data/lite/full/time_plot.pdf new file mode 100644 index 0000000000000000000000000000000000000000..969dfadbd4236ef005d0ee18494eb219ce9112a4 Binary files /dev/null and b/data/lite/full/time_plot.pdf differ diff --git a/data/lite/full/time_plot.png.zip b/data/lite/full/time_plot.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..8b42c4b6e2232a76fd9e3d1f5ee7ee2f360f2d09 --- /dev/null +++ b/data/lite/full/time_plot.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f0389b7f203eb4d39d6b6a3da311337a46c01621e12fc206d904eff8ce58527 +size 337899 diff --git a/data/lite/full/tuning-impact-elo-horizontal.pdf b/data/lite/full/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0d17bd96b8ec531addaf466b220f5684d2c39680 Binary files /dev/null and b/data/lite/full/tuning-impact-elo-horizontal.pdf differ diff --git a/data/lite/full/tuning-impact-elo-horizontal.png.zip b/data/lite/full/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..b65ab41d5845c5dcf678a9654d6135af282cba29 --- /dev/null +++ b/data/lite/full/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:717887ad1b2151bca8f71879dd6dc9ea0173eebf51e41d1f983b03864a6be794 +size 128322 diff --git a/data/lite/full/tuning-impact-elo.pdf b/data/lite/full/tuning-impact-elo.pdf new file mode 100644 index 0000000000000000000000000000000000000000..912f03361cb9baf857f7b1e25c537adcc1b1bcbe Binary files /dev/null and b/data/lite/full/tuning-impact-elo.pdf differ diff --git a/data/lite/full/tuning-impact-elo.png.zip b/data/lite/full/tuning-impact-elo.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..ee51afa28d858e85d8a16f6aeaf3dc7e86aef758 --- /dev/null +++ b/data/lite/full/tuning-impact-elo.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a76c62115b0ed7fe8d657e2a60f9345f3b722b683797f51beddab991413d1c0 +size 122269 diff --git a/data/lite/tabicl-cls/figures/critical-diagram.pdf b/data/lite/tabicl-cls/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..230e5237b8ba9e27c899ddb6d270c0ef3939635a Binary files /dev/null and b/data/lite/tabicl-cls/figures/critical-diagram.pdf differ diff --git a/data/lite/tabicl-cls/figures/critical-diagram.png.zip b/data/lite/tabicl-cls/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..5473e45838602b2408c048ae6f15679b205af04e --- /dev/null +++ b/data/lite/tabicl-cls/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b62643ca543a5b490148fb916ed1fdf862546abf3d6ecb1a20ed75b16e37b3f +size 312252 diff --git a/data/lite/tabicl-cls/leaderboard.tex b/data/lite/tabicl-cls/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..cfdf59ad945f6644a84f41b36416d63959828e82 --- /dev/null +++ b/data/lite/tabicl-cls/leaderboard.tex @@ -0,0 +1,50 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1544${}_{-34,+36}$}} & \textcolor{gold}{\textbf{0.569}} & \textcolor{gold}{\textbf{7.4}} & \textcolor{gold}{\textbf{2.9}} & \textcolor{gold}{\textbf{7}} & \textcolor{gold}{\textbf{6.9\%}} & 1239.48 & 1.98 \\ +TabM (T+E) & \textcolor{silver}{\textbf{1445${}_{-33,+31}$}} & \textcolor{bronze}{\textbf{0.491}} & \textcolor{silver}{\textbf{10.9}} & \textcolor{bronze}{\textbf{4.4}} & \textcolor{bronze}{\textbf{3}} & \textcolor{bronze}{\textbf{9.8\%}} & 2261.57 & 1.26 \\ +RealMLP (T+E) & \textcolor{bronze}{\textbf{1441${}_{-37,+31}$}} & 0.418 & \textcolor{bronze}{\textbf{11.0}} & 6.6 & 0 & 10.5\% & 5429.82 & 2.94 \\ +LightGBM (T+E) & 1420${}_{-29,+29}$ & 0.389 & 11.8 & 8.6 & 0 & 11.6\% & 368.47 & 1.22 \\ +TabICL (D) & 1407${}_{-35,+24}$ & \textcolor{silver}{\textbf{0.521}} & 12.4 & \textcolor{silver}{\textbf{4.2}} & \textcolor{silver}{\textbf{5}} & \textcolor{silver}{\textbf{9.3\%}} & 9.07 & 2.11 \\ +CatBoost (T+E) & 1403${}_{-33,+28}$ & 0.377 & 12.6 & 7.3 & 0 & 10.5\% & 1207.49 & 0.52 \\ +XGBoost (T+E) & 1395${}_{-22,+30}$ & 0.320 & 12.7 & 6.8 & 1 & 12.1\% & 655.54 & 0.78 \\ +ModernNCA (T) & 1385${}_{-31,+30}$ & 0.275 & 13.3 & 6.2 & 2 & 10.7\% & 4498.66 & 0.45 \\ +CatBoost (T) & 1375${}_{-26,+30}$ & 0.360 & 13.7 & 9.0 & 0 & 11.2\% & 1207.49 & 0.05 \\ +ModernNCA (T+E) & 1375${}_{-33,+32}$ & 0.383 & 13.6 & 5.2 & 2 & 10.8\% & 4498.66 & 8.48 \\ +TabM (T) & 1373${}_{-32,+31}$ & 0.409 & 13.7 & 5.5 & 2 & 10.8\% & 2261.57 & 0.13 \\ +CatBoost (D) & 1372${}_{-28,+27}$ & 0.343 & 13.8 & 7.4 & 2 & 11.6\% & 5.18 & 0.09 \\ +XGBoost (T) & 1359${}_{-27,+27}$ & 0.264 & 14.3 & 8.1 & 1 & 12.4\% & 655.54 & 0.16 \\ +LightGBM (T) & 1355${}_{-31,+27}$ & 0.302 & 14.6 & 9.8 & 0 & 12.6\% & 368.47 & 0.12 \\ +TabM (D) & 1308${}_{-24,+27}$ & 0.272 & 16.7 & 7.6 & 1 & 14.3\% & 9.30 & 0.13 \\ +RealMLP (T) & 1289${}_{-28,+35}$ & 0.198 & 17.5 & 11.4 & 0 & 14.3\% & 5429.82 & 0.17 \\ +EBM (T+E) & 1289${}_{-37,+29}$ & 0.170 & 17.5 & 7.2 & 2 & 16.1\% & 852.29 & 0.21 \\ +TorchMLP (T+E) & 1283${}_{-34,+27}$ & 0.228 & 17.7 & 11.2 & 0 & 13.5\% & 2406.10 & 2.04 \\ +EBM (T) & 1242${}_{-28,+33}$ & 0.110 & 19.7 & 10.4 & 0 & 16.8\% & 852.29 & 0.02 \\ +FastaiMLP (T+E) & 1224${}_{-32,+25}$ & 0.207 & 20.7 & 11.5 & 0 & 17.6\% & 581.37 & 4.78 \\ +EBM (D) & 1211${}_{-32,+27}$ & 0.119 & 21.3 & 10.0 & 1 & 17.6\% & 4.40 & 0.05 \\ +TorchMLP (T) & 1199${}_{-30,+29}$ & 0.085 & 21.9 & 16.9 & 0 & 15.9\% & 2406.10 & 0.11 \\ +XGBoost (D) & 1198${}_{-26,+31}$ & 0.102 & 21.7 & 12.0 & 1 & 15.7\% & 1.66 & 0.12 \\ +RealMLP (D) & 1197${}_{-28,+35}$ & 0.095 & 21.8 & 11.7 & 1 & 15.9\% & 34.62 & 0.17 \\ +ModernNCA (D) & 1192${}_{-27,+28}$ & 0.142 & 22.1 & 11.1 & 1 & 17.3\% & 12.99 & 0.34 \\ +ExtraTrees (T+E) & 1182${}_{-28,+26}$ & 0.082 & 22.6 & 15.7 & 0 & 17.7\% & 187.87 & 0.72 \\ +RandomForest (T+E) & 1155${}_{-25,+37}$ & 0.067 & 23.8 & 11.0 & 1 & 18.0\% & 266.25 & 0.64 \\ +TabDPT (D) & 1148${}_{-35,+28}$ & 0.186 & 24.3 & 7.6 & 2 & 18.6\% & 20.26 & 8.55 \\ +ExtraTrees (T) & 1146${}_{-34,+33}$ & 0.056 & 24.3 & 12.6 & 0 & 18.8\% & 187.87 & 0.08 \\ +FastaiMLP (T) & 1142${}_{-36,+29}$ & 0.094 & 24.5 & 15.3 & 0 & 19.7\% & 581.37 & 0.33 \\ +LightGBM (D) & 1135${}_{-33,+24}$ & 0.053 & 24.8 & 21.5 & 0 & 17.6\% & 1.64 & 0.15 \\ +RandomForest (T) & 1110${}_{-28,+35}$ & 0.045 & 25.8 & 17.3 & 0 & 18.8\% & 266.25 & 0.07 \\ +TorchMLP (D) & 1026${}_{-34,+29}$ & 0.022 & 29.3 & 25.6 & 0 & 22.2\% & 5.44 & 0.14 \\ +FastaiMLP (D) & 1005${}_{-34,+29}$ & 0.027 & 30.1 & 26.3 & 0 & 23.6\% & 3.04 & 0.30 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.003 & 30.3 & 23.4 & 0 & 25.3\% & 0.35 & 0.03 \\ +Linear (T+E) & 946${}_{-30,+33}$ & 0.044 & 32.2 & 17.1 & 1 & 30.4\% & 44.56 & 0.16 \\ +ExtraTrees (D) & 928${}_{-33,+33}$ & 0.011 & 32.8 & 28.7 & 0 & 28.3\% & 0.24 & 0.04 \\ +Linear (T) & 912${}_{-27,+36}$ & 0.028 & 33.2 & 25.8 & 0 & 31.1\% & 44.56 & 0.06 \\ +Linear (D) & 888${}_{-34,+26}$ & 0.020 & 34.0 & 31.7 & 0 & 32.1\% & 1.42 & 0.09 \\ +KNN (T+E) & 748${}_{-36,+40}$ & 0.000 & 37.3 & 30.3 & 0 & 49.6\% & 3.50 & 0.16 \\ +KNN (T) & 676${}_{-43,+42}$ & 0.000 & 38.5 & 25.8 & 0 & 51.7\% & 3.50 & 0.03 \\ +KNN (D) & 457${}_{-75,+55}$ & 0.000 & 40.9 & 40.5 & 0 & 60.2\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/lite/tabicl-cls/tabarena_leaderboard.csv b/data/lite/tabicl-cls/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..a1569fad5517e62455bb72ecc7789a0de91e3da1 --- /dev/null +++ b/data/lite/tabicl-cls/tabarena_leaderboard.csv @@ -0,0 +1,43 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),7748.990037812127,20.667584995428722,2786.367822030483,2.9686980518587505,0.4305752039617605,0.3877041747388167,0.06883082832229188,0.04066258382676245,31469.589611836727,278.09709780834015,7.402777777777778,0.157955,6468.6270185709,3.7256141901016235,1239.480006130907,1.9841131308583402,0.3593825434326649,0.3109231173840635,0.04103436500005686,0.014103198961668492,23460.49845320325,142.30518626073365,6.0,7,3,3,23,1543.7,35.2,34.0,0.8438346883468835,0.3436916810221074 +TABM (tuned + ensemble),32822.329592857095,6.718376735846202,3197.494145595591,1.5968882956786812,0.5092283925039975,0.507000043838314,0.0984603164003323,0.08012828297402202,47368.16918956869,126.38148088436719,10.916666666666666,0.179425,8087.822376012802,2.9963064193725586,2261.5736774780025,1.258481528676095,0.47136645123500964,0.5108291907783599,0.060615421067887476,0.023331148793904504,38436.80992013264,113.73588992793104,8.5,3,2,4,27,1444.7,30.6,32.5,0.758130081300813,0.22495824385743393 +REALMLP (tuned + ensemble),78236.15068201887,12.508955962128109,5830.1148293797,4.304066546062194,0.5822217846671746,0.540978236726168,0.1054473766976048,0.06442528212733184,142054.31198219568,287.5042196718009,11.0,0.172085,23325.48352110386,5.581948518753052,5429.819541400706,2.9376822787963643,0.5258237901453162,0.524882615107501,0.06227050818890584,0.031582684665151774,93177.39784523446,229.06556046190957,11.0,0,2,3,31,1441.2,30.9,36.7,0.7560975609756098,0.1515892708417218 +GBM (tuned + ensemble),2716.464209013515,11.650436507331001,652.7081231589766,2.3316901429136103,0.6113071815088934,0.6115648021204206,0.11626709694511878,0.0824232864688556,8680.115930381902,215.8804964577198,11.75,0.17347,1527.4595284461975,3.0867438316345215,368.4672845277482,1.2233637093106904,0.6534872634143774,0.6466968889868911,0.07038437664020675,0.02975866915001093,6991.1315945904835,101.20971107477635,12.0,0,0,3,33,1420.3,28.8,28.8,0.7378048780487805,0.11670989654152485 +TABICL (default),122.91359008020825,22.535752693812054,9.963658601166212,2.414261101064252,0.4788051732855598,0.5443303646525952,0.09266040289556793,0.06762151146287841,179.76964423723584,285.61149302454663,12.402777777777779,0.17301,25.354220509529114,3.4422082901000977,9.073519792448254,2.1146179912287675,0.4992564187967412,0.5686403350371668,0.050317184697893425,0.021088283054317383,144.31099154558206,133.6960355402875,12.25,5,2,1,28,1406.6,23.5,34.5,0.7218834688346883,0.24049138766638822 +CAT (tuned + ensemble),14495.012520273527,2.4179346097840204,2435.7734425933513,0.7431584046715475,0.6226459246322733,0.5724409794042533,0.10496616447275113,0.0672300454324769,26875.894999800756,51.46768059921607,12.583333333333334,0.16073500000000002,4554.886538267136,1.2364250421524048,1207.4876040106037,0.5178688292431316,0.6153632608570143,0.555056424681614,0.05369915486940119,0.03126669058324277,20500.32079395515,42.031537823549,11.75,0,1,4,31,1402.6,27.7,32.6,0.717479674796748,0.13630557293973747 +XGB (tuned + ensemble),5342.747825033135,6.078431023491754,903.1395181375539,2.4252994962785444,0.6804014135907539,0.6264870966428019,0.12111620645907134,0.08995137231528924,10629.483194714896,137.278973628425,12.680555555555555,0.17348,1566.1410616636276,2.2440043687820435,655.5367556867432,0.7811829897319023,0.7126419091535116,0.6325401095781271,0.09321543161193863,0.03379706826049186,7509.936420425425,78.99719729544988,10.5,1,2,1,32,1395.2,29.8,21.5,0.7151084010840109,0.14600195845562308 +MNCA (tuned),57242.54890214073,19.57283937268787,5418.777655567625,1.42346301375637,0.7251534847407544,0.5849087413461024,0.10710451817631175,0.08805735190470587,83536.32309061421,128.61468675559033,13.319444444444445,0.17875999999999997,13889.081128954887,0.5763018131256104,4498.662765534045,0.4537708228509102,0.7744612480983963,0.5544791639834478,0.0863080541794865,0.030308265873669055,68177.28399410362,31.783037323749284,11.5,2,0,1,33,1384.6,29.3,30.1,0.6995257452574526,0.1615009946329935 +MNCA (tuned + ensemble),57242.54890214073,352.26783115996255,5418.777655567625,26.036546991587294,0.6174214208973879,0.5493942592841631,0.10758973700607603,0.09075466007453252,83536.32309061421,2422.151854876793,13.63888888888889,0.18984499999999999,13889.081128954887,13.45927095413208,4498.662765534045,8.47783782257851,0.6265388929939845,0.5206641944860473,0.06347089264029215,0.03645962776745639,68177.28399410362,597.4215710672797,12.0,2,1,6,27,1374.6,32.0,32.5,0.6917344173441734,0.19092899712791367 +TABM (tuned),32822.329592857095,0.8157637053065829,3197.494145595591,0.18466029782681834,0.5912555585094392,0.5781476073453056,0.10824063992837979,0.09522303955431258,47368.16918956869,14.021720853154184,13.680555555555555,0.180525,8087.822376012802,0.23180127143859863,2261.5736774780025,0.12874845474998858,0.5877464635288101,0.5650956393005605,0.06311117459522003,0.03460069818708005,38436.80992013264,9.17800659262771,11.0,2,3,0,31,1373.2,30.4,32.0,0.6907181571815718,0.1806193600709681 +CAT (tuned),14495.012520273527,0.51534503698349,2435.7734425933513,0.10399437886071032,0.6397756322608608,0.6322254665313207,0.11211575515013829,0.07212514514540504,26875.894999800756,8.46801242481876,13.708333333333334,0.16238999999999998,4554.886538267136,0.0815126895904541,1207.4876040106037,0.0500282797740022,0.6663747291561838,0.6867488983507399,0.06911800649977579,0.037329065440735124,20500.32079395515,3.2560454821039713,13.0,0,1,1,34,1375.2,29.1,25.7,0.6900406504065041,0.11055045510865025 +CAT (default),224.62928151422076,0.4154767062928941,111.70337568642236,0.13827596961428462,0.6569717560090025,0.6468762263410741,0.11626850131699006,0.07278525873435686,447.23099485428673,10.877554497735453,13.76388888888889,0.161695,17.568647384643555,0.40540146827697754,5.180417923757872,0.0935400303065973,0.6872314742824219,0.7382324728488332,0.07896512392539878,0.02932904358303992,99.87500924350084,7.319630010463586,13.75,2,0,0,34,1371.6,26.1,27.9,0.6886856368563685,0.13443131358206103 +XGB (tuned),5342.747825033135,1.2801591290367975,903.1395181375539,0.639985957335127,0.7363747567107989,0.6713345405972482,0.12390832771884491,0.09571764627414511,10629.483194714896,33.68776538848152,14.305555555555555,0.176915,1566.1410616636276,0.3626103401184082,655.5367556867432,0.15681948639835536,0.7444722954009122,0.6983685877302974,0.10245426870569208,0.04170721330284183,7509.936420425425,13.242651903663184,13.0,1,1,0,34,1359.0,26.7,26.8,0.6754742547425474,0.12394509846685876 +GBM (tuned),2716.464209013515,1.8839820755852594,652.7081231589766,0.5269472540571296,0.6982904100452502,0.6749309433374803,0.12589415781062138,0.09744390266808721,8680.115930381902,44.39170809497161,14.555555555555555,0.17537,1527.4595284461975,0.5379931926727295,368.4672845277482,0.11972777459115919,0.7332165833714938,0.7275226403809433,0.07614432042371466,0.03541051328529879,6991.1315945904835,14.435205317774814,15.5,0,1,0,35,1355.3,26.2,30.5,0.6693766937669376,0.10207008030434894 +TABM (default),140.8987251520157,0.9393433663580153,14.983795328833262,0.18062205057314412,0.7280741356398281,0.6887171968757086,0.1434354286809727,0.11695380949114573,217.11963453230365,13.588062902891982,16.694444444444443,0.18336999999999998,29.720080256462097,0.17920684814453125,9.303649390343331,0.12625846443862987,0.8404844924976476,0.7247027492327622,0.09137336521284833,0.04657437619358792,136.86363214905396,9.928949591639647,15.0,1,2,1,32,1308.3,26.2,23.4,0.6172086720867209,0.1309363094547008 +REALMLP (tuned),78236.15068201887,0.5794834030999078,5830.1148293797,0.2343189481249011,0.8023310570642167,0.7611882470431272,0.14345077264424028,0.10287611571863375,142054.31198219568,14.164064071067262,17.47222222222222,0.17396499999999998,23325.48352110386,0.2515758275985718,5429.819541400706,0.1667243421007542,0.8787741747777617,0.848557552697969,0.12726966477565288,0.052454140783326594,93177.39784523446,11.047988768711498,17.0,0,1,0,35,1289.1,34.7,27.8,0.5982384823848238,0.08738586997653082 +EBM (tuned + ensemble),23286.664896190166,1.091496765613556,2120.3766035360445,0.27276807805390907,0.8303965578080257,0.7468568741683155,0.16075450104299963,0.12694848030107259,22277.212479524587,20.94625622434944,17.541666666666668,0.181755,2063.8647414445877,0.34018075466156006,852.2924105567174,0.2147416876316487,0.9326307523783561,0.9041078057334899,0.07435566243697694,0.040508730984801566,14796.589958532411,12.4343949892923,19.0,2,1,0,33,1289.1,28.5,36.1,0.5965447154471545,0.1391964347831879 +NN_TORCH (tuned + ensemble),24905.959744539527,15.398705760637919,2952.9414864535283,2.849701545461187,0.7723767872546564,0.7802343502313487,0.13521206862305007,0.09239504445258413,57128.74445660633,251.675577261934,17.72222222222222,0.174655,8823.152364969254,4.023689031600952,2406.0981194606593,2.04339307286537,0.9031626632034823,0.896307171089784,0.07810139442157471,0.051810123946366915,44532.50472025028,186.31979424186875,18.0,0,1,1,34,1282.8,26.1,33.4,0.592140921409214,0.08923776569295938 +EBM (tuned),23286.664896190166,0.12520347701178658,2120.3766035360445,0.03546501889806546,0.8902973079447266,0.8051099726251761,0.1679622875400496,0.13349605155397093,22277.212479524587,2.4619900653558613,19.708333333333332,0.181165,2063.8647414445877,0.03867542743682861,852.2924105567174,0.019590150951601572,1.0,0.996706192358367,0.08520888190451481,0.038628140113403854,14796.589958532411,1.2081964035400725,21.5,0,2,1,33,1241.5,32.3,27.7,0.5436991869918699,0.09584534731902562 +FASTAI (tuned + ensemble),7289.840611391597,19.15695581171248,1303.7963220052682,8.14447528633287,0.7926888983747619,0.80077618248164,0.17570272525312422,0.10287009121624455,19518.83385555907,536.4983396151715,20.680555555555557,0.17774,2935.3402432203293,12.184540390968323,581.3698825429726,4.7809972969941885,0.9972327674497973,1.0,0.10018841132165368,0.07078877634641503,12902.237935802625,480.50623521802163,22.0,0,1,0,35,1224.4,24.6,31.2,0.5199864498644986,0.08662996472004297 +EBM (default),88.55436639653311,0.15727221965789795,8.239269939196605,0.06798490198186677,0.8813849391122723,0.8284798529778744,0.17628689881385806,0.13673033295881595,96.02410546318727,4.157976350403235,21.34722222222222,0.185075,9.609492063522339,0.06619513034820557,4.39583132423955,0.04826106459931703,1.0,1.0,0.10083235741300572,0.04426810265651604,56.77501308270701,3.212150379842276,22.0,1,1,0,34,1210.6,26.2,31.2,0.5037262872628726,0.10005999473376052 +XGB (default),11.691550334294638,0.582956936624315,2.5863360706414102,0.3100034135849125,0.8979377675030106,0.8350402509472741,0.15712415912888847,0.1440988639628879,31.813142204435906,16.390368754261612,21.65277777777778,0.175445,5.739119291305542,0.3213019371032715,1.66399954826743,0.12057611518072474,1.0,0.9803123668140437,0.12284267447258906,0.05749824237647426,27.152708445757774,9.436867401020109,21.0,1,0,0,35,1197.9,31.0,25.1,0.4962737127371274,0.08303292342961185 +REALMLP (default),497.1762997441822,0.6067886617448595,35.85201952385433,0.2401830912617534,0.9051917890614664,0.8310467424092711,0.15917290341146056,0.11078024980560792,889.4575438184943,14.614115185514347,21.76388888888889,0.17567,146.45613551139832,0.23558390140533447,34.61854805295221,0.16884822006186367,1.0,1.0,0.10725973083151863,0.06418095087854436,602.5345759525801,11.596995428149068,24.5,1,0,1,34,1197.4,34.7,27.4,0.4935636856368564,0.08575069891200408 +NN_TORCH (tuned),24905.959744539527,0.6660838723182678,2952.9414864535283,0.15459242992627908,0.9148909672592942,0.8446960260537524,0.1590590600544738,0.1132570686054671,57128.74445660633,11.722847631461935,21.88888888888889,0.17361500000000002,8823.152364969254,0.2007770538330078,2406.0981194606593,0.11202253911830268,1.0,1.0,0.10327565910431935,0.06338916104312123,44532.50472025028,8.64392602078908,23.5,0,0,0,36,1199.2,28.2,29.8,0.4905149051490515,0.059310078186054126 +MNCA (default),306.5998376806577,9.98978877067566,17.805999463280088,0.7747951629896741,0.8583270774687776,0.8382175783752852,0.17271697778050027,0.11066382849299157,252.51128046279433,73.98229627081949,22.055555555555557,0.190215,27.28103244304657,0.5536909103393555,12.987204209764974,0.33894040253174296,1.0,0.9979757085020253,0.13240284359564197,0.06962042836732543,169.66480804983473,24.653041834920074,22.0,1,1,0,34,1191.7,28.0,26.1,0.48644986449864497,0.08988082766087606 +XT (tuned + ensemble),1037.372560845481,3.071019298500485,360.7132393580917,1.4744924910300816,0.9176899118580594,0.8698246935572322,0.17680045603807865,0.14004244432772361,4707.816009404905,94.53215065372743,22.59722222222222,0.183685,752.3897469043732,2.041845917701721,187.8732763026905,0.7207715417972143,1.0,1.0,0.1315762721206637,0.06415374880272719,3200.195519746224,83.245630982298,25.0,0,0,0,36,1181.7,25.6,27.9,0.47323848238482386,0.06358541416626037 +RF (tuned + ensemble),2020.4760966300964,2.134269985887739,415.1583564929086,1.2910427322323617,0.932518287859348,0.8954785691697733,0.18024531301876434,0.15672676475141362,5462.814816714475,83.37351619032778,23.805555555555557,0.180965,851.531911611557,1.8475559949874878,266.2539346689623,0.6395657388264155,1.0,1.0,0.1226880904436346,0.0790409010893038,4305.508903299853,67.08260239114765,26.5,1,1,0,34,1155.2,36.6,25.0,0.4437669376693767,0.09119806161138436 +TABDPT (default),175.0330073899693,68.71396222379472,26.097389455873152,21.943944030413235,0.8137945017331583,0.8082409814199439,0.1855263261830053,0.13701159964452117,492.7334429125401,1620.9148184577618,24.26388888888889,0.198215,97.51883804798126,29.261866688728333,20.255521876560408,8.552781426594889,1.0,1.0,0.13114122736509654,0.07097771501831898,407.04692448648916,1198.388331768314,28.5,2,2,0,32,1147.6,27.2,34.3,0.4325880758807588,0.13132767354712327 +XT (tuned),1037.372560845481,0.3051542573504978,360.7132393580917,0.16689901565151677,0.9439228284777492,0.8694633243464369,0.18779409245774725,0.14640398817829617,4707.816009404905,10.02739657365076,24.333333333333332,0.18688500000000002,752.3897469043732,0.1615079641342163,187.8732763026905,0.07985268047821353,1.0,1.0,0.13372024318384995,0.07682782754093201,3200.195519746224,8.479842721007039,27.0,0,2,1,33,1145.6,32.4,33.2,0.43089430894308944,0.07940241885003267 +FASTAI (tuned),7289.840611391597,1.1152561836772494,1303.7963220052682,0.6947586912123508,0.9058648946317567,0.8672615100898629,0.19682184978792786,0.1349210968164207,19518.83385555907,43.25082874084665,24.47222222222222,0.18023,2935.3402432203293,0.9242111444473267,581.3698825429726,0.334061010051534,1.0,1.0,0.16429853291480473,0.07553682415502974,12902.237935802625,29.54949110419716,26.0,0,0,1,35,1141.7,28.7,35.4,0.4275067750677507,0.06550251739970477 +GBM (default),7.365004950099522,0.7322817908393012,2.509446514786595,0.18984168667689244,0.9471769132881858,0.9127183714331323,0.17557516568639736,0.1512943493378248,35.17208033262076,16.8021003794707,24.791666666666668,0.18529,6.238969683647156,0.6098566055297852,1.6407068695252,0.14810769896236448,1.0,1.0,0.14940090168048453,0.06327021804096736,22.04913243774461,11.229856118149964,25.0,0,0,0,36,1135.3,24.0,32.5,0.41971544715447157,0.046601993319410735 +RF (tuned),2020.4760966300964,0.22272791465123495,415.1583564929086,0.1434182534422469,0.9551019371507523,0.93231001885197,0.1882659625779667,0.16633452106057722,5462.814816714475,8.151832659415685,25.75,0.18201499999999998,851.531911611557,0.14735674858093262,266.2539346689623,0.0714840059306155,1.0,1.0,0.1210948587048516,0.09657799136409392,4305.508903299853,7.364430017401964,27.0,0,1,0,35,1110.3,34.7,27.2,0.39634146341463417,0.0578995665449669 +NN_TORCH (default),47.65417258607017,0.6302466657426622,10.641224850569223,0.23501087478485314,0.978257373569646,0.9546286579228285,0.22199681565167823,0.16718768988466395,143.43581528304946,15.239602876479248,29.319444444444443,0.18706,22.84017276763916,0.28995954990386963,5.436935257156724,0.1377204507988835,1.0,1.0,0.1627665585076386,0.09575304821587584,137.2254146054324,9.730207537012728,30.5,0,0,0,36,1026.0,29.0,33.1,0.3092818428184282,0.039017409649225274 +FASTAI (default),30.521977769003975,1.0843763152758281,4.742334418927322,0.49201194647444324,0.972512597016023,0.9440329717104782,0.2355821153886495,0.17839935665336887,76.51706745050629,33.35318180861937,30.13888888888889,0.187165,13.653389573097229,0.6859067678451538,3.0439688517356123,0.3001709716511216,1.0,1.0,0.1549184931797169,0.11731493973750912,56.69169806857949,28.502895225657927,32.0,0,0,0,36,1005.1,28.9,33.6,0.28929539295392953,0.03801249783928372 +RF (default),3.2084562049971685,0.13506737020280626,0.4263514561749847,0.06558284712037367,0.9965256868256966,0.9704660955206462,0.2530292640512533,0.24048643309547021,7.059782904150193,4.425232204599554,30.305555555555557,0.20493,1.2789119482040405,0.09202027320861816,0.34809529918260984,0.03472589740080406,1.0,1.0,0.22217921675183,0.11658001888686689,5.376229465336108,3.6309495335392015,32.75,0,0,1,35,1000.0,0.0,0.0,0.28523035230352306,0.04264840740930118 +LR (tuned + ensemble),246.97270359595618,1.3169628845320807,86.24001785898447,0.299396801053486,0.9561054304859822,0.9692891800955401,0.3036771075929869,0.24600205726125118,1104.1489965774379,26.605204102831735,32.208333333333336,0.20664,169.60151755809784,0.2824718952178955,44.56368864780743,0.15651788282525403,1.0,1.0,0.2429229288355661,0.14985483348209602,675.130801866569,8.851961834833686,35.5,1,0,0,35,946.3,32.9,29.9,0.23882113821138212,0.05837351309196938 +XT (default),1.8672458993064032,0.17208321889241537,0.35727892050479015,0.06872435387895405,0.9892721532660226,0.9742756012359293,0.2830213570676806,0.27312061168279556,4.999840482001418,4.841906560022478,32.75,0.20700000000000002,0.8974905014038086,0.08268320560455322,0.23619265983619955,0.036597834553610306,1.0,1.0,0.2174961946642111,0.1456189920656607,3.672313260209499,4.261733262225922,35.5,0,0,0,36,927.7,32.4,32.9,0.22560975609756098,0.034861652371361254 +LR (tuned),246.97270359595618,0.43901119629542035,86.24001785898447,0.1002982819551287,0.9723830890370608,0.9786926253905683,0.3109086001235896,0.25775113200017263,1104.1489965774379,7.096822360818471,33.208333333333336,0.206755,169.60151755809784,0.12632572650909424,44.56368864780743,0.062293080471580324,1.0,1.0,0.250867124655856,0.1684820136513116,675.130801866569,5.062717123136234,36.0,0,0,1,35,911.7,35.1,26.4,0.21443089430894308,0.03868985665909902 +LR (default),6.324005815717909,0.45038049750857884,2.0933349209116243,0.12679966936326295,0.9795929927195421,0.9853721744946168,0.3212404515646151,0.28617086703013384,28.81662586680685,8.679077173966622,34.0,0.21439,5.391173958778381,0.12632572650909424,1.4157886113918772,0.09164922519151064,1.0,1.0,0.2508658887883501,0.16848092022763028,16.81966393701253,5.719143257720329,37.0,0,0,0,36,888.3,25.9,33.4,0.1951219512195122,0.031563196056924266 +KNN (tuned + ensemble),168.93849890761905,8.359918620851305,8.837249824100754,0.4805954727942703,1.0,0.9823277278077026,0.4959032419783757,0.6046074386086431,80.93770906535752,52.719394620298914,37.34722222222222,0.321975,13.986609816551208,0.22182130813598633,3.4970040049929065,0.1592449370444793,1.0,1.0,0.44534583678290146,0.6925990746528932,56.97784214902664,12.39378247919743,40.0,0,0,0,36,748.1,39.6,35.8,0.11348238482384825,0.03301741936100167 +KNN (tuned),168.93849890761905,1.9504973888397217,8.837249824100754,0.10793579395855045,1.0,0.9833301560380661,0.5170601568840968,0.6549991426818225,80.93770906535752,11.495707208766925,38.52777777777778,0.32563,13.986609816551208,0.06376731395721436,3.4970040049929065,0.03322185201916636,1.0,1.0,0.5143946348760557,0.7341673886277578,56.97784214902664,2.1231636318644824,41.0,0,1,0,35,675.9,41.2,43.0,0.08468834688346884,0.03874547851606605 +KNN (default),0.7941402528021071,0.21308696932262844,0.1178118784481507,0.0311341997736934,1.0,1.0,0.6015201216816846,0.9574097789264635,1.001944530363665,2.4108020816009272,40.94444444444444,0.38312,0.23321354389190674,0.030547380447387695,0.06547009031681006,0.018147653154379814,1.0,1.0,0.6027815264295836,1.0,1.0,1.409117664778126,42.0,0,0,0,36,456.6,54.2,74.7,0.025745257452574527,0.024701125775697483 diff --git a/data/lite/tabicl-cls/tuning-impact-elo-horizontal.pdf b/data/lite/tabicl-cls/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c80bd4877fff9df6358355cf4e013022404bc8be Binary files /dev/null and b/data/lite/tabicl-cls/tuning-impact-elo-horizontal.pdf differ diff --git a/data/lite/tabicl-cls/tuning-impact-elo-horizontal.png.zip b/data/lite/tabicl-cls/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..a82e75a5f4e77ce83a36fe4ab772e61e785bbf53 --- /dev/null +++ b/data/lite/tabicl-cls/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a0e767745b3612add5803b4bb49d6445f96161eea7a533f54f123ac9244056 +size 131668 diff --git a/data/lite/tabicl/figures/critical-diagram.pdf b/data/lite/tabicl/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..5ee043ee0b4807541c52617dc2f11eeadc46960e Binary files /dev/null and b/data/lite/tabicl/figures/critical-diagram.pdf differ diff --git a/data/lite/tabicl/figures/critical-diagram.png.zip b/data/lite/tabicl/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..5473e45838602b2408c048ae6f15679b205af04e --- /dev/null +++ b/data/lite/tabicl/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9b62643ca543a5b490148fb916ed1fdf862546abf3d6ecb1a20ed75b16e37b3f +size 312252 diff --git a/data/lite/tabicl/leaderboard.tex b/data/lite/tabicl/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..cfdf59ad945f6644a84f41b36416d63959828e82 --- /dev/null +++ b/data/lite/tabicl/leaderboard.tex @@ -0,0 +1,50 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1544${}_{-34,+36}$}} & \textcolor{gold}{\textbf{0.569}} & \textcolor{gold}{\textbf{7.4}} & \textcolor{gold}{\textbf{2.9}} & \textcolor{gold}{\textbf{7}} & \textcolor{gold}{\textbf{6.9\%}} & 1239.48 & 1.98 \\ +TabM (T+E) & \textcolor{silver}{\textbf{1445${}_{-33,+31}$}} & \textcolor{bronze}{\textbf{0.491}} & \textcolor{silver}{\textbf{10.9}} & \textcolor{bronze}{\textbf{4.4}} & \textcolor{bronze}{\textbf{3}} & \textcolor{bronze}{\textbf{9.8\%}} & 2261.57 & 1.26 \\ +RealMLP (T+E) & \textcolor{bronze}{\textbf{1441${}_{-37,+31}$}} & 0.418 & \textcolor{bronze}{\textbf{11.0}} & 6.6 & 0 & 10.5\% & 5429.82 & 2.94 \\ +LightGBM (T+E) & 1420${}_{-29,+29}$ & 0.389 & 11.8 & 8.6 & 0 & 11.6\% & 368.47 & 1.22 \\ +TabICL (D) & 1407${}_{-35,+24}$ & \textcolor{silver}{\textbf{0.521}} & 12.4 & \textcolor{silver}{\textbf{4.2}} & \textcolor{silver}{\textbf{5}} & \textcolor{silver}{\textbf{9.3\%}} & 9.07 & 2.11 \\ +CatBoost (T+E) & 1403${}_{-33,+28}$ & 0.377 & 12.6 & 7.3 & 0 & 10.5\% & 1207.49 & 0.52 \\ +XGBoost (T+E) & 1395${}_{-22,+30}$ & 0.320 & 12.7 & 6.8 & 1 & 12.1\% & 655.54 & 0.78 \\ +ModernNCA (T) & 1385${}_{-31,+30}$ & 0.275 & 13.3 & 6.2 & 2 & 10.7\% & 4498.66 & 0.45 \\ +CatBoost (T) & 1375${}_{-26,+30}$ & 0.360 & 13.7 & 9.0 & 0 & 11.2\% & 1207.49 & 0.05 \\ +ModernNCA (T+E) & 1375${}_{-33,+32}$ & 0.383 & 13.6 & 5.2 & 2 & 10.8\% & 4498.66 & 8.48 \\ +TabM (T) & 1373${}_{-32,+31}$ & 0.409 & 13.7 & 5.5 & 2 & 10.8\% & 2261.57 & 0.13 \\ +CatBoost (D) & 1372${}_{-28,+27}$ & 0.343 & 13.8 & 7.4 & 2 & 11.6\% & 5.18 & 0.09 \\ +XGBoost (T) & 1359${}_{-27,+27}$ & 0.264 & 14.3 & 8.1 & 1 & 12.4\% & 655.54 & 0.16 \\ +LightGBM (T) & 1355${}_{-31,+27}$ & 0.302 & 14.6 & 9.8 & 0 & 12.6\% & 368.47 & 0.12 \\ +TabM (D) & 1308${}_{-24,+27}$ & 0.272 & 16.7 & 7.6 & 1 & 14.3\% & 9.30 & 0.13 \\ +RealMLP (T) & 1289${}_{-28,+35}$ & 0.198 & 17.5 & 11.4 & 0 & 14.3\% & 5429.82 & 0.17 \\ +EBM (T+E) & 1289${}_{-37,+29}$ & 0.170 & 17.5 & 7.2 & 2 & 16.1\% & 852.29 & 0.21 \\ +TorchMLP (T+E) & 1283${}_{-34,+27}$ & 0.228 & 17.7 & 11.2 & 0 & 13.5\% & 2406.10 & 2.04 \\ +EBM (T) & 1242${}_{-28,+33}$ & 0.110 & 19.7 & 10.4 & 0 & 16.8\% & 852.29 & 0.02 \\ +FastaiMLP (T+E) & 1224${}_{-32,+25}$ & 0.207 & 20.7 & 11.5 & 0 & 17.6\% & 581.37 & 4.78 \\ +EBM (D) & 1211${}_{-32,+27}$ & 0.119 & 21.3 & 10.0 & 1 & 17.6\% & 4.40 & 0.05 \\ +TorchMLP (T) & 1199${}_{-30,+29}$ & 0.085 & 21.9 & 16.9 & 0 & 15.9\% & 2406.10 & 0.11 \\ +XGBoost (D) & 1198${}_{-26,+31}$ & 0.102 & 21.7 & 12.0 & 1 & 15.7\% & 1.66 & 0.12 \\ +RealMLP (D) & 1197${}_{-28,+35}$ & 0.095 & 21.8 & 11.7 & 1 & 15.9\% & 34.62 & 0.17 \\ +ModernNCA (D) & 1192${}_{-27,+28}$ & 0.142 & 22.1 & 11.1 & 1 & 17.3\% & 12.99 & 0.34 \\ +ExtraTrees (T+E) & 1182${}_{-28,+26}$ & 0.082 & 22.6 & 15.7 & 0 & 17.7\% & 187.87 & 0.72 \\ +RandomForest (T+E) & 1155${}_{-25,+37}$ & 0.067 & 23.8 & 11.0 & 1 & 18.0\% & 266.25 & 0.64 \\ +TabDPT (D) & 1148${}_{-35,+28}$ & 0.186 & 24.3 & 7.6 & 2 & 18.6\% & 20.26 & 8.55 \\ +ExtraTrees (T) & 1146${}_{-34,+33}$ & 0.056 & 24.3 & 12.6 & 0 & 18.8\% & 187.87 & 0.08 \\ +FastaiMLP (T) & 1142${}_{-36,+29}$ & 0.094 & 24.5 & 15.3 & 0 & 19.7\% & 581.37 & 0.33 \\ +LightGBM (D) & 1135${}_{-33,+24}$ & 0.053 & 24.8 & 21.5 & 0 & 17.6\% & 1.64 & 0.15 \\ +RandomForest (T) & 1110${}_{-28,+35}$ & 0.045 & 25.8 & 17.3 & 0 & 18.8\% & 266.25 & 0.07 \\ +TorchMLP (D) & 1026${}_{-34,+29}$ & 0.022 & 29.3 & 25.6 & 0 & 22.2\% & 5.44 & 0.14 \\ +FastaiMLP (D) & 1005${}_{-34,+29}$ & 0.027 & 30.1 & 26.3 & 0 & 23.6\% & 3.04 & 0.30 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.003 & 30.3 & 23.4 & 0 & 25.3\% & 0.35 & 0.03 \\ +Linear (T+E) & 946${}_{-30,+33}$ & 0.044 & 32.2 & 17.1 & 1 & 30.4\% & 44.56 & 0.16 \\ +ExtraTrees (D) & 928${}_{-33,+33}$ & 0.011 & 32.8 & 28.7 & 0 & 28.3\% & 0.24 & 0.04 \\ +Linear (T) & 912${}_{-27,+36}$ & 0.028 & 33.2 & 25.8 & 0 & 31.1\% & 44.56 & 0.06 \\ +Linear (D) & 888${}_{-34,+26}$ & 0.020 & 34.0 & 31.7 & 0 & 32.1\% & 1.42 & 0.09 \\ +KNN (T+E) & 748${}_{-36,+40}$ & 0.000 & 37.3 & 30.3 & 0 & 49.6\% & 3.50 & 0.16 \\ +KNN (T) & 676${}_{-43,+42}$ & 0.000 & 38.5 & 25.8 & 0 & 51.7\% & 3.50 & 0.03 \\ +KNN (D) & 457${}_{-75,+55}$ & 0.000 & 40.9 & 40.5 & 0 & 60.2\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/lite/tabicl/tabarena_leaderboard.csv b/data/lite/tabicl/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..a1569fad5517e62455bb72ecc7789a0de91e3da1 --- /dev/null +++ b/data/lite/tabicl/tabarena_leaderboard.csv @@ -0,0 +1,43 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),7748.990037812127,20.667584995428722,2786.367822030483,2.9686980518587505,0.4305752039617605,0.3877041747388167,0.06883082832229188,0.04066258382676245,31469.589611836727,278.09709780834015,7.402777777777778,0.157955,6468.6270185709,3.7256141901016235,1239.480006130907,1.9841131308583402,0.3593825434326649,0.3109231173840635,0.04103436500005686,0.014103198961668492,23460.49845320325,142.30518626073365,6.0,7,3,3,23,1543.7,35.2,34.0,0.8438346883468835,0.3436916810221074 +TABM (tuned + ensemble),32822.329592857095,6.718376735846202,3197.494145595591,1.5968882956786812,0.5092283925039975,0.507000043838314,0.0984603164003323,0.08012828297402202,47368.16918956869,126.38148088436719,10.916666666666666,0.179425,8087.822376012802,2.9963064193725586,2261.5736774780025,1.258481528676095,0.47136645123500964,0.5108291907783599,0.060615421067887476,0.023331148793904504,38436.80992013264,113.73588992793104,8.5,3,2,4,27,1444.7,30.6,32.5,0.758130081300813,0.22495824385743393 +REALMLP (tuned + ensemble),78236.15068201887,12.508955962128109,5830.1148293797,4.304066546062194,0.5822217846671746,0.540978236726168,0.1054473766976048,0.06442528212733184,142054.31198219568,287.5042196718009,11.0,0.172085,23325.48352110386,5.581948518753052,5429.819541400706,2.9376822787963643,0.5258237901453162,0.524882615107501,0.06227050818890584,0.031582684665151774,93177.39784523446,229.06556046190957,11.0,0,2,3,31,1441.2,30.9,36.7,0.7560975609756098,0.1515892708417218 +GBM (tuned + ensemble),2716.464209013515,11.650436507331001,652.7081231589766,2.3316901429136103,0.6113071815088934,0.6115648021204206,0.11626709694511878,0.0824232864688556,8680.115930381902,215.8804964577198,11.75,0.17347,1527.4595284461975,3.0867438316345215,368.4672845277482,1.2233637093106904,0.6534872634143774,0.6466968889868911,0.07038437664020675,0.02975866915001093,6991.1315945904835,101.20971107477635,12.0,0,0,3,33,1420.3,28.8,28.8,0.7378048780487805,0.11670989654152485 +TABICL (default),122.91359008020825,22.535752693812054,9.963658601166212,2.414261101064252,0.4788051732855598,0.5443303646525952,0.09266040289556793,0.06762151146287841,179.76964423723584,285.61149302454663,12.402777777777779,0.17301,25.354220509529114,3.4422082901000977,9.073519792448254,2.1146179912287675,0.4992564187967412,0.5686403350371668,0.050317184697893425,0.021088283054317383,144.31099154558206,133.6960355402875,12.25,5,2,1,28,1406.6,23.5,34.5,0.7218834688346883,0.24049138766638822 +CAT (tuned + ensemble),14495.012520273527,2.4179346097840204,2435.7734425933513,0.7431584046715475,0.6226459246322733,0.5724409794042533,0.10496616447275113,0.0672300454324769,26875.894999800756,51.46768059921607,12.583333333333334,0.16073500000000002,4554.886538267136,1.2364250421524048,1207.4876040106037,0.5178688292431316,0.6153632608570143,0.555056424681614,0.05369915486940119,0.03126669058324277,20500.32079395515,42.031537823549,11.75,0,1,4,31,1402.6,27.7,32.6,0.717479674796748,0.13630557293973747 +XGB (tuned + ensemble),5342.747825033135,6.078431023491754,903.1395181375539,2.4252994962785444,0.6804014135907539,0.6264870966428019,0.12111620645907134,0.08995137231528924,10629.483194714896,137.278973628425,12.680555555555555,0.17348,1566.1410616636276,2.2440043687820435,655.5367556867432,0.7811829897319023,0.7126419091535116,0.6325401095781271,0.09321543161193863,0.03379706826049186,7509.936420425425,78.99719729544988,10.5,1,2,1,32,1395.2,29.8,21.5,0.7151084010840109,0.14600195845562308 +MNCA (tuned),57242.54890214073,19.57283937268787,5418.777655567625,1.42346301375637,0.7251534847407544,0.5849087413461024,0.10710451817631175,0.08805735190470587,83536.32309061421,128.61468675559033,13.319444444444445,0.17875999999999997,13889.081128954887,0.5763018131256104,4498.662765534045,0.4537708228509102,0.7744612480983963,0.5544791639834478,0.0863080541794865,0.030308265873669055,68177.28399410362,31.783037323749284,11.5,2,0,1,33,1384.6,29.3,30.1,0.6995257452574526,0.1615009946329935 +MNCA (tuned + ensemble),57242.54890214073,352.26783115996255,5418.777655567625,26.036546991587294,0.6174214208973879,0.5493942592841631,0.10758973700607603,0.09075466007453252,83536.32309061421,2422.151854876793,13.63888888888889,0.18984499999999999,13889.081128954887,13.45927095413208,4498.662765534045,8.47783782257851,0.6265388929939845,0.5206641944860473,0.06347089264029215,0.03645962776745639,68177.28399410362,597.4215710672797,12.0,2,1,6,27,1374.6,32.0,32.5,0.6917344173441734,0.19092899712791367 +TABM (tuned),32822.329592857095,0.8157637053065829,3197.494145595591,0.18466029782681834,0.5912555585094392,0.5781476073453056,0.10824063992837979,0.09522303955431258,47368.16918956869,14.021720853154184,13.680555555555555,0.180525,8087.822376012802,0.23180127143859863,2261.5736774780025,0.12874845474998858,0.5877464635288101,0.5650956393005605,0.06311117459522003,0.03460069818708005,38436.80992013264,9.17800659262771,11.0,2,3,0,31,1373.2,30.4,32.0,0.6907181571815718,0.1806193600709681 +CAT (tuned),14495.012520273527,0.51534503698349,2435.7734425933513,0.10399437886071032,0.6397756322608608,0.6322254665313207,0.11211575515013829,0.07212514514540504,26875.894999800756,8.46801242481876,13.708333333333334,0.16238999999999998,4554.886538267136,0.0815126895904541,1207.4876040106037,0.0500282797740022,0.6663747291561838,0.6867488983507399,0.06911800649977579,0.037329065440735124,20500.32079395515,3.2560454821039713,13.0,0,1,1,34,1375.2,29.1,25.7,0.6900406504065041,0.11055045510865025 +CAT (default),224.62928151422076,0.4154767062928941,111.70337568642236,0.13827596961428462,0.6569717560090025,0.6468762263410741,0.11626850131699006,0.07278525873435686,447.23099485428673,10.877554497735453,13.76388888888889,0.161695,17.568647384643555,0.40540146827697754,5.180417923757872,0.0935400303065973,0.6872314742824219,0.7382324728488332,0.07896512392539878,0.02932904358303992,99.87500924350084,7.319630010463586,13.75,2,0,0,34,1371.6,26.1,27.9,0.6886856368563685,0.13443131358206103 +XGB (tuned),5342.747825033135,1.2801591290367975,903.1395181375539,0.639985957335127,0.7363747567107989,0.6713345405972482,0.12390832771884491,0.09571764627414511,10629.483194714896,33.68776538848152,14.305555555555555,0.176915,1566.1410616636276,0.3626103401184082,655.5367556867432,0.15681948639835536,0.7444722954009122,0.6983685877302974,0.10245426870569208,0.04170721330284183,7509.936420425425,13.242651903663184,13.0,1,1,0,34,1359.0,26.7,26.8,0.6754742547425474,0.12394509846685876 +GBM (tuned),2716.464209013515,1.8839820755852594,652.7081231589766,0.5269472540571296,0.6982904100452502,0.6749309433374803,0.12589415781062138,0.09744390266808721,8680.115930381902,44.39170809497161,14.555555555555555,0.17537,1527.4595284461975,0.5379931926727295,368.4672845277482,0.11972777459115919,0.7332165833714938,0.7275226403809433,0.07614432042371466,0.03541051328529879,6991.1315945904835,14.435205317774814,15.5,0,1,0,35,1355.3,26.2,30.5,0.6693766937669376,0.10207008030434894 +TABM (default),140.8987251520157,0.9393433663580153,14.983795328833262,0.18062205057314412,0.7280741356398281,0.6887171968757086,0.1434354286809727,0.11695380949114573,217.11963453230365,13.588062902891982,16.694444444444443,0.18336999999999998,29.720080256462097,0.17920684814453125,9.303649390343331,0.12625846443862987,0.8404844924976476,0.7247027492327622,0.09137336521284833,0.04657437619358792,136.86363214905396,9.928949591639647,15.0,1,2,1,32,1308.3,26.2,23.4,0.6172086720867209,0.1309363094547008 +REALMLP (tuned),78236.15068201887,0.5794834030999078,5830.1148293797,0.2343189481249011,0.8023310570642167,0.7611882470431272,0.14345077264424028,0.10287611571863375,142054.31198219568,14.164064071067262,17.47222222222222,0.17396499999999998,23325.48352110386,0.2515758275985718,5429.819541400706,0.1667243421007542,0.8787741747777617,0.848557552697969,0.12726966477565288,0.052454140783326594,93177.39784523446,11.047988768711498,17.0,0,1,0,35,1289.1,34.7,27.8,0.5982384823848238,0.08738586997653082 +EBM (tuned + ensemble),23286.664896190166,1.091496765613556,2120.3766035360445,0.27276807805390907,0.8303965578080257,0.7468568741683155,0.16075450104299963,0.12694848030107259,22277.212479524587,20.94625622434944,17.541666666666668,0.181755,2063.8647414445877,0.34018075466156006,852.2924105567174,0.2147416876316487,0.9326307523783561,0.9041078057334899,0.07435566243697694,0.040508730984801566,14796.589958532411,12.4343949892923,19.0,2,1,0,33,1289.1,28.5,36.1,0.5965447154471545,0.1391964347831879 +NN_TORCH (tuned + ensemble),24905.959744539527,15.398705760637919,2952.9414864535283,2.849701545461187,0.7723767872546564,0.7802343502313487,0.13521206862305007,0.09239504445258413,57128.74445660633,251.675577261934,17.72222222222222,0.174655,8823.152364969254,4.023689031600952,2406.0981194606593,2.04339307286537,0.9031626632034823,0.896307171089784,0.07810139442157471,0.051810123946366915,44532.50472025028,186.31979424186875,18.0,0,1,1,34,1282.8,26.1,33.4,0.592140921409214,0.08923776569295938 +EBM (tuned),23286.664896190166,0.12520347701178658,2120.3766035360445,0.03546501889806546,0.8902973079447266,0.8051099726251761,0.1679622875400496,0.13349605155397093,22277.212479524587,2.4619900653558613,19.708333333333332,0.181165,2063.8647414445877,0.03867542743682861,852.2924105567174,0.019590150951601572,1.0,0.996706192358367,0.08520888190451481,0.038628140113403854,14796.589958532411,1.2081964035400725,21.5,0,2,1,33,1241.5,32.3,27.7,0.5436991869918699,0.09584534731902562 +FASTAI (tuned + ensemble),7289.840611391597,19.15695581171248,1303.7963220052682,8.14447528633287,0.7926888983747619,0.80077618248164,0.17570272525312422,0.10287009121624455,19518.83385555907,536.4983396151715,20.680555555555557,0.17774,2935.3402432203293,12.184540390968323,581.3698825429726,4.7809972969941885,0.9972327674497973,1.0,0.10018841132165368,0.07078877634641503,12902.237935802625,480.50623521802163,22.0,0,1,0,35,1224.4,24.6,31.2,0.5199864498644986,0.08662996472004297 +EBM (default),88.55436639653311,0.15727221965789795,8.239269939196605,0.06798490198186677,0.8813849391122723,0.8284798529778744,0.17628689881385806,0.13673033295881595,96.02410546318727,4.157976350403235,21.34722222222222,0.185075,9.609492063522339,0.06619513034820557,4.39583132423955,0.04826106459931703,1.0,1.0,0.10083235741300572,0.04426810265651604,56.77501308270701,3.212150379842276,22.0,1,1,0,34,1210.6,26.2,31.2,0.5037262872628726,0.10005999473376052 +XGB (default),11.691550334294638,0.582956936624315,2.5863360706414102,0.3100034135849125,0.8979377675030106,0.8350402509472741,0.15712415912888847,0.1440988639628879,31.813142204435906,16.390368754261612,21.65277777777778,0.175445,5.739119291305542,0.3213019371032715,1.66399954826743,0.12057611518072474,1.0,0.9803123668140437,0.12284267447258906,0.05749824237647426,27.152708445757774,9.436867401020109,21.0,1,0,0,35,1197.9,31.0,25.1,0.4962737127371274,0.08303292342961185 +REALMLP (default),497.1762997441822,0.6067886617448595,35.85201952385433,0.2401830912617534,0.9051917890614664,0.8310467424092711,0.15917290341146056,0.11078024980560792,889.4575438184943,14.614115185514347,21.76388888888889,0.17567,146.45613551139832,0.23558390140533447,34.61854805295221,0.16884822006186367,1.0,1.0,0.10725973083151863,0.06418095087854436,602.5345759525801,11.596995428149068,24.5,1,0,1,34,1197.4,34.7,27.4,0.4935636856368564,0.08575069891200408 +NN_TORCH (tuned),24905.959744539527,0.6660838723182678,2952.9414864535283,0.15459242992627908,0.9148909672592942,0.8446960260537524,0.1590590600544738,0.1132570686054671,57128.74445660633,11.722847631461935,21.88888888888889,0.17361500000000002,8823.152364969254,0.2007770538330078,2406.0981194606593,0.11202253911830268,1.0,1.0,0.10327565910431935,0.06338916104312123,44532.50472025028,8.64392602078908,23.5,0,0,0,36,1199.2,28.2,29.8,0.4905149051490515,0.059310078186054126 +MNCA (default),306.5998376806577,9.98978877067566,17.805999463280088,0.7747951629896741,0.8583270774687776,0.8382175783752852,0.17271697778050027,0.11066382849299157,252.51128046279433,73.98229627081949,22.055555555555557,0.190215,27.28103244304657,0.5536909103393555,12.987204209764974,0.33894040253174296,1.0,0.9979757085020253,0.13240284359564197,0.06962042836732543,169.66480804983473,24.653041834920074,22.0,1,1,0,34,1191.7,28.0,26.1,0.48644986449864497,0.08988082766087606 +XT (tuned + ensemble),1037.372560845481,3.071019298500485,360.7132393580917,1.4744924910300816,0.9176899118580594,0.8698246935572322,0.17680045603807865,0.14004244432772361,4707.816009404905,94.53215065372743,22.59722222222222,0.183685,752.3897469043732,2.041845917701721,187.8732763026905,0.7207715417972143,1.0,1.0,0.1315762721206637,0.06415374880272719,3200.195519746224,83.245630982298,25.0,0,0,0,36,1181.7,25.6,27.9,0.47323848238482386,0.06358541416626037 +RF (tuned + ensemble),2020.4760966300964,2.134269985887739,415.1583564929086,1.2910427322323617,0.932518287859348,0.8954785691697733,0.18024531301876434,0.15672676475141362,5462.814816714475,83.37351619032778,23.805555555555557,0.180965,851.531911611557,1.8475559949874878,266.2539346689623,0.6395657388264155,1.0,1.0,0.1226880904436346,0.0790409010893038,4305.508903299853,67.08260239114765,26.5,1,1,0,34,1155.2,36.6,25.0,0.4437669376693767,0.09119806161138436 +TABDPT (default),175.0330073899693,68.71396222379472,26.097389455873152,21.943944030413235,0.8137945017331583,0.8082409814199439,0.1855263261830053,0.13701159964452117,492.7334429125401,1620.9148184577618,24.26388888888889,0.198215,97.51883804798126,29.261866688728333,20.255521876560408,8.552781426594889,1.0,1.0,0.13114122736509654,0.07097771501831898,407.04692448648916,1198.388331768314,28.5,2,2,0,32,1147.6,27.2,34.3,0.4325880758807588,0.13132767354712327 +XT (tuned),1037.372560845481,0.3051542573504978,360.7132393580917,0.16689901565151677,0.9439228284777492,0.8694633243464369,0.18779409245774725,0.14640398817829617,4707.816009404905,10.02739657365076,24.333333333333332,0.18688500000000002,752.3897469043732,0.1615079641342163,187.8732763026905,0.07985268047821353,1.0,1.0,0.13372024318384995,0.07682782754093201,3200.195519746224,8.479842721007039,27.0,0,2,1,33,1145.6,32.4,33.2,0.43089430894308944,0.07940241885003267 +FASTAI (tuned),7289.840611391597,1.1152561836772494,1303.7963220052682,0.6947586912123508,0.9058648946317567,0.8672615100898629,0.19682184978792786,0.1349210968164207,19518.83385555907,43.25082874084665,24.47222222222222,0.18023,2935.3402432203293,0.9242111444473267,581.3698825429726,0.334061010051534,1.0,1.0,0.16429853291480473,0.07553682415502974,12902.237935802625,29.54949110419716,26.0,0,0,1,35,1141.7,28.7,35.4,0.4275067750677507,0.06550251739970477 +GBM (default),7.365004950099522,0.7322817908393012,2.509446514786595,0.18984168667689244,0.9471769132881858,0.9127183714331323,0.17557516568639736,0.1512943493378248,35.17208033262076,16.8021003794707,24.791666666666668,0.18529,6.238969683647156,0.6098566055297852,1.6407068695252,0.14810769896236448,1.0,1.0,0.14940090168048453,0.06327021804096736,22.04913243774461,11.229856118149964,25.0,0,0,0,36,1135.3,24.0,32.5,0.41971544715447157,0.046601993319410735 +RF (tuned),2020.4760966300964,0.22272791465123495,415.1583564929086,0.1434182534422469,0.9551019371507523,0.93231001885197,0.1882659625779667,0.16633452106057722,5462.814816714475,8.151832659415685,25.75,0.18201499999999998,851.531911611557,0.14735674858093262,266.2539346689623,0.0714840059306155,1.0,1.0,0.1210948587048516,0.09657799136409392,4305.508903299853,7.364430017401964,27.0,0,1,0,35,1110.3,34.7,27.2,0.39634146341463417,0.0578995665449669 +NN_TORCH (default),47.65417258607017,0.6302466657426622,10.641224850569223,0.23501087478485314,0.978257373569646,0.9546286579228285,0.22199681565167823,0.16718768988466395,143.43581528304946,15.239602876479248,29.319444444444443,0.18706,22.84017276763916,0.28995954990386963,5.436935257156724,0.1377204507988835,1.0,1.0,0.1627665585076386,0.09575304821587584,137.2254146054324,9.730207537012728,30.5,0,0,0,36,1026.0,29.0,33.1,0.3092818428184282,0.039017409649225274 +FASTAI (default),30.521977769003975,1.0843763152758281,4.742334418927322,0.49201194647444324,0.972512597016023,0.9440329717104782,0.2355821153886495,0.17839935665336887,76.51706745050629,33.35318180861937,30.13888888888889,0.187165,13.653389573097229,0.6859067678451538,3.0439688517356123,0.3001709716511216,1.0,1.0,0.1549184931797169,0.11731493973750912,56.69169806857949,28.502895225657927,32.0,0,0,0,36,1005.1,28.9,33.6,0.28929539295392953,0.03801249783928372 +RF (default),3.2084562049971685,0.13506737020280626,0.4263514561749847,0.06558284712037367,0.9965256868256966,0.9704660955206462,0.2530292640512533,0.24048643309547021,7.059782904150193,4.425232204599554,30.305555555555557,0.20493,1.2789119482040405,0.09202027320861816,0.34809529918260984,0.03472589740080406,1.0,1.0,0.22217921675183,0.11658001888686689,5.376229465336108,3.6309495335392015,32.75,0,0,1,35,1000.0,0.0,0.0,0.28523035230352306,0.04264840740930118 +LR (tuned + ensemble),246.97270359595618,1.3169628845320807,86.24001785898447,0.299396801053486,0.9561054304859822,0.9692891800955401,0.3036771075929869,0.24600205726125118,1104.1489965774379,26.605204102831735,32.208333333333336,0.20664,169.60151755809784,0.2824718952178955,44.56368864780743,0.15651788282525403,1.0,1.0,0.2429229288355661,0.14985483348209602,675.130801866569,8.851961834833686,35.5,1,0,0,35,946.3,32.9,29.9,0.23882113821138212,0.05837351309196938 +XT (default),1.8672458993064032,0.17208321889241537,0.35727892050479015,0.06872435387895405,0.9892721532660226,0.9742756012359293,0.2830213570676806,0.27312061168279556,4.999840482001418,4.841906560022478,32.75,0.20700000000000002,0.8974905014038086,0.08268320560455322,0.23619265983619955,0.036597834553610306,1.0,1.0,0.2174961946642111,0.1456189920656607,3.672313260209499,4.261733262225922,35.5,0,0,0,36,927.7,32.4,32.9,0.22560975609756098,0.034861652371361254 +LR (tuned),246.97270359595618,0.43901119629542035,86.24001785898447,0.1002982819551287,0.9723830890370608,0.9786926253905683,0.3109086001235896,0.25775113200017263,1104.1489965774379,7.096822360818471,33.208333333333336,0.206755,169.60151755809784,0.12632572650909424,44.56368864780743,0.062293080471580324,1.0,1.0,0.250867124655856,0.1684820136513116,675.130801866569,5.062717123136234,36.0,0,0,1,35,911.7,35.1,26.4,0.21443089430894308,0.03868985665909902 +LR (default),6.324005815717909,0.45038049750857884,2.0933349209116243,0.12679966936326295,0.9795929927195421,0.9853721744946168,0.3212404515646151,0.28617086703013384,28.81662586680685,8.679077173966622,34.0,0.21439,5.391173958778381,0.12632572650909424,1.4157886113918772,0.09164922519151064,1.0,1.0,0.2508658887883501,0.16848092022763028,16.81966393701253,5.719143257720329,37.0,0,0,0,36,888.3,25.9,33.4,0.1951219512195122,0.031563196056924266 +KNN (tuned + ensemble),168.93849890761905,8.359918620851305,8.837249824100754,0.4805954727942703,1.0,0.9823277278077026,0.4959032419783757,0.6046074386086431,80.93770906535752,52.719394620298914,37.34722222222222,0.321975,13.986609816551208,0.22182130813598633,3.4970040049929065,0.1592449370444793,1.0,1.0,0.44534583678290146,0.6925990746528932,56.97784214902664,12.39378247919743,40.0,0,0,0,36,748.1,39.6,35.8,0.11348238482384825,0.03301741936100167 +KNN (tuned),168.93849890761905,1.9504973888397217,8.837249824100754,0.10793579395855045,1.0,0.9833301560380661,0.5170601568840968,0.6549991426818225,80.93770906535752,11.495707208766925,38.52777777777778,0.32563,13.986609816551208,0.06376731395721436,3.4970040049929065,0.03322185201916636,1.0,1.0,0.5143946348760557,0.7341673886277578,56.97784214902664,2.1231636318644824,41.0,0,1,0,35,675.9,41.2,43.0,0.08468834688346884,0.03874547851606605 +KNN (default),0.7941402528021071,0.21308696932262844,0.1178118784481507,0.0311341997736934,1.0,1.0,0.6015201216816846,0.9574097789264635,1.001944530363665,2.4108020816009272,40.94444444444444,0.38312,0.23321354389190674,0.030547380447387695,0.06547009031681006,0.018147653154379814,1.0,1.0,0.6027815264295836,1.0,1.0,1.409117664778126,42.0,0,0,0,36,456.6,54.2,74.7,0.025745257452574527,0.024701125775697483 diff --git a/data/lite/tabicl/tuning-impact-elo-horizontal.pdf b/data/lite/tabicl/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..06be05d6dbe301b4226b2f0253c06c6a1f715a13 Binary files /dev/null and b/data/lite/tabicl/tuning-impact-elo-horizontal.pdf differ diff --git a/data/lite/tabicl/tuning-impact-elo-horizontal.png.zip b/data/lite/tabicl/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..a82e75a5f4e77ce83a36fe4ab772e61e785bbf53 --- /dev/null +++ b/data/lite/tabicl/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:90a0e767745b3612add5803b4bb49d6445f96161eea7a533f54f123ac9244056 +size 131668 diff --git a/data/lite/tabpfn-cls/figures/critical-diagram.pdf b/data/lite/tabpfn-cls/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..6bfd1724fd13f4e074ba791a9eb80a0e721fd27a Binary files /dev/null and b/data/lite/tabpfn-cls/figures/critical-diagram.pdf differ diff --git a/data/lite/tabpfn-cls/figures/critical-diagram.png.zip b/data/lite/tabpfn-cls/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..45ee2eb3b7d9536c80d1b09ccd1aaa64ed42fb2f --- /dev/null +++ b/data/lite/tabpfn-cls/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5fb8945a9b227c0087f10717cfe7d833f84cbbff633c8937aa161f906278af17 +size 315033 diff --git a/data/lite/tabpfn-cls/leaderboard.tex b/data/lite/tabpfn-cls/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..32b9a4b8fe2b8f306885f1904cc9c2f9ccc806cc --- /dev/null +++ b/data/lite/tabpfn-cls/leaderboard.tex @@ -0,0 +1,52 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +TabPFNv2 (T+E) & \textcolor{gold}{\textbf{1501${}_{-28,+39}$}} & \textcolor{gold}{\textbf{0.700}} & \textcolor{gold}{\textbf{8.1}} & \textcolor{gold}{\textbf{2.7}} & \textcolor{gold}{\textbf{6}} & \textcolor{gold}{\textbf{7.6\%}} & 3361.32 & 51.67 \\ +AutoGluon 1.3 (4h) & \textcolor{silver}{\textbf{1435${}_{-30,+47}$}} & 0.440 & \textcolor{silver}{\textbf{10.6}} & 6.0 & 1 & \textcolor{silver}{\textbf{10.0\%}} & 2085.73 & 2.16 \\ +RealMLP (T+E) & \textcolor{bronze}{\textbf{1409${}_{-33,+36}$}} & 0.402 & \textcolor{bronze}{\textbf{11.7}} & 6.0 & 1 & 11.8\% & 5575.69 & 4.58 \\ +TabPFNv2 (T) & 1406${}_{-37,+37}$ & \textcolor{silver}{\textbf{0.544}} & 11.8 & \textcolor{silver}{\textbf{4.3}} & 1 & \textcolor{bronze}{\textbf{10.2\%}} & 3361.32 & 0.53 \\ +TabM (T+E) & 1389${}_{-24,+39}$ & 0.483 & 12.4 & 6.0 & 1 & 11.5\% & 2899.02 & 1.46 \\ +LightGBM (T+E) & 1350${}_{-36,+39}$ & 0.319 & 14.2 & 11.2 & 0 & 13.8\% & 691.46 & 1.46 \\ +TabPFNv2 (D) & 1334${}_{-30,+34}$ & \textcolor{bronze}{\textbf{0.499}} & 15.0 & \textcolor{bronze}{\textbf{4.4}} & \textcolor{silver}{\textbf{3}} & 11.6\% & 4.28 & 0.51 \\ +TabM (T) & 1332${}_{-31,+42}$ & 0.387 & 15.2 & 7.5 & 0 & 12.7\% & 2899.02 & 0.17 \\ +CatBoost (T+E) & 1329${}_{-28,+37}$ & 0.271 & 15.3 & 9.5 & 0 & 13.1\% & 1394.45 & 0.62 \\ +ModernNCA (T) & 1326${}_{-27,+40}$ & 0.255 & 15.4 & 6.1 & 2 & 12.2\% & 6020.89 & 0.45 \\ +XGBoost (T+E) & 1314${}_{-28,+29}$ & 0.222 & 16.0 & 11.1 & 0 & 14.9\% & 785.90 & 1.28 \\ +CatBoost (D) & 1312${}_{-40,+34}$ & 0.228 & 16.1 & 9.4 & 1 & 14.4\% & 5.74 & 0.14 \\ +CatBoost (T) & 1304${}_{-36,+31}$ & 0.247 & 16.5 & 12.4 & 0 & 14.0\% & 1394.45 & 0.05 \\ +ModernNCA (T+E) & 1292${}_{-34,+36}$ & 0.307 & 17.1 & 8.4 & 0 & 14.0\% & 6020.89 & 8.48 \\ +LightGBM (T) & 1292${}_{-34,+35}$ & 0.262 & 17.1 & 11.5 & 0 & 14.8\% & 691.46 & 0.26 \\ +XGBoost (T) & 1291${}_{-34,+30}$ & 0.181 & 17.2 & 9.5 & 1 & 15.1\% & 785.90 & 0.21 \\ +TabM (D) & 1283${}_{-34,+36}$ & 0.288 & 17.5 & 7.9 & 1 & 15.9\% & 11.60 & 0.15 \\ +EBM (T+E) & 1278${}_{-32,+29}$ & 0.186 & 17.8 & 7.9 & 0 & 16.8\% & 1142.31 & 0.24 \\ +TorchMLP (T+E) & 1249${}_{-29,+36}$ & 0.199 & 19.2 & 11.6 & 0 & 15.3\% & 2792.14 & 2.18 \\ +RealMLP (T) & 1232${}_{-29,+38}$ & 0.201 & 20.1 & 13.0 & 0 & 16.1\% & 5575.69 & 0.24 \\ +EBM (T) & 1230${}_{-38,+37}$ & 0.120 & 20.2 & 9.0 & 1 & 17.7\% & 1142.31 & 0.03 \\ +EBM (D) & 1206${}_{-33,+38}$ & 0.153 & 21.5 & 9.5 & 1 & 18.4\% & 5.57 & 0.07 \\ +FastaiMLP (T+E) & 1204${}_{-36,+42}$ & 0.230 & 21.6 & 11.9 & 0 & 18.7\% & 1350.04 & 6.57 \\ +ModernNCA (D) & 1194${}_{-31,+33}$ & 0.183 & 22.1 & 9.6 & 1 & 19.0\% & 13.67 & 0.34 \\ +TabDPT (D) & 1183${}_{-43,+34}$ & 0.258 & 22.7 & 5.5 & \textcolor{silver}{\textbf{3}} & 17.9\% & 28.08 & 8.76 \\ +RealMLP (D) & 1176${}_{-28,+43}$ & 0.083 & 22.9 & 10.8 & 1 & 17.1\% & 35.27 & 0.25 \\ +TorchMLP (T) & 1168${}_{-31,+40}$ & 0.086 & 23.3 & 17.2 & 0 & 17.3\% & 2792.14 & 0.13 \\ +ExtraTrees (T+E) & 1156${}_{-36,+31}$ & 0.102 & 24.1 & 15.4 & 0 & 20.0\% & 378.34 & 1.37 \\ +FastaiMLP (T) & 1140${}_{-33,+34}$ & 0.113 & 24.7 & 13.2 & 0 & 20.2\% & 1350.04 & 0.61 \\ +XGBoost (D) & 1134${}_{-32,+40}$ & 0.032 & 25.2 & 21.1 & 0 & 18.8\% & 2.66 & 0.17 \\ +ExtraTrees (T) & 1131${}_{-25,+36}$ & 0.072 & 25.2 & 13.4 & 0 & 20.3\% & 378.34 & 0.15 \\ +RandomForest (T+E) & 1111${}_{-35,+36}$ & 0.057 & 26.2 & 14.1 & 0 & 20.7\% & 527.27 & 1.16 \\ +LightGBM (D) & 1104${}_{-32,+34}$ & 0.030 & 26.7 & 23.2 & 0 & 19.8\% & 3.01 & 0.18 \\ +RandomForest (T) & 1068${}_{-38,+34}$ & 0.023 & 28.3 & 21.6 & 0 & 21.6\% & 527.27 & 0.13 \\ +TorchMLP (D) & 1010${}_{-28,+33}$ & 0.030 & 30.8 & 25.8 & 0 & 24.4\% & 8.93 & 0.17 \\ +FastaiMLP (D) & 1001${}_{-39,+36}$ & 0.019 & 31.2 & 26.7 & 0 & 24.2\% & 4.68 & 0.55 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.005 & 31.3 & 24.0 & 0 & 27.1\% & 0.44 & 0.06 \\ +Linear (T+E) & 948${}_{-34,+36}$ & 0.055 & 33.2 & 14.8 & 1 & 31.1\% & 89.12 & 0.20 \\ +ExtraTrees (D) & 948${}_{-34,+43}$ & 0.015 & 33.2 & 28.2 & 0 & 29.0\% & 0.36 & 0.07 \\ +Linear (T) & 928${}_{-39,+36}$ & 0.035 & 34.0 & 24.2 & 0 & 31.7\% & 89.12 & 0.08 \\ +Linear (D) & 894${}_{-36,+44}$ & 0.025 & 35.0 & 32.1 & 0 & 33.1\% & 2.43 & 0.10 \\ +KNN (T+E) & 747${}_{-45,+40}$ & 0.000 & 39.0 & 32.4 & 0 & 47.5\% & 3.50 & 0.16 \\ +KNN (T) & 672${}_{-50,+46}$ & 0.000 & 40.2 & 27.8 & 0 & 50.0\% & 3.50 & 0.03 \\ +KNN (D) & 380${}_{-82,+54}$ & 0.000 & 43.3 & 43.2 & 0 & 59.3\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/lite/tabpfn-cls/tabarena_leaderboard.csv b/data/lite/tabpfn-cls/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..92a212f0633c2f81c84f668f29903ffa52671362 --- /dev/null +++ b/data/lite/tabpfn-cls/tabarena_leaderboard.csv @@ -0,0 +1,45 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +TABPFNV2 (tuned + ensemble),14624.10810300937,166.6936253951146,3837.093570770289,71.22922446033486,0.2999799624009034,0.36808629417652067,0.07554916766810474,0.0510069061361108,73712.17169915051,6326.444952096131,8.115384615384615,0.1749,8447.90291428566,59.9476752281189,3361.317280362242,51.668880171806535,0.21441671773452015,0.3610327130222525,0.03548854933740464,0.027745461982937078,32774.70384527536,4094.471326370238,5.5,6,3,1,16,1501.4,38.8,27.3,0.8345259391771019,0.37210235277542975 +AutoGluon 1.3 (4h),5811.540137483524,3.0737958137805643,3715.2599271484114,3.044362316385498,0.5599854226187498,0.5374717793951389,0.09973935537830267,0.056003751651180655,37983.87924037632,165.48830983683717,10.557692307692308,0.17751,5213.819254875183,2.0895845890045166,2085.7270446537464,2.161349128608518,0.5600808158121042,0.5370720772033579,0.06078825021132733,0.052199978656074855,30345.10100676535,118.3021811054546,8.5,1,1,0,24,1434.9,46.6,29.2,0.7777280858676208,0.1677722095709093 +REALMLP (tuned + ensemble),17853.666671056013,5.060677555891184,6069.468633023759,5.352972929535142,0.5983187605703805,0.5442978650575347,0.1181226678591906,0.06571886126067156,114488.59335090281,312.9299756303972,11.653846153846153,0.180765,10652.850141763687,4.7910391092300415,5575.693006637695,4.577857176513236,0.5696681015084324,0.5160105348256356,0.06341551053522204,0.040531971105630976,88499.22644289241,286.2007603461078,11.0,1,0,2,23,1409.1,35.1,32.7,0.7522361359570662,0.16703892033583626 +TABPFNV2 (tuned),14624.10810300937,5.890562671881455,3837.093570770289,2.9216612176257093,0.4562783530638654,0.5323385828743338,0.10174827499736666,0.07249836526386379,73712.17169915051,208.74723417223507,11.75,0.19132,8447.90291428566,0.5261704921722412,3361.317280362242,0.52729181819818,0.34109768962419973,0.5750212986748566,0.08017059655741715,0.04680159037032619,32774.70384527536,36.401677545743524,10.5,1,6,2,17,1406.3,36.8,36.9,0.75,0.2346239346014619 +TABM (tuned + ensemble),7183.65333878994,2.169150572556716,3570.712955098323,1.861754182648195,0.5166525238887076,0.5375782342693337,0.11533720579631103,0.07923336429395993,42989.483218042995,121.47528176372664,12.384615384615385,0.18310500000000002,6058.230099320412,1.6269241571426392,2899.0220559668596,1.460880880345262,0.47136645123500964,0.5091829772196896,0.060569889934732224,0.040297742432766875,37327.19260107514,109.27107326295308,9.0,1,0,2,23,1388.6,38.5,23.6,0.7352415026833632,0.16758336662017792 +GBM (tuned + ensemble),1386.6633568268555,2.647538212629465,855.5354588276103,2.6861264004396594,0.6806482110513491,0.6915952965141915,0.1377345967845975,0.08426211130205394,10078.423717398238,182.57724180654643,14.25,0.18084,1317.1929507255554,2.008246660232544,691.4641967411458,1.4553717527055694,0.7005535661954161,0.7687759906277638,0.07096598380907965,0.06332006129013433,9059.644744264673,90.72358893879789,14.0,0,0,1,25,1350.4,38.4,35.8,0.6918604651162791,0.08898620150852825 +TABPFNV2 (default),12.006092300781837,1.2122490681134737,5.3148066442099084,0.6434373636958165,0.5013642769155608,0.5985164446892546,0.11636281251306395,0.08655794673306556,74.57934257049592,50.66248833495304,15.0,0.194575,9.273962259292603,0.4255213737487793,4.283948847917442,0.5110342254172261,0.3771347187714357,0.749999609789962,0.07359963710195983,0.07175881685986511,62.366726819804335,28.80821414198816,13.5,3,2,2,19,1334.4,33.9,29.5,0.6744186046511628,0.2263587639687604 +TABM (tuned),7183.65333878994,0.2019361532651461,3570.712955098323,0.2168923191387062,0.6127855914191946,0.6015979357053903,0.12668145442804887,0.09163884859097308,42989.483218042995,13.062348029072668,15.153846153846153,0.18589499999999998,6058.230099320412,0.1839456558227539,2899.0220559668596,0.171251855223312,0.5915500043256848,0.608898387323711,0.07076230328752658,0.05588464476989181,37327.19260107514,9.17800659262771,11.5,0,2,2,22,1331.5,41.5,30.7,0.6708407871198568,0.13338822762296182 +CAT (tuned + ensemble),4861.749095384891,1.0365588940106905,3035.7630585976663,0.9271931106606893,0.728920075896456,0.6586722747505791,0.13137203388016178,0.08732478746102613,26561.38119334446,53.29220521181529,15.26923076923077,0.178855,3259.4502482414246,0.9432240724563599,1394.4481643221718,0.6180738220288715,0.7992439551459185,0.7086230805879283,0.06547094816637722,0.051124014208117546,20607.594190900367,47.175332427485074,14.5,0,1,1,24,1329.0,37.0,27.4,0.6681574239713775,0.10571294421823016 +MNCA (tuned),11104.727954295966,0.5286179322462815,5977.187221364461,0.4814995680859316,0.7448490525012423,0.6254269268193292,0.12195590153519999,0.09981792358357669,78433.1838326286,30.191390941699975,15.403846153846153,0.18580999999999998,11212.920736551285,0.4423438310623169,6020.892835479641,0.4537708228509102,0.8177816514691232,0.6675386576808539,0.08289705547642268,0.05461255954908243,67721.7850080898,25.373417518222958,14.0,2,0,1,23,1326.1,39.2,26.8,0.6650268336314848,0.16427089788095786 +XGB (tuned + ensemble),1868.2429302564035,2.459977333362286,1131.3403541498637,3.0908212073715786,0.7784275988369416,0.7199035203924502,0.1486241682315518,0.10152906173151074,11151.77336671448,133.9681639809692,16.0,0.184395,1441.2213016748428,1.317580223083496,785.8975953484313,1.2809013038285242,0.8316341855574652,0.7356064100282319,0.0929411488865951,0.054690456125919155,11226.485943771408,74.70529109142498,13.5,0,0,2,24,1314.2,28.3,28.0,0.6511627906976745,0.08975992178572184 +CAT (default),173.12264615755814,0.2530022859573364,152.19981148640542,0.1746769705615129,0.7721520925382083,0.7330908057220229,0.14409596218236434,0.09317729878661425,503.5985284620293,11.812145121206303,16.096153846153847,0.18153,12.185660362243652,0.2613861560821533,5.736216485552211,0.13945330874004275,0.8708469956136012,0.8336733935247667,0.08972393497982317,0.048694739189140704,101.89620933712393,7.710770185087604,18.25,1,0,0,25,1312.3,33.9,39.1,0.6489266547406082,0.10632669819168675 +CAT (tuned),4861.749095384891,0.10756405500265268,3035.7630585976663,0.11792367197674246,0.7533425274831995,0.7249538160567979,0.14044086628366612,0.0943615544793235,26561.38119334446,7.02054598025311,16.53846153846154,0.179485,3259.4502482414246,0.05912292003631592,1394.4481643221718,0.05275041900206211,0.8504867849422679,0.791074414481965,0.08259970760162288,0.05413571715045103,20607.594190900367,2.886943526422101,16.0,0,0,1,25,1304.1,30.2,35.5,0.6386404293381037,0.08070978014425713 +GBM (tuned),1386.6633568268555,0.6239433655372033,855.5354588276103,0.6651164927916418,0.737683616453633,0.7394663884062609,0.14768864875816456,0.09950734711225898,10078.423717398238,42.45419816933193,17.057692307692307,0.18125,1317.1929507255554,0.32417428493499756,691.4641967411458,0.2643910314838089,0.7826548575969626,0.7975693661177521,0.07901314149648692,0.06910038121186715,9059.644744264673,20.34411810847653,17.5,0,1,0,25,1291.8,34.5,33.7,0.6265652951699463,0.08667185975660585 +MNCA (tuned + ensemble),11104.727954295966,11.626816547833956,5977.187221364461,9.077822725159232,0.6933896363411076,0.6487475573068993,0.13992523616123162,0.11211871841613495,78433.1838326286,625.8156905319904,17.115384615384617,0.196755,11212.920736551285,9.569536089897156,6020.892835479641,8.47783782257851,0.722368050936999,0.6598703101341699,0.07839828824042583,0.06140957049904748,67721.7850080898,462.66839224080115,15.75,0,1,2,23,1292.4,35.8,33.6,0.6252236135957067,0.11906413304593146 +XGB (tuned),1868.2429302564035,0.6344939470291138,1131.3403541498637,0.845858557757576,0.8187393857188691,0.7534881250355465,0.15104001847209594,0.10532513671555101,11151.77336671448,35.07178353015141,17.192307692307693,0.18658,1441.2213016748428,0.2507840394973755,785.8975953484313,0.21279594317419753,0.9017943832788667,0.7297268207379917,0.10088305879986287,0.06269228828209442,11226.485943771408,15.348416723363933,15.5,1,0,0,25,1291.3,30.0,33.7,0.6234347048300537,0.10484293084036392 +TABM (default),26.77778566800631,0.20130640726823074,16.983704596070464,0.1991903141757489,0.7120650416184634,0.6923516563380268,0.15893030218385468,0.11297435774742692,212.38711719883958,11.49822311803021,17.53846153846154,0.18572,20.835053324699402,0.1561201810836792,11.598826273977398,0.14759862387674094,0.7417274447397321,0.7700731238711777,0.09071877675178053,0.06275561870896407,109.84095896574044,8.743598466674035,18.0,1,0,1,24,1282.8,35.4,33.8,0.6153846153846154,0.12718248968645465 +EBM (tuned + ensemble),2866.551095375648,0.3536289380146907,1716.6253639310917,0.31694700723366953,0.8144475987903695,0.7258303382715928,0.16832760766481616,0.12971565077834002,16005.580219570395,21.915829440477204,17.846153846153847,0.18934,1826.563945055008,0.30398499965667725,1142.3094021828929,0.2383204663966832,0.9368603191709515,0.8837415909105775,0.07492264885303435,0.04550198604731012,15335.47890204938,12.950523458443941,20.5,0,3,1,22,1278.3,28.6,31.2,0.6082289803220036,0.12613924248740332 +NN_TORCH (tuned + ensemble),8269.36125588417,3.6064914281551657,3548.397093003735,3.1069146452932506,0.8005534385996745,0.7705080916586386,0.1529691514160514,0.09729166486770355,54687.42205511106,206.78432079512532,19.153846153846153,0.186795,7021.069797754288,2.925987720489502,2792.139673756541,2.175346818589723,0.9508009219506811,0.9124525941492485,0.08120693152281688,0.06776685850668523,44532.50472025028,168.68500762383525,19.5,0,0,2,24,1249.2,36.0,28.4,0.5778175313059034,0.08657128307328764 +REALMLP (tuned),17853.666671056013,0.2349767226439256,6069.468633023759,0.2967994896112342,0.7992732991174807,0.7977993865865203,0.16124952982559976,0.1082766257578261,114488.59335090281,15.66992951032636,20.076923076923077,0.179705,10652.850141763687,0.22168636322021484,5575.693006637695,0.23576453293502733,0.895201963654964,0.9731977379559409,0.11192425937454437,0.08568734793187416,88499.22644289241,12.792606929662101,20.0,0,1,0,25,1231.8,37.4,28.6,0.556350626118068,0.07665354220614971 +EBM (tuned),2866.551095375648,0.04391795855302077,1716.6253639310917,0.042622423373582956,0.8796773591220505,0.7910593906123241,0.17682875196366907,0.13680041849059105,16005.580219570395,2.6200464262605005,20.21153846153846,0.19075999999999999,1826.563945055008,0.031216979026794434,1142.3094021828929,0.02993136666804911,1.0,1.0,0.08520888190451481,0.06958046400412875,15335.47890204938,1.3258183958395588,23.25,1,0,1,24,1229.9,36.9,37.8,0.5532200357781754,0.11052293589229499 +EBM (default),11.305762657752403,0.07217896901644193,6.708906519026521,0.08642030214738769,0.8468070324259693,0.8042920507016109,0.1835565890773308,0.13945315415243004,76.4056328740172,4.809914969274192,21.5,0.19205,8.478393912315369,0.060648202896118164,5.571264059656597,0.07129895504487138,1.0,1.0,0.10192934818149862,0.0536012172764834,66.99828845613939,3.601780880785588,24.25,1,0,0,25,1206.2,37.4,32.9,0.5232558139534884,0.10548819198142113 +FASTAI (tuned + ensemble),2663.3156125178702,9.71036501114185,1626.2333617037298,10.35375213848753,0.7698173487890432,0.7766239266765838,0.1868895729064691,0.11051193072704785,20608.72361153851,575.9084247062348,21.596153846153847,0.18179499999999998,2361.516105532646,9.277374625205994,1350.0449662692658,6.569572430045136,0.9823907390988809,0.9882257597451694,0.09835962628115286,0.07519988054335719,13001.178549428518,549.1440903510916,23.0,0,0,0,26,1203.9,41.1,36.0,0.5210196779964222,0.08398545799471152 +MNCA (default),29.761946293023918,0.45968284056736874,15.939524477151517,0.3666958147944243,0.8169283513779425,0.811254423026345,0.1897696514350087,0.10664147250973509,187.22763978272167,23.803143758716253,22.076923076923077,0.19357,23.457369804382324,0.36945486068725586,13.667502577653629,0.33894040253174296,0.9808195818002508,0.9565271309741105,0.13240284359564197,0.08931940966654422,154.11579909730304,19.118442709424343,20.5,1,1,0,24,1194.4,32.1,30.3,0.5098389982110912,0.10393125600187421 +TABDPT (default),72.55373939184042,21.63674608560709,32.1753170626944,27.14316288922542,0.7421770023997575,0.7379344820244905,0.17889973268661238,0.11787057214291427,513.8148563269098,1550.6457261531152,22.71153846153846,0.20388,69.82611036300659,21.28597331047058,28.076698775708525,8.760817737139146,1.0,1.0,0.10626890668528338,0.07851722429401914,429.0853998300959,1143.1582415263433,28.5,3,1,0,22,1182.9,33.4,42.1,0.4950805008944544,0.18081994397557372 +REALMLP (default),110.65054762363434,0.2512630499326266,36.51262336985926,0.304026477962315,0.9170988508971886,0.8163716271989994,0.17056141001660144,0.10645596269359855,716.9172586317429,16.106317028225142,22.903846153846153,0.179885,61.38514173030853,0.19377470016479492,35.26610472383891,0.25260632993704635,1.0,1.0,0.10526489760140278,0.08609150677290009,562.1682174661271,13.084740916081461,25.25,1,0,0,25,1175.9,42.7,27.2,0.490608228980322,0.09252393979779666 +NN_TORCH (tuned),8269.36125588417,0.1732868873156034,3548.397093003735,0.17387289280012472,0.9141063957909149,0.8319781576315332,0.17321260520075385,0.11516570994085543,54687.42205511106,10.378521059679809,23.26923076923077,0.184685,7021.069797754288,0.11897110939025879,2792.139673756541,0.13285542411599796,1.0,1.0,0.10523373712280287,0.07835117203069997,44532.50472025028,7.793728816499673,24.5,0,0,0,26,1168.4,39.7,30.8,0.4821109123434705,0.057995980576954605 +XT (tuned + ensemble),723.008487197069,1.6600552705618052,480.9309873883464,1.918436680526551,0.8984673764067701,0.8615918416720734,0.20042189537659844,0.15074330109339612,5917.229990900821,105.19348400260608,24.115384615384617,0.188425,702.5076073408127,1.3369704484939575,378.34316304246136,1.3734038445009942,1.0,1.0,0.12983254619362938,0.09230140454586463,3777.3315625688056,84.73782880976256,27.25,0,0,0,26,1155.5,30.5,35.4,0.462432915921288,0.06481340822013704 +FASTAI (tuned),2663.3156125178702,0.7872952864720271,1626.2333617037298,0.9244876829128756,0.8865326975991655,0.8353449333488617,0.2024783858100459,0.13557244771371685,20608.72361153851,52.86743788138416,24.73076923076923,0.18316,2361.516105532646,0.6897614002227783,1350.0449662692658,0.612778202858813,1.0,1.0,0.1341179613403088,0.08670903152513522,13001.178549428518,36.29076458115673,26.5,0,1,0,25,1140.5,33.4,32.5,0.4481216457960644,0.07589397427831245 +XGB (default),5.406896040989802,0.29052331814399135,3.2395402815762044,0.40571388124316665,0.967663807523489,0.9116618321036818,0.18812949818613264,0.1554497475997079,36.135643016538445,18.079059576557942,25.173076923076923,0.18918000000000001,4.358284831047058,0.2170184850692749,2.659926162500852,0.1702856674842303,1.0,1.0,0.13057372192600636,0.08590682453074228,32.63170368298432,10.069746654781287,23.25,0,0,0,26,1134.3,39.8,31.5,0.43783542039355994,0.047340744916115174 +XT (tuned),723.008487197069,0.16853698400350717,480.9309873883464,0.21789332007450063,0.9275522820640911,0.8502727907926151,0.20316160721865117,0.15357661687349197,5917.229990900821,11.278027514695932,25.192307692307693,0.189205,702.5076073408127,0.14003515243530273,378.34316304246136,0.15364019245636173,1.0,1.0,0.14877588435193756,0.09210625451126611,3777.3315625688056,8.882695855316769,29.0,0,1,0,25,1131.3,35.2,24.6,0.43738819320214667,0.07435308360156041 +RF (tuned + ensemble),911.769657428448,1.55073488675631,531.6502580092293,1.7081982422422,0.9428934399339288,0.8976007641768068,0.20699357076646183,0.17971889690964657,6577.080710276883,100.32600883678194,26.153846153846153,0.18825,798.9582680463791,1.2464795112609863,527.2734284753022,1.1554577826899797,1.0,1.0,0.15587829515575163,0.09511037806823772,5029.2193365439925,80.30745598154598,29.0,0,1,0,25,1111.1,35.8,34.2,0.4150268336314848,0.07070953250453581 +GBM (default),5.678787359824548,0.359198652780973,3.357556396858338,0.23593366680622332,0.9701181894525962,0.9213091643891652,0.19772682228406555,0.15808701393323302,43.778662055665606,16.224185451530264,26.673076923076923,0.19459,5.089803218841553,0.34710729122161865,3.0120719035705887,0.17665561056592852,1.0,1.0,0.14755545692480904,0.08991864451874558,30.743592100025186,14.007062707115537,27.0,0,0,0,26,1104.4,33.7,31.4,0.4029516994633274,0.04311426672898806 +RF (tuned),911.769657428448,0.15133640399345985,531.6502580092293,0.19063529883897276,0.9772506936041817,0.9353279698897032,0.21613457314836487,0.18735604671013067,6577.080710276883,9.67186030508054,28.326923076923077,0.18775,798.9582680463791,0.13221728801727295,527.2734284753022,0.1275233993660041,1.0,1.0,0.16269556435701765,0.11206528012392108,5029.2193365439925,7.908293435451007,30.5,0,0,0,26,1068.5,33.3,37.5,0.3644901610017889,0.04626235230028146 +NN_TORCH (default),23.867143649321335,0.24185629991384652,13.87033788200038,0.29581404152824126,0.9698948249425869,0.9409565710721252,0.24388256911099576,0.1775617977275435,159.99336582070887,16.359564345871572,30.78846153846154,0.19833499999999998,19.20789933204651,0.18236374855041504,8.931817801759234,0.17241159209016133,1.0,1.0,0.16226576016463984,0.12408070043758834,162.04458259531137,9.195674192084631,32.0,0,0,0,26,1010.4,32.8,27.9,0.30724508050089444,0.03869750422106356 +FASTAI (default),10.92040595641503,0.607048539014963,5.789474679574097,0.6364629667404095,0.9807465319111015,0.9291317753712203,0.24187630841624147,0.18104975558606934,78.41470283692,37.58331718018816,31.23076923076923,0.19146000000000002,9.578287720680237,0.6503585577011108,4.679521651364521,0.5473457672031852,1.0,1.0,0.15595107304948763,0.12788596621138626,61.09577848128312,35.985779724786894,34.0,0,0,0,26,1000.9,35.8,38.7,0.29695885509839,0.03745948746709997 +RF (default),1.1224354964036207,0.07520443659562331,0.5160590091828214,0.08504536046248315,0.9951894125278877,0.9602396143548918,0.2706685583158233,0.2595837889755988,7.699488729035172,4.8652515281017985,31.28846153846154,0.213715,0.8405723571777344,0.06134629249572754,0.44070860935344924,0.06492877134601137,1.0,1.0,0.22217921675183,0.1387641342747614,5.263965350091326,4.042169058695611,34.75,0,0,0,26,1000.0,0.0,0.0,0.2956171735241503,0.041699517521788895 +XT (default),0.7937821791722224,0.076584870998676,0.45161836217627827,0.08730195757590078,0.9851460583683391,0.9662362130243412,0.2902582047406479,0.2915786211275019,5.4850410746785805,4.998284751448414,33.19230769230769,0.22224,0.7121238708496094,0.07150018215179443,0.3597485800421042,0.06793435113493712,1.0,1.0,0.21784002712442435,0.1545266336174043,3.777771179368423,4.378376796908203,37.0,0,0,0,26,948.1,42.1,33.8,0.2513416815742397,0.03547120611186911 +LR (tuned + ensemble),175.34701893879816,0.4311328667860765,114.81546568819135,0.35152858974096185,0.9447815704031849,0.9596772797312974,0.31052316067175334,0.2734667875092025,1394.89696800973,28.50704727597215,33.25,0.219115,157.3445065021515,0.20991504192352295,89.11614089252916,0.1966045265631563,1.0,1.0,0.260779813420035,0.18354757952857997,1033.8951605639145,9.91761243845308,36.5,1,0,0,25,948.5,35.1,33.5,0.25,0.06746825184151911 +LR (tuned),175.34701893879816,0.13578540545243484,114.81546568819135,0.11644774040693313,0.9645430999513104,0.9689368903722598,0.3172191871229454,0.2852012115896724,1394.89696800973,6.834352988282761,33.96153846153846,0.21990500000000002,157.3445065021515,0.06698489189147949,89.11614089252916,0.08340922333653941,1.0,1.0,0.2839064101424172,0.1973963943493853,1033.8951605639145,4.726037355894221,37.75,0,0,1,25,928.1,35.3,38.1,0.2334525939177102,0.041270458510439895 +LR (default),4.7979896068573,0.14980417031508225,2.784461238483506,0.15310137710602748,0.9745259999639975,0.9792712555050815,0.3305357301029744,0.3245585559032769,36.357934158034695,9.005429323015887,35.01923076923077,0.227395,4.980341196060181,0.0988316535949707,2.4349667711867333,0.10027059217560387,1.0,1.0,0.283906387231373,0.21846589850078024,26.549049340848278,5.855361941404224,38.0,0,0,0,26,894.1,44.0,35.6,0.20885509838998212,0.031122186287953883 +KNN (tuned + ensemble),16.958975911140442,0.2818152537712684,7.194345160625263,0.20399826229470497,1.0,0.9800174772017892,0.47452726909617077,0.5825736639676171,56.76087640724927,13.193730328032911,38.96153846153846,0.297435,6.915013313293457,0.11628735065460205,3.5025132784274384,0.1592449370444793,1.0,1.0,0.40572679873481093,0.6729918285122782,52.78507887384432,10.328630925855668,42.0,0,0,0,26,746.8,39.1,44.8,0.11717352415026834,0.030832961959595378 +KNN (tuned),16.958975911140442,0.05680100734417255,7.194345160625263,0.04235176068048229,1.0,0.9778347768850626,0.4999289543593624,0.6437007115269255,56.76087640724927,2.623837177847182,40.25,0.301285,6.915013313293457,0.03549385070800781,3.5025132784274384,0.03322185201916636,1.0,1.0,0.5143946348760557,0.700577928371497,52.78507887384432,1.9257403822827532,43.0,0,0,1,25,672.1,45.6,49.7,0.0872093023255814,0.035937909661402645 +KNN (default),0.30434812949253964,0.03149266426379864,0.14343197725298135,0.03163346219935531,1.0,1.0,0.5930159433676823,0.94808325358393,1.002692426657382,1.6405188884998085,43.26923076923077,0.34604,0.17687928676605225,0.01871800422668457,0.0745054444441075,0.021103033819676938,1.0,1.0,0.6151111085811185,1.0,1.0,1.0188814221661033,44.0,0,0,0,26,380.4,53.9,81.7,0.01699463327370304,0.023159675362112067 diff --git a/data/lite/tabpfn-cls/tuning-impact-elo-horizontal.pdf b/data/lite/tabpfn-cls/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8211e352713b379cf3d86693384562f2d82a8887 Binary files /dev/null and b/data/lite/tabpfn-cls/tuning-impact-elo-horizontal.pdf differ diff --git a/data/lite/tabpfn-cls/tuning-impact-elo-horizontal.png.zip b/data/lite/tabpfn-cls/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..c0548fd33e9fc1629dd65491542324387b1b97eb --- /dev/null +++ b/data/lite/tabpfn-cls/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4de8849fd2a503912b139318e210f7f6b400766abd9b62db02078904f750a579 +size 134767 diff --git a/data/lite/tabpfn-reg/figures/critical-diagram.pdf b/data/lite/tabpfn-reg/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3c3dac348f15e3dfcd2bd8046b88af8a5763ba0d Binary files /dev/null and b/data/lite/tabpfn-reg/figures/critical-diagram.pdf differ diff --git a/data/lite/tabpfn-reg/figures/critical-diagram.png.zip b/data/lite/tabpfn-reg/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..c65d44c30e148888564e1a80ab7144a04860a410 --- /dev/null +++ b/data/lite/tabpfn-reg/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ec83f43ebb5f6213a4b3f92e0a5d5f645a9b348bda4f766515094c51a222a574 +size 310561 diff --git a/data/lite/tabpfn-reg/leaderboard.tex b/data/lite/tabpfn-reg/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..99c33cc0a82eec6afeff9f1fed04dd3991c24c8c --- /dev/null +++ b/data/lite/tabpfn-reg/leaderboard.tex @@ -0,0 +1,52 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1748${}_{-74,+96}$}} & 0.691 & \textcolor{gold}{\textbf{6.3}} & 4.3 & 0 & 4.9\% & 4226.02 & 5.16 \\ +TabPFNv2 (T+E) & \textcolor{silver}{\textbf{1745${}_{-94,+79}$}} & \textcolor{silver}{\textbf{0.758}} & \textcolor{silver}{\textbf{6.6}} & \textcolor{gold}{\textbf{1.4}} & \textcolor{gold}{\textbf{5}} & \textcolor{gold}{\textbf{0.7\%}} & 6601.21 & 73.56 \\ +RealMLP (T+E) & \textcolor{bronze}{\textbf{1739${}_{-74,+81}$}} & \textcolor{silver}{\textbf{0.758}} & \textcolor{silver}{\textbf{6.6}} & 5.1 & 0 & \textcolor{bronze}{\textbf{3.9\%}} & 7718.29 & 4.52 \\ +TabDPT (D) & 1680${}_{-60,+121}$ & \textcolor{gold}{\textbf{0.787}} & 8.1 & \textcolor{bronze}{\textbf{4.0}} & \textcolor{silver}{\textbf{1}} & 4.5\% & 36.28 & 33.43 \\ +CatBoost (T+E) & 1641${}_{-68,+86}$ & 0.605 & 9.4 & 8.6 & 0 & 6.3\% & 3301.07 & 1.32 \\ +LightGBM (T+E) & 1626${}_{-67,+68}$ & 0.515 & 10.1 & 8.7 & 0 & 7.5\% & 997.12 & 3.79 \\ +CatBoost (T) & 1593${}_{-57,+101}$ & 0.572 & 10.9 & 6.1 & 0 & 6.4\% & 3301.07 & 0.13 \\ +TabPFNv2 (T) & 1589${}_{-79,+76}$ & 0.524 & 11.1 & \textcolor{silver}{\textbf{3.9}} & 0 & \textcolor{silver}{\textbf{3.6\%}} & 6601.21 & 0.59 \\ +TabM (T+E) & 1557${}_{-86,+62}$ & 0.487 & 12.6 & 9.5 & 0 & 4.8\% & 4228.53 & 1.45 \\ +ModernNCA (T+E) & 1526${}_{-67,+82}$ & 0.371 & 13.3 & 6.2 & 0 & 9.4\% & 9360.97 & 7.66 \\ +XGBoost (T+E) & 1498${}_{-65,+58}$ & 0.453 & 14.9 & 13.9 & 0 & 8.0\% & 911.22 & 3.53 \\ +XGBoost (T) & 1498${}_{-67,+60}$ & 0.407 & 14.4 & 13.4 & 0 & 8.0\% & 911.22 & 0.53 \\ +TabPFNv2 (D) & 1484${}_{-89,+88}$ & 0.424 & 15.3 & \textcolor{bronze}{\textbf{4.0}} & 0 & 5.7\% & 8.97 & 0.84 \\ +TabM (T) & 1458${}_{-70,+65}$ & 0.361 & 16.0 & 12.5 & 0 & 5.6\% & 4228.53 & 0.17 \\ +LightGBM (T) & 1455${}_{-60,+80}$ & 0.381 & 16.0 & 14.0 & 0 & 8.8\% & 997.12 & 0.64 \\ +CatBoost (D) & 1453${}_{-74,+61}$ & 0.376 & 16.1 & 10.9 & 0 & 9.7\% & 9.39 & 0.10 \\ +RealMLP (T) & 1449${}_{-64,+64}$ & 0.295 & 16.6 & 14.9 & 0 & 6.6\% & 7718.29 & 0.42 \\ +ModernNCA (D) & 1428${}_{-74,+97}$ & 0.061 & 17.3 & 13.7 & 0 & 11.7\% & 27.26 & 0.29 \\ +ExtraTrees (T+E) & 1406${}_{-64,+83}$ & 0.241 & 18.0 & 11.4 & 0 & 13.2\% & 613.95 & 0.95 \\ +ExtraTrees (T) & 1405${}_{-66,+68}$ & 0.210 & 18.3 & 10.4 & 0 & 13.5\% & 613.95 & 0.13 \\ +RealMLP (D) & 1353${}_{-77,+92}$ & 0.157 & 20.1 & 12.7 & 0 & 8.5\% & 41.85 & 0.30 \\ +TabM (D) & 1348${}_{-82,+63}$ & 0.319 & 20.6 & 17.3 & 0 & 7.7\% & 17.97 & 0.15 \\ +TorchMLP (T+E) & 1317${}_{-81,+86}$ & 0.136 & 21.9 & 20.3 & 0 & 9.6\% & 5937.36 & 1.41 \\ +ModernNCA (T) & 1262${}_{-70,+79}$ & 0.125 & 23.7 & 21.2 & 0 & 12.7\% & 9360.97 & 0.52 \\ +TorchMLP (T) & 1244${}_{-81,+71}$ & 0.139 & 24.6 & 21.0 & 0 & 10.0\% & 5937.36 & 0.12 \\ +RandomForest (T+E) & 1236${}_{-86,+88}$ & 0.122 & 25.0 & 21.4 & 0 & 15.2\% & 628.51 & 0.81 \\ +EBM (T+E) & 1223${}_{-82,+67}$ & 0.186 & 25.4 & 12.7 & 0 & 15.2\% & 2317.66 & 0.14 \\ +ExtraTrees (D) & 1185${}_{-87,+70}$ & 0.085 & 27.0 & 23.8 & 0 & 15.3\% & 0.43 & 0.12 \\ +LightGBM (D) & 1183${}_{-65,+69}$ & 0.016 & 27.0 & 26.5 & 0 & 12.3\% & 4.41 & 0.36 \\ +RandomForest (T) & 1170${}_{-71,+82}$ & 0.087 & 27.4 & 24.0 & 0 & 16.3\% & 628.51 & 0.15 \\ +FastaiMLP (T+E) & 1154${}_{-80,+67}$ & 0.000 & 27.9 & 27.5 & 0 & 14.2\% & 2163.39 & 7.62 \\ +EBM (T) & 1154${}_{-70,+86}$ & 0.151 & 27.7 & 5.9 & \textcolor{silver}{\textbf{1}} & 16.2\% & 2317.66 & 0.02 \\ +XGBoost (D) & 1128${}_{-81,+84}$ & 0.000 & 29.0 & 28.2 & 0 & 13.5\% & 3.97 & 0.24 \\ +FastaiMLP (T) & 1116${}_{-81,+62}$ & 0.000 & 29.4 & 29.0 & 0 & 14.7\% & 2163.39 & 0.71 \\ +EBM (D) & 1073${}_{-79,+80}$ & 0.074 & 30.6 & 28.5 & 0 & 16.9\% & 9.65 & 0.06 \\ +TorchMLP (D) & 1049${}_{-84,+70}$ & 0.010 & 31.3 & 29.1 & 0 & 14.2\% & 20.50 & 0.13 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.000 & 32.6 & 31.7 & 0 & 18.0\% & 0.67 & 0.12 \\ +FastaiMLP (D) & 978${}_{-70,+72}$ & 0.000 & 33.0 & 32.6 & 0 & 19.9\% & 6.88 & 0.45 \\ +Linear (T) & 457${}_{-153,+157}$ & 0.000 & 40.3 & 40.2 & 0 & 35.4\% & 154.23 & 0.08 \\ +Linear (T+E) & 455${}_{-135,+133}$ & 0.000 & 40.2 & 40.1 & 0 & 35.4\% & 154.23 & 0.16 \\ +KNN (T+E) & 381${}_{-144,+134}$ & 0.000 & 40.9 & 40.8 & 0 & 41.0\% & 2.26 & 0.15 \\ +Linear (D) & 320${}_{-143,+135}$ & 0.000 & 41.5 & 41.4 & 0 & 37.2\% & 3.09 & 0.09 \\ +KNN (T) & 282${}_{-167,+114}$ & 0.000 & 41.7 & 41.7 & 0 & 41.6\% & 2.26 & 0.03 \\ +KNN (D) & 21${}_{-227,+181}$ & 0.000 & 43.4 & 43.4 & 0 & 45.3\% & 0.05 & 0.03 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/lite/tabpfn-reg/tabarena_leaderboard.csv b/data/lite/tabpfn-reg/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..14983bb513a1372829296b8d291cbf6adc29db0a --- /dev/null +++ b/data/lite/tabpfn-reg/tabarena_leaderboard.csv @@ -0,0 +1,45 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),6507.599508047104,9.172560078757149,4878.0794801313095,6.788904878407515,0.3085910704411772,0.37549473479505696,0.0486082452707653,0.04602529164641145,107683.46631145268,541.3270644129536,6.285714285714286,4.10013,6398.424601554871,2.726226568222046,4226.0222111993835,5.158970598131418,0.23133371369265832,0.28724492162524173,0.02087460906626326,0.03128273627544052,97437.24755684618,393.2261425771175,5.0,0,1,1,5,1748.4,95.4,73.8,0.8770764119601329,0.23509585652442794 +REALMLP (tuned + ensemble),17294.968933514185,3.506439651761736,7783.542231376436,5.804296668854893,0.24215826947341315,0.392343081586603,0.038845120274675446,0.04229126594047559,213038.26238584044,318.83634333585195,6.571428571428571,4.1572,7049.241462230682,3.1589651107788086,7718.289864704748,4.516099027530876,0.1939333280527935,0.370923207918769,0.02624438674143237,0.040913538766118675,158412.06608302437,260.1484504479807,6.0,0,0,2,5,1739.3,81.0,73.2,0.8704318936877077,0.1967032967032967 +TABPFNV2 (tuned + ensemble),13265.934764044625,116.34418719155448,6406.189740569957,86.56063966774829,0.2415939115786189,0.22530714141967798,0.007012905956065296,0.011864420363929354,177767.73115525587,6824.09057332562,6.571428571428571,3.91474,7449.2451322078705,31.52299928665161,6601.20821444942,73.56061810307183,0.06610997682360657,0.0,0.0,0.0,161815.97140652352,4111.6099760549805,1.0,5,0,0,2,1745.2,78.2,93.7,0.8704318936877077,0.7289285714285715 +TABDPT (default),58.233478920800344,21.44670605659485,36.772525520044425,34.985783087632136,0.21328360616453232,0.41695813127194875,0.044633371588008654,0.04493187524163665,864.7561387477256,2038.6761504521721,8.142857142857142,4.24006,45.445554971694946,18.623436212539673,36.28271871618091,33.43014439125232,0.18149452492987356,0.49769575805604005,0.01506955690923395,0.0493188827159149,780.9167218453919,1672.754347061846,7.0,1,0,0,6,1680.4,120.6,59.3,0.8338870431893688,0.24873194891991884 +CAT (tuned + ensemble),10219.439358098167,1.2273011548178536,6237.092384383964,1.3764643405546468,0.3954429862751284,0.5225342283977304,0.06330676638407992,0.06060158351003724,151904.27519951403,99.66578971371487,9.428571428571429,4.34931,10386.55150604248,0.8227088451385498,3301.0682722746046,1.3169555135467037,0.36849553490903875,0.5320058320162349,0.02223613591863527,0.06198224787320109,136008.98155550173,66.23708934234935,10.0,0,0,0,7,1640.8,85.7,67.8,0.8039867109634552,0.11581275866990152 +GBM (tuned + ensemble),1519.5935083457402,15.220876693725586,1075.2207780922756,8.071953392174887,0.48543215706848314,0.557311976668291,0.07463717866296053,0.06555042697877127,24050.082073960144,732.0577620840215,10.142857142857142,4.23184,1280.6412847042084,2.1158533096313477,997.1187061258496,3.7868055514991283,0.3027896779514977,0.5076577280238057,0.021001080160577668,0.053508861901668286,21944.50900109299,305.1869734172427,8.0,0,0,0,7,1626.5,67.7,66.1,0.7873754152823921,0.1153986155865855 +CAT (tuned),10219.439358098167,0.20652290752955846,6237.092384383964,0.17520631671032216,0.42772839942623186,0.5533378290896562,0.06381219816795006,0.0719249798648832,151904.27519951403,14.407039225588733,10.857142857142858,4.33359,10386.55150604248,0.06416535377502441,3301.0682722746046,0.12532295659184456,0.5120020111687084,0.5261838554238772,0.03215961957864544,0.06130394854912813,136008.98155550173,8.436775680044018,10.0,0,1,0,6,1592.9,100.5,56.9,0.770764119601329,0.16386800334168755 +TABPFNV2 (tuned),13265.934764044625,6.892481701714652,6406.189740569957,3.565075900288693,0.4760165239054399,0.4686126452622397,0.03576915723256963,0.05812772216726451,177767.73115525587,371.838691600217,11.142857142857142,4.06636,7449.2451322078705,0.3009166717529297,6601.20821444942,0.5877278745174408,0.3837541852536315,0.2739588480569308,0.037284762407548744,0.02674626900933208,161815.97140652352,33.800294797494224,4.0,0,2,1,4,1588.8,75.8,78.1,0.7641196013289037,0.25375795479482116 +TABM (tuned + ensemble),10587.073945794788,1.1654579980032784,6469.982531368069,1.4219714334130276,0.5128718562532983,0.5637799557572225,0.04752740423249023,0.07247049692594254,159898.94570827132,89.13274144984146,12.571428571428571,4.37433,7112.7270860672,0.7802078723907471,4228.525932170142,1.4474226149502178,0.35916463746804045,0.5514632052105218,0.04867875413017375,0.06046439799308363,130257.5121002907,89.7509542969153,11.0,0,0,0,7,1557.2,61.6,85.9,0.7308970099667774,0.1049611763897478 +MNCA (tuned + ensemble),11347.046411412102,5.150112356458392,8870.126225994847,7.01101400475912,0.629033079050954,0.6047277718923637,0.09420521732745024,0.08630788018224085,198709.11442978613,432.41908111150343,13.285714285714286,5.17584,9893.872790813446,3.8368866443634033,9360.97195459853,7.658456375974857,0.5415267425434339,0.7216078202690936,0.028945351266741137,0.04011651282919255,219316.22015007408,507.3929797993456,13.0,0,1,0,6,1525.9,81.7,66.2,0.7142857142857143,0.16165797422647987 +XGB (tuned),1964.623684678759,0.8363437311989921,1258.1348891579723,1.0162165524968663,0.5931661199323147,0.7041657661531696,0.07964264245312702,0.07605759147806708,29068.33225029261,76.75307718831554,14.428571428571429,4.27126,1646.6080114841461,0.44248437881469727,911.2186578282138,0.526588345238556,0.6620816436761664,0.6725769533673283,0.028796944630651078,0.066243759828395,28408.724850613697,33.71919899313387,14.0,0,0,0,7,1498.0,59.4,66.2,0.6877076411960132,0.0748299319727891 +XGB (tuned + ensemble),1964.623684678759,5.225094931466239,1258.1348891579723,6.231431906809619,0.5469310959609862,0.7006561259920765,0.080152008776187,0.07682133754513054,29068.33225029261,469.8791112336729,14.857142857142858,4.28931,1646.6080114841461,2.946662425994873,911.2186578282138,3.5316545373471357,0.5452119463463313,0.7076179193239929,0.026750868930337512,0.07131817083383857,28408.724850613697,165.90428541165622,15.0,0,0,0,7,1498.3,57.2,64.7,0.6777408637873754,0.07177332471450118 +TABPFNV2 (default),10.536325999668666,0.6483856269291469,8.132158828132207,0.7522934493235269,0.5759929931993246,0.5072415769751218,0.057203539607642365,0.09660218627950926,179.00806062175934,48.22880991521019,15.285714285714286,3.96968,9.203422546386719,0.44887351989746094,8.969435810057584,0.8365089530186938,0.4230930305474229,0.2813323772445926,0.05552265207828644,0.034900636525002166,193.52031139495512,52.107250060845345,3.0,0,2,2,3,1483.7,87.2,88.6,0.6677740863787376,0.252437641723356 +TABM (tuned),10587.073945794788,0.15967791421072824,6469.982531368069,0.17435700805504006,0.6392492711824916,0.6999695665076091,0.05563034885495726,0.08940263902914376,159898.94570827132,12.098100035047194,16.0,4.38922,7112.7270860672,0.1314229965209961,4228.525932170142,0.17298539846653752,0.4652267330424026,0.744542337736779,0.05191534338451931,0.08006597714469382,130257.5121002907,9.015021272319185,15.0,0,0,0,7,1457.7,64.7,69.6,0.6511627906976745,0.08012166405023548 +GBM (tuned),1519.5935083457402,1.4627540792737688,1075.2207780922756,1.050105430857932,0.6186778290078211,0.7683445449024502,0.08826070516525375,0.09138656595801721,24050.082073960144,90.68047060933284,16.0,4.36733,1280.6412847042084,0.19193363189697266,997.1187061258496,0.6355418274734194,0.5552965190223861,0.7809690118769635,0.03499422009754971,0.07637313803793273,21944.50900109299,25.03430046335168,15.0,0,0,0,7,1455.1,79.1,60.0,0.6511627906976745,0.07125493554064984 +CAT (default),30.187910761151993,0.21431371143886022,21.23474421835824,0.22233607116297832,0.6237900244125646,0.7828506352265132,0.09689377152933412,0.09771959149576696,487.8800741332821,18.478250211890124,16.142857142857142,4.47999,43.05822467803955,0.04317522048950195,9.390118294059649,0.10484877571411665,0.5847419575275037,0.8647501675552545,0.045915518799006794,0.11727889352322712,318.4902885443583,6.227518140238661,18.0,0,0,0,7,1452.6,61.0,73.2,0.6478405315614618,0.09190658109162811 +REALMLP (tuned),17294.968933514185,0.1830120086669922,7783.542231376436,0.3259796662543578,0.7047967586156892,0.7455274593429628,0.06611928626088935,0.08706588082885992,213038.26238584044,18.10152693439293,16.571428571428573,4.49659,7049.241462230682,0.18346905708312988,7718.289864704748,0.41665478856739646,0.7895709633169843,0.888035701857616,0.03133979505568851,0.06965496351407507,158412.06608302437,16.898709432908465,17.0,0,0,0,7,1449.0,63.9,63.9,0.6378737541528239,0.06694456730060445 +MNCA (default),32.55938308579581,0.19486018589564733,24.957505841642334,0.25196370179633515,0.9394939857609429,0.7698114045810972,0.1167268604145739,0.1003795869866352,579.4685759941773,15.594763357138524,17.285714285714285,5.11399,27.96918511390686,0.14082908630371094,27.260414341039827,0.28976407064988385,1.0,0.8968599174854596,0.03164135806644719,0.10122870662762316,549.2641916961603,14.335706179717322,18.0,0,0,0,7,1427.5,96.4,73.5,0.6212624584717608,0.07283386960405594 +XT (tuned + ensemble),653.1345855508532,1.3570618970053536,577.6942803326062,1.4658800281842608,0.7590185126625372,0.7362657403903793,0.13182107507552596,0.10321816124521692,11988.325985095387,91.31150878008751,18.0,5.19051,619.4238421916962,1.1150686740875244,613.9510258467136,0.9522022685938181,1.0,0.8933306559798235,0.019723175236079915,0.0969336948409988,13486.685259241041,88.45578138045335,18.0,0,0,0,7,1405.8,82.8,63.4,0.6046511627906976,0.08802681992337165 +XT (tuned),653.1345855508532,0.19052549770900182,577.6942803326062,0.20596209279824323,0.7900820708578837,0.7050663135559584,0.13531068870954055,0.10206565880892562,11988.325985095387,12.425140461101606,18.285714285714285,5.21222,619.4238421916962,0.12293267250061035,613.9510258467136,0.13470561454520408,1.0,0.8750587070440755,0.019404407055781392,0.07760478346112754,13486.685259241041,12.271457921927672,16.0,0,0,0,7,1405.3,67.7,65.4,0.5980066445182725,0.0957717275170248 +REALMLP (default),85.97223772321429,0.1575484275817871,39.12093352642527,0.2743856804074748,0.8426262409401332,0.811431445405329,0.08492984112267965,0.09970894106924409,1070.0293170023945,15.38773372916076,20.142857142857142,4.72117,38.86532187461853,0.15464305877685547,41.84851132799618,0.30457759331800266,0.9775778014564183,1.0,0.05630051064737385,0.09045562976994424,871.512756220395,17.475355288117672,23.0,0,0,0,7,1352.9,91.2,76.2,0.5548172757475083,0.07870035761340109 +TABM (default),44.222292866025654,0.09508374759129115,33.52655463742946,0.14459166718033462,0.6807517607831785,0.8006243600538642,0.07746676233499121,0.11907088719148319,799.8890873088621,8.677317571690375,20.571428571428573,4.49975,33.95261740684509,0.09252333641052246,17.97321997410923,0.1450319163846654,0.6869868540300672,0.8949833068709946,0.05541802374036453,0.09871836929846278,614.4361778763692,5.7128774450352955,21.0,0,0,0,7,1348.3,62.2,81.5,0.5448504983388704,0.05791081251910745 +NN_TORCH (tuned + ensemble),12457.980165379387,1.2596015930175781,7008.335221238185,1.51976815453829,0.8644212313341298,0.8821930899105708,0.09635544815863366,0.13054872917279642,168590.23153378774,91.87300057791074,21.857142857142858,4.45415,7507.96986413002,0.7180907726287842,5937.361191915086,1.4061158013450725,1.0,0.9593685963016786,0.08724572872591152,0.09102294293315928,150677.82359954235,86.08980937276488,21.0,0,0,0,7,1317.1,85.1,80.9,0.5149501661129569,0.049260037598247694 +MNCA (tuned),11347.046411412102,0.3390509401048933,8870.126225994847,0.4537320246881022,0.8745416984197177,0.8809824771089103,0.12651248212292465,0.15226349694705085,198709.11442978613,28.22969633543519,23.714285714285715,5.75005,9893.872790813446,0.2758302688598633,9360.97195459853,0.5220087902191429,1.0,1.0,0.04948509220058683,0.10805583782920748,219316.22015007408,25.82571615254995,23.0,0,0,0,7,1262.4,78.9,69.6,0.4717607973421927,0.04720915415791192 +NN_TORCH (tuned),12457.980165379387,0.08400774002075195,7008.335221238185,0.11080666811510796,0.8608314951688506,0.8774455871830203,0.09957021763907885,0.15815621944343286,168590.23153378774,6.839931733747739,24.571428571428573,4.35043,7507.96986413002,0.06095480918884277,5937.361191915086,0.11905236169695854,1.0,1.0,0.09669761975708657,0.07439215953480095,150677.82359954235,6.913734822466806,26.0,0,0,0,7,1243.8,70.9,80.6,0.45182724252491696,0.04772219453276936 +RF (tuned + ensemble),801.2955491883414,1.11563321522304,613.3607596500245,1.26624268506503,0.877625154030774,0.8717206280423674,0.15150038451410816,0.13535733856884374,13220.038476594822,74.24085143974197,25.0,5.35575,630.387446641922,0.864675760269165,628.5083029798404,0.811527051576754,1.0,1.0,0.04140035298054934,0.16236919097501226,13750.799241897716,73.76895458081503,29.0,0,0,0,7,1235.9,87.3,85.2,0.4418604651162791,0.04683025041397519 +EBM (tuned + ensemble),5561.30316393716,0.11157175472804479,3175.063314528853,0.13502700552021676,0.8141909618196094,0.8896216728438898,0.15236144498178752,0.1898206774797642,73928.60900997189,8.51769019764669,25.428571428571427,4.4992,4016.161784172058,0.0706477165222168,2317.663761142634,0.13798382133245468,1.0,1.0,0.11314433759685272,0.19494069658306334,53673.002392900955,8.013142594445496,30.0,0,0,1,6,1223.0,66.2,82.0,0.4318936877076412,0.07850231001491505 +GBM (default),7.295478275844029,0.6803168909890311,5.774057228808984,0.7873917849760669,0.9842293100404953,0.998266642271873,0.12276167672393257,0.1375507325867256,123.55828625698926,63.969416972987915,27.0,4.60997,5.699137926101685,0.33568358421325684,4.411330137552167,0.3631429914514132,1.0,1.0,0.05861977343818392,0.12652549891600978,89.74197938321082,41.091864073072564,26.0,0,0,0,7,1183.2,68.3,64.9,0.3953488372093023,0.037689489160077395 +XT (default),0.6613446303776332,0.07277822494506836,0.42786099680297607,0.10136865529358885,0.9145279857275386,0.9203781708634962,0.15302309586297927,0.14325676648751842,9.71399832220294,6.093015438403495,27.0,5.21222,0.43534398078918457,0.059967994689941406,0.42798109873088297,0.1173228956758976,1.0,1.0,0.07346763789813626,0.16206852516277276,9.783143310259103,5.904437603010231,30.5,0,0,0,7,1185.2,69.7,86.7,0.3953488372093023,0.04208640225911888 +RF (tuned),801.2955491883414,0.1265991755894252,613.3607596500245,0.187274000354846,0.9129471887632264,0.8942717915846629,0.16343200728271487,0.15127595329353374,13220.038476594822,10.741823760393732,27.357142857142858,5.50373,630.387446641922,0.11566352844238281,628.5083029798404,0.1492024419788353,1.0,1.0,0.05545969936477957,0.19024547228638602,13750.799241897716,12.51650764501577,33.5,0,0,0,7,1170.5,82.0,70.5,0.38704318936877075,0.041677761427603766 +EBM (tuned),5561.30316393716,0.015260968889508928,3175.063314528853,0.017210140514576817,0.8494479068291391,0.8504733869471625,0.16192837219447462,0.23515542970497189,73928.60900997189,1.0432270754270017,27.714285714285715,4.53788,4016.161784172058,0.00881648063659668,2317.663761142634,0.01721968874335289,1.0,1.0,0.11467541102152934,0.2156961514785443,53673.002392900955,1.0,35.0,1,0,0,6,1154.3,85.2,69.3,0.3787375415282392,0.17073522404349473 +FASTAI (tuned + ensemble),2255.801782812391,4.754738875797817,1874.5041683637385,7.02561873695556,1.0,0.9970644338604485,0.14237984020932382,0.17188628389751054,39821.805599866035,393.3305755649491,27.857142857142858,5.22937,2167.720710515976,4.366992950439453,2163.3939226706348,7.6216664328171975,1.0,1.0,0.09544576825940065,0.16368526688049675,44341.68565486565,365.3580675403427,28.0,0,0,0,7,1154.5,66.9,79.2,0.3754152823920266,0.03638504352790067 +XGB (default),5.3146242414202005,0.2511120523725237,3.9458596806008757,0.36393582726260965,1.0,0.9964160464678224,0.13461597663145097,0.17323981254075252,88.641308375137,25.06965036903791,29.0,5.03782,4.206391334533691,0.25308847427368164,3.970691708705885,0.24026284837936607,1.0,1.0,0.06048068144140273,0.13909193858299207,83.06864309112483,14.470859449961994,27.0,0,0,0,7,1127.9,83.3,80.1,0.3488372093023256,0.03551231551231551 +FASTAI (tuned),2255.801782812391,0.49124997002737864,1874.5041683637385,0.6913652874014505,1.0,1.0,0.14682535076785339,0.20225951117254543,39821.805599866035,40.61138754197303,29.428571428571427,5.20475,2167.720710515976,0.4167633056640625,2163.3939226706348,0.7125644494366172,1.0,1.0,0.11250145730924221,0.2028343782558529,44341.68565486565,41.0657519171911,30.0,0,0,0,7,1115.5,61.2,80.3,0.3388704318936877,0.03446883804026661 +EBM (default),13.620444434029716,0.04176497459411621,8.849664427822175,0.0725423696868711,0.9258962131926525,0.986552754813623,0.16880716280386437,0.23207162894393898,199.98853409474154,3.960829194431375,30.571428571428573,4.59891,10.388706684112549,0.03829312324523926,9.65114625032283,0.0636073167690498,1.0,1.0,0.1212644465105922,0.22580812405457285,199.27951947649242,3.64412233970632,36.0,0,0,0,7,1072.6,79.8,78.8,0.3122923588039867,0.03513201825558805 +NN_TORCH (default),43.06239519800459,0.08627057075500488,31.428014747890156,0.11590329450401095,0.9903234435493359,0.9826874022847564,0.14240631859026293,0.22471185375084687,708.3643874980628,7.238187531162345,31.285714285714285,4.48918,41.744338512420654,0.06643533706665039,20.501721600246,0.12975651770830154,1.0,1.0,0.1279603352562162,0.12823638255534808,464.3796175172265,6.690027290059689,34.0,0,0,0,7,1049.0,69.1,83.5,0.2956810631229236,0.034315758971465174 +RF (default),1.6488038471766882,0.07487589972359794,0.7340231092118259,0.1055457863320713,1.0,0.9934710488617512,0.17986721711507,0.20097685324182807,19.421218249481452,6.45143964546498,32.642857142857146,5.50373,0.5004377365112305,0.060329437255859375,0.6685572767188281,0.11783093214035034,1.0,1.0,0.10385640724520351,0.20419806328614365,11.221774322893834,5.842398233666076,35.0,0,0,0,7,1000.0,0.0,0.0,0.26411960132890366,0.03157332807971695 +FASTAI (default),8.895496198109218,0.4506887027195522,6.745758004706619,0.5305722397771327,1.0,1.0,0.1992892524760812,0.2546651622823661,149.59972016881048,35.25519751231355,33.0,6.27121,7.010618448257446,0.3927772045135498,6.882870030736257,0.45197373548430714,1.0,1.0,0.11103713173333196,0.30980814568049436,155.75215591877054,35.82998039822552,32.0,0,0,0,7,977.9,71.9,69.6,0.2558139534883721,0.030677198284032307 +LR (tuned + ensemble),151.49413922854833,0.12385766846793038,140.2291087325694,0.2034921921233621,1.0,1.0,0.3536775263323216,0.5794878220593526,2871.3444089692503,12.750390729258724,40.214285714285715,7.90301,151.6921091079712,0.07966971397399902,154.22778905270818,0.15536207236991023,1.0,1.0,0.27398044406077504,0.6617005914731261,3356.9082636752537,7.635360610075989,39.0,0,0,0,7,455.3,132.5,134.8,0.08803986710963455,0.024907046897470975 +LR (tuned),151.49413922854833,0.0357013429914202,140.2291087325694,0.05877661513349659,1.0,1.0,0.3541042830323525,0.5805259668922418,2871.3444089692503,3.4075401142248447,40.285714285714285,7.95008,151.6921091079712,0.03558516502380371,154.22778905270818,0.07586279315148999,1.0,1.0,0.274940797173291,0.6653082983995465,3356.9082636752537,3.222681902578225,40.5,0,0,0,7,456.9,156.3,152.8,0.08637873754152824,0.024885473611305407 +KNN (tuned + ensemble),4.365503992353167,0.1759951114654541,2.503089425947226,0.17631688214986382,1.0,1.0,0.4095786597069403,0.743043049481299,60.321121177196595,11.281300197479359,40.857142857142854,8.84074,2.312436103820801,0.1065065860748291,2.2580906778514502,0.14868705810425048,1.0,1.0,0.42992491292377444,0.8312484819939865,51.96556010372688,12.473272386123199,42.0,0,0,0,7,381.1,133.7,143.1,0.07308970099667775,0.0245346688503645 +LR (default),3.9944423266819546,0.07546016148158483,3.291819429311604,0.12482651745436109,1.0,1.0,0.371612807074338,0.658400915114039,71.50131231042234,8.781090197151686,41.5,8.07907,2.9657371044158936,0.0455625057220459,3.0870405547822,0.0889892689883709,1.0,1.0,0.27343206074359794,0.6653082514030221,67.40864758081219,3.589990624937666,41.0,0,0,0,7,319.8,134.2,142.4,0.05813953488372093,0.024136808689654215 +KNN (tuned),4.365503992353167,0.03152152470179966,2.503089425947226,0.031078100848287667,1.0,1.0,0.41647808374429435,0.7824790590876856,60.321121177196595,2.033314994156082,41.714285714285715,9.11406,2.312436103820801,0.014968395233154297,2.2580906778514502,0.02987703639352155,1.0,1.0,0.4343387863461754,0.8773703541691247,51.96556010372688,1.9337322466384677,42.0,0,0,0,7,282.1,113.2,166.7,0.053156146179401995,0.02400393073150548 +KNN (default),0.06725828988211495,0.02155137062072754,0.043712475995924135,0.02795156394158391,1.0,1.0,0.45340288690514835,0.9405484140280839,1.0,1.6852534934851076,43.42857142857143,9.84077,0.04459524154663086,0.014516353607177734,0.045943402958487325,0.029731415464968502,1.0,1.0,0.4698333732046267,1.0,1.0,1.6403093647746019,44.0,0,0,0,7,20.6,180.5,226.4,0.013289036544850499,0.02304034592747015 diff --git a/data/lite/tabpfn-reg/tuning-impact-elo-horizontal.pdf b/data/lite/tabpfn-reg/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..3409f93f92550558545a950f5d846c3ab73aaffc Binary files /dev/null and b/data/lite/tabpfn-reg/tuning-impact-elo-horizontal.pdf differ diff --git a/data/lite/tabpfn-reg/tuning-impact-elo-horizontal.png.zip b/data/lite/tabpfn-reg/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..4c40ae41478b8e43d4fe239a012e7b8d51ea0421 --- /dev/null +++ b/data/lite/tabpfn-reg/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9859c5a16c8717d1573d1ed6129806a2416b285f78b12a19ecb04779cef94382 +size 135623 diff --git a/data/lite/tabpfn-tabicl-cls/figures/critical-diagram.pdf b/data/lite/tabpfn-tabicl-cls/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..78193941da7724a2f31fcf9a4bdc0c1cdd8fda2b Binary files /dev/null and b/data/lite/tabpfn-tabicl-cls/figures/critical-diagram.pdf differ diff --git a/data/lite/tabpfn-tabicl-cls/figures/critical-diagram.png.zip b/data/lite/tabpfn-tabicl-cls/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..ee6c1a909c106e11df9c28c59d1ea06ad8d90858 --- /dev/null +++ b/data/lite/tabpfn-tabicl-cls/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a5e1e5d299642f413424cfe6d5802a988af48d82649e3847e843cf9179eb4ef +size 319000 diff --git a/data/lite/tabpfn-tabicl-cls/leaderboard.tex b/data/lite/tabpfn-tabicl-cls/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..470e5847143808188c07aa37c1d5e5b76d097272 --- /dev/null +++ b/data/lite/tabpfn-tabicl-cls/leaderboard.tex @@ -0,0 +1,53 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +TabPFNv2 (T+E) & \textcolor{gold}{\textbf{1507${}_{-45,+32}$}} & \textcolor{gold}{\textbf{0.700}} & \textcolor{gold}{\textbf{8.5}} & \textcolor{gold}{\textbf{2.8}} & \textcolor{gold}{\textbf{6}} & \textcolor{gold}{\textbf{8.0\%}} & 3361.32 & 51.67 \\ +AutoGluon 1.3 (4h) & \textcolor{silver}{\textbf{1434${}_{-31,+33}$}} & 0.440 & \textcolor{silver}{\textbf{11.0}} & 6.4 & 1 & \textcolor{silver}{\textbf{10.4\%}} & 2085.73 & 2.16 \\ +RealMLP (T+E) & \textcolor{bronze}{\textbf{1408${}_{-30,+28}$}} & 0.402 & \textcolor{bronze}{\textbf{12.1}} & 6.9 & 0 & 12.3\% & 5575.69 & 4.58 \\ +TabPFNv2 (T) & 1402${}_{-33,+32}$ & \textcolor{silver}{\textbf{0.544}} & 12.2 & \textcolor{bronze}{\textbf{4.5}} & 1 & 10.6\% & 3361.32 & 0.53 \\ +TabM (T+E) & 1392${}_{-40,+33}$ & 0.483 & 12.8 & 6.2 & 1 & 12.0\% & 2899.02 & 1.46 \\ +TabICL (D) & 1378${}_{-31,+37}$ & \textcolor{bronze}{\textbf{0.543}} & 13.4 & \textcolor{silver}{\textbf{3.9}} & \textcolor{silver}{\textbf{4}} & \textcolor{bronze}{\textbf{10.5\%}} & 9.07 & 2.01 \\ +LightGBM (T+E) & 1349${}_{-33,+29}$ & 0.319 & 14.8 & 11.6 & 0 & 14.1\% & 691.46 & 1.46 \\ +TabPFNv2 (D) & 1334${}_{-35,+35}$ & 0.499 & 15.5 & \textcolor{bronze}{\textbf{4.5}} & \textcolor{bronze}{\textbf{3}} & 12.1\% & 4.28 & 0.51 \\ +CatBoost (T+E) & 1332${}_{-36,+33}$ & 0.271 & 15.8 & 9.8 & 0 & 13.5\% & 1394.45 & 0.62 \\ +TabM (T) & 1330${}_{-31,+33}$ & 0.387 & 15.7 & 7.7 & 0 & 13.1\% & 2899.02 & 0.17 \\ +ModernNCA (T) & 1326${}_{-31,+31}$ & 0.255 & 16.0 & 6.4 & 2 & 12.7\% & 6020.89 & 0.45 \\ +XGBoost (T+E) & 1314${}_{-33,+29}$ & 0.222 & 16.6 & 11.4 & 0 & 15.2\% & 785.90 & 1.28 \\ +CatBoost (D) & 1308${}_{-32,+30}$ & 0.228 & 16.8 & 9.6 & 1 & 14.8\% & 5.74 & 0.14 \\ +CatBoost (T) & 1299${}_{-28,+42}$ & 0.247 & 17.2 & 13.3 & 0 & 14.4\% & 1394.45 & 0.05 \\ +ModernNCA (T+E) & 1292${}_{-35,+27}$ & 0.307 & 17.7 & 9.4 & 0 & 14.4\% & 6020.89 & 8.48 \\ +XGBoost (T) & 1290${}_{-26,+30}$ & 0.181 & 17.8 & 9.8 & 1 & 15.4\% & 785.90 & 0.21 \\ +LightGBM (T) & 1288${}_{-29,+30}$ & 0.262 & 17.6 & 11.9 & 0 & 15.1\% & 691.46 & 0.26 \\ +TabM (D) & 1281${}_{-25,+30}$ & 0.288 & 18.2 & 8.1 & 1 & 16.2\% & 11.60 & 0.15 \\ +EBM (T+E) & 1277${}_{-29,+37}$ & 0.186 & 18.4 & 8.2 & 0 & 17.2\% & 1142.31 & 0.24 \\ +TorchMLP (T+E) & 1247${}_{-28,+31}$ & 0.199 & 19.9 & 12.7 & 0 & 15.7\% & 2792.14 & 2.18 \\ +RealMLP (T) & 1231${}_{-29,+34}$ & 0.201 & 20.8 & 13.3 & 0 & 16.5\% & 5575.69 & 0.24 \\ +EBM (T) & 1227${}_{-32,+37}$ & 0.120 & 20.9 & 11.1 & 0 & 18.0\% & 1142.31 & 0.03 \\ +EBM (D) & 1207${}_{-35,+29}$ & 0.153 & 22.2 & 9.7 & 1 & 18.7\% & 5.57 & 0.07 \\ +FastaiMLP (T+E) & 1202${}_{-34,+29}$ & 0.230 & 22.3 & 12.4 & 0 & 19.1\% & 1350.04 & 6.57 \\ +ModernNCA (D) & 1188${}_{-28,+29}$ & 0.183 & 22.8 & 9.8 & 1 & 19.4\% & 13.67 & 0.34 \\ +TabDPT (D) & 1179${}_{-36,+29}$ & 0.258 & 23.5 & 7.1 & 1 & 18.4\% & 28.08 & 8.76 \\ +RealMLP (D) & 1179${}_{-31,+24}$ & 0.083 & 23.6 & 11.0 & 1 & 17.5\% & 35.27 & 0.25 \\ +TorchMLP (T) & 1168${}_{-36,+38}$ & 0.086 & 24.0 & 17.9 & 0 & 17.8\% & 2792.14 & 0.13 \\ +ExtraTrees (T+E) & 1155${}_{-36,+29}$ & 0.102 & 24.9 & 16.7 & 0 & 20.4\% & 378.34 & 1.37 \\ +FastaiMLP (T) & 1140${}_{-32,+30}$ & 0.113 & 25.5 & 14.7 & 0 & 20.7\% & 1350.04 & 0.61 \\ +XGBoost (D) & 1130${}_{-32,+28}$ & 0.032 & 26.0 & 21.9 & 0 & 19.1\% & 2.66 & 0.17 \\ +ExtraTrees (T) & 1128${}_{-26,+31}$ & 0.072 & 26.0 & 13.8 & 0 & 20.6\% & 378.34 & 0.15 \\ +RandomForest (T+E) & 1114${}_{-34,+28}$ & 0.057 & 27.0 & 14.8 & 0 & 21.0\% & 527.27 & 1.16 \\ +LightGBM (D) & 1098${}_{-30,+32}$ & 0.030 & 27.6 & 24.0 & 0 & 20.1\% & 3.01 & 0.18 \\ +RandomForest (T) & 1064${}_{-35,+30}$ & 0.023 & 29.2 & 22.2 & 0 & 21.9\% & 527.27 & 0.13 \\ +TorchMLP (D) & 1006${}_{-30,+39}$ & 0.030 & 31.7 & 26.7 & 0 & 24.8\% & 8.93 & 0.17 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.005 & 32.2 & 24.6 & 0 & 27.3\% & 0.44 & 0.06 \\ +FastaiMLP (D) & 1000${}_{-32,+30}$ & 0.019 & 32.2 & 27.7 & 0 & 24.6\% & 4.68 & 0.55 \\ +ExtraTrees (D) & 949${}_{-32,+36}$ & 0.015 & 34.1 & 29.1 & 0 & 29.3\% & 0.36 & 0.07 \\ +Linear (T+E) & 946${}_{-35,+33}$ & 0.055 & 34.2 & 15.0 & 1 & 31.3\% & 89.12 & 0.20 \\ +Linear (T) & 925${}_{-44,+35}$ & 0.035 & 34.9 & 24.7 & 0 & 32.0\% & 89.12 & 0.08 \\ +Linear (D) & 892${}_{-29,+36}$ & 0.025 & 36.0 & 33.0 & 0 & 33.3\% & 2.43 & 0.10 \\ +KNN (T+E) & 746${}_{-31,+45}$ & 0.000 & 39.9 & 33.1 & 0 & 47.7\% & 3.50 & 0.16 \\ +KNN (T) & 676${}_{-48,+42}$ & 0.000 & 41.2 & 28.3 & 0 & 50.2\% & 3.50 & 0.03 \\ +KNN (D) & 384${}_{-116,+61}$ & 0.000 & 44.3 & 44.2 & 0 & 59.5\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/lite/tabpfn-tabicl-cls/tabarena_leaderboard.csv b/data/lite/tabpfn-tabicl-cls/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..df059e4121634d1f0f2b668e430505bc14507b44 --- /dev/null +++ b/data/lite/tabpfn-tabicl-cls/tabarena_leaderboard.csv @@ -0,0 +1,46 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +TABPFNV2 (tuned + ensemble),14624.10810300937,166.6936253951146,3837.093570770289,71.22922446033486,0.2999799624009034,0.37900004959687766,0.08031051995518385,0.05455935044977521,73712.17169915051,6326.444952096131,8.461538461538462,0.1749,8447.90291428566,59.9476752281189,3361.317280362242,51.668880171806535,0.21441671773452015,0.3480554663987826,0.03548854933740464,0.027745461982937078,32774.70384527536,4094.471326370238,5.5,6,1,3,16,1506.7,31.8,44.7,0.8304195804195804,0.358094093982103 +AutoGluon 1.3 (4h),5811.540137483524,3.0737958137805643,3715.2599271484114,3.044362316385498,0.5599854226187498,0.5617828950350577,0.10445421398736063,0.0599680756233326,37983.87924037632,165.48830983683717,11.01923076923077,0.17751,5213.819254875183,2.0895845890045166,2085.7270446537464,2.161349128608518,0.5600808158121042,0.5574000428410242,0.06078825021132733,0.06659288302785793,30345.10100676535,118.3021811054546,9.0,1,0,1,24,1433.5,32.8,30.7,0.7722902097902098,0.15518544636191695 +REALMLP (tuned + ensemble),17853.666671056013,5.060677555891184,6069.468633023759,5.352972929535142,0.5983187605703805,0.5697719962134024,0.1225244655490201,0.0697158337547381,114488.59335090281,312.9299756303972,12.115384615384615,0.180765,10652.850141763687,4.7910391092300415,5575.693006637695,4.577857176513236,0.5696681015084324,0.5408431020221902,0.06341551053522204,0.04576778429435464,88499.22644289241,286.2007603461078,11.5,0,1,2,23,1407.7,28.0,30.0,0.7473776223776224,0.1450476689017345 +TABPFNV2 (tuned),14624.10810300937,5.890562671881455,3837.093570770289,2.9216612176257093,0.4562783530638654,0.539616756848536,0.10636474393434797,0.07576264928807884,73712.17169915051,208.74723417223507,12.25,0.19132,8447.90291428566,0.5261704921722412,3361.317280362242,0.52729181819818,0.34109768962419973,0.6143846899224816,0.08017059655741715,0.050216477642017796,32774.70384527536,36.401677545743524,11.0,1,5,2,18,1402.3,31.6,32.9,0.7443181818181818,0.22277582323851844 +TABM (tuned + ensemble),7183.65333878994,2.169150572556716,3570.712955098323,1.861754182648195,0.5166525238887076,0.5534887980632361,0.11956349986158774,0.08309123144106954,42989.483218042995,121.47528176372664,12.846153846153847,0.18310500000000002,6058.230099320412,1.6269241571426392,2899.0220559668596,1.460880880345262,0.47136645123500964,0.5213443194889227,0.07336602828104005,0.040297742432766875,37327.19260107514,109.27107326295308,9.5,1,0,1,24,1392.2,32.7,39.4,0.7307692307692307,0.16156485134544094 +TABICL (default),21.54241045621725,2.6437206268310547,10.219926189931016,1.9528462364644343,0.45732829959408833,0.5557361534084676,0.10499055070178752,0.06310686388674132,140.84761892371782,137.0823191069963,13.365384615384615,0.177865,18.648964643478394,2.1483960151672363,9.073519792448254,2.007502492806867,0.46959481282562443,0.6652418475567421,0.05302697309380272,0.028777901101857722,117.57303754113096,107.76328227971135,13.5,4,2,1,19,1378.5,36.4,30.5,0.7189685314685315,0.2556026489858902 +GBM (tuned + ensemble),1386.6633568268555,2.647538212629465,855.5354588276103,2.6861264004396594,0.6806482110513491,0.7106186510085695,0.141362130997881,0.08782930779014528,10078.423717398238,182.57724180654643,14.788461538461538,0.18084,1317.1929507255554,2.008246660232544,691.4641967411458,1.4553717527055694,0.7005535661954161,0.7652961068739543,0.07965351598262033,0.06332006129013433,9059.644744264673,90.72358893879789,14.75,0,0,1,25,1349.4,28.6,33.0,0.6866258741258742,0.08652272628643233 +TABPFNV2 (default),12.006092300781837,1.2122490681134737,5.3148066442099084,0.6434373636958165,0.5013642769155608,0.6076681115357273,0.12052806276021125,0.08926430649670257,74.57934257049592,50.66248833495304,15.538461538461538,0.194575,9.273962259292603,0.4255213737487793,4.283948847917442,0.5110342254172261,0.3771347187714357,0.8078260737500734,0.07359963710195983,0.07618105155863322,62.366726819804335,28.80821414198816,14.5,3,2,2,19,1333.5,34.9,34.7,0.6695804195804196,0.22455622157365326 +TABM (tuned),7183.65333878994,0.2019361532651461,3570.712955098323,0.2168923191387062,0.6127855914191946,0.6153990479247885,0.130856425990661,0.09537399493661194,42989.483218042995,13.062348029072668,15.692307692307692,0.18589499999999998,6058.230099320412,0.1839456558227539,2899.0220559668596,0.171251855223312,0.5915500043256848,0.6288985041617685,0.09148128983327741,0.05588464476989181,37327.19260107514,9.17800659262771,12.0,0,2,2,22,1329.7,32.2,30.3,0.666083916083916,0.13000506341067208 +CAT (tuned + ensemble),4861.749095384891,1.0365588940106905,3035.7630585976663,0.9271931106606893,0.728920075896456,0.6786642048762535,0.13487992339677388,0.0909217961956513,26561.38119334446,53.29220521181529,15.76923076923077,0.178855,3259.4502482414246,0.9432240724563599,1394.4481643221718,0.6180738220288715,0.7992439551459185,0.7007978686240202,0.07405507598059591,0.051124014208117546,20607.594190900367,47.175332427485074,15.0,0,1,1,24,1331.9,32.8,35.8,0.6643356643356644,0.10243020698237862 +MNCA (tuned),11104.727954295966,0.5286179322462815,5977.187221364461,0.4814995680859316,0.7448490525012423,0.6406839211091188,0.12655502884420408,0.10326715793661159,78433.1838326286,30.191390941699975,16.01923076923077,0.18580999999999998,11212.920736551285,0.4423438310623169,6020.892835479641,0.4537708228509102,0.8177816514691232,0.7061138774961084,0.08668289690877401,0.05913687649157323,67721.7850080898,25.373417518222958,14.25,2,0,0,24,1325.6,30.3,30.3,0.6586538461538461,0.15586362718013336 +XGB (tuned + ensemble),1868.2429302564035,2.459977333362286,1131.3403541498637,3.0908212073715786,0.7784275988369416,0.7326751610286906,0.1521183469738161,0.10487866753851346,11151.77336671448,133.9681639809692,16.576923076923077,0.184395,1441.2213016748428,1.317580223083496,785.8975953484313,1.2809013038285242,0.8316341855574652,0.7341106406501618,0.1144861696628025,0.054690456125919155,11226.485943771408,74.70529109142498,13.5,0,0,2,24,1313.5,28.5,32.1,0.6459790209790209,0.08752950287187947 +CAT (default),173.12264615755814,0.2530022859573364,152.19981148640542,0.1746769705615129,0.7721520925382083,0.7508782741378199,0.14762383159906592,0.09673912909834526,503.5985284620293,11.812145121206303,16.75,0.18153,12.185660362243652,0.2613861560821533,5.736216485552211,0.13945330874004275,0.8708469956136012,0.8764795848440701,0.09472627265148731,0.048694739189140704,101.89620933712393,7.710770185087604,19.25,1,0,0,25,1308.5,29.6,31.7,0.6420454545454546,0.10408056855425275 +CAT (tuned),4861.749095384891,0.10756405500265268,3035.7630585976663,0.11792367197674246,0.7533425274831995,0.7418580085557809,0.14394572275280534,0.09786470048169181,26561.38119334446,7.02054598025311,17.192307692307693,0.179485,3259.4502482414246,0.05912292003631592,1394.4481643221718,0.05275041900206211,0.8504867849422679,0.7781645153943832,0.09050829480547845,0.05413571715045103,20607.594190900367,2.886943526422101,17.0,0,0,0,26,1299.2,41.1,27.8,0.631993006993007,0.07539691390572127 +GBM (tuned),1386.6633568268555,0.6239433655372033,855.5354588276103,0.6651164927916418,0.737683616453633,0.7596688264282848,0.15123309377717353,0.10299158861394081,10078.423717398238,42.45419816933193,17.634615384615383,0.18125,1317.1929507255554,0.32417428493499756,691.4641967411458,0.2643910314838089,0.7826548575969626,0.7981479775650631,0.08157588989660797,0.06910038121186715,9059.644744264673,20.34411810847653,18.5,0,1,0,25,1288.1,29.9,29.0,0.6219405594405595,0.08417253395400366 +MNCA (tuned + ensemble),11104.727954295966,11.626816547833956,5977.187221364461,9.077822725159232,0.6933896363411076,0.6660665157088731,0.1444822450466191,0.11574155496848519,78433.1838326286,625.8156905319904,17.653846153846153,0.196755,11212.920736551285,9.569536089897156,6020.892835479641,8.47783782257851,0.722368050936999,0.6622270828972778,0.09664772425546841,0.06590198674037494,67721.7850080898,462.66839224080115,15.75,0,0,2,24,1292.0,26.5,34.9,0.6215034965034965,0.10668541415880203 +XGB (tuned),1868.2429302564035,0.6344939470291138,1131.3403541498637,0.845858557757576,0.8187393857188691,0.7667685498472205,0.15449551107917392,0.10860550617326425,11151.77336671448,35.07178353015141,17.807692307692307,0.18658,1441.2213016748428,0.2507840394973755,785.8975953484313,0.21279594317419753,0.9017943832788667,0.7453177957281472,0.12200656150282413,0.06269228828209442,11226.485943771408,15.348416723363933,16.5,1,0,0,25,1289.8,29.4,25.3,0.618006993006993,0.10251976073586389 +TABM (default),26.77778566800631,0.20130640726823074,16.983704596070464,0.1991903141757489,0.7120650416184634,0.7086289666607134,0.1622519047697801,0.11661283756574645,212.38711719883958,11.49822311803021,18.153846153846153,0.18572,20.835053324699402,0.1561201810836792,11.598826273977398,0.14759862387674094,0.7417274447397321,0.7810887350534926,0.09137336521284833,0.06275561870896407,109.84095896574044,8.743598466674035,18.0,1,0,1,24,1281.0,29.5,24.9,0.6101398601398601,0.12406889112890061 +EBM (tuned + ensemble),2866.551095375648,0.3536289380146907,1716.6253639310917,0.31694700723366953,0.8144475987903695,0.7364009363908267,0.17180309869898544,0.13289820608093436,16005.580219570395,21.915829440477204,18.423076923076923,0.18934,1826.563945055008,0.30398499965667725,1142.3094021828929,0.2383204663966832,0.9368603191709515,0.9207311313375326,0.07547386841907339,0.05109125672741019,15335.47890204938,12.950523458443941,21.5,0,3,0,23,1276.6,36.7,28.2,0.6040209790209791,0.12124171082532008 +NN_TORCH (tuned + ensemble),8269.36125588417,3.6064914281551657,3548.397093003735,3.1069146452932506,0.8005534385996745,0.7867221786490863,0.15745609955540044,0.10096884964933617,54687.42205511106,206.78432079512532,19.884615384615383,0.186795,7021.069797754288,2.925987720489502,2792.139673756541,2.175346818589723,0.9508009219506811,0.917687560600783,0.0823266490663982,0.0717701052690009,44532.50472025028,168.68500762383525,20.0,0,0,1,25,1247.3,30.8,27.1,0.5708041958041958,0.07895497881555723 +REALMLP (tuned),17853.666671056013,0.2349767226439256,6069.468633023759,0.2967994896112342,0.7992732991174807,0.8048895624469216,0.16530717153664265,0.11169886093388302,114488.59335090281,15.66992951032636,20.76923076923077,0.179705,10652.850141763687,0.22168636322021484,5575.693006637695,0.23576453293502733,0.895201963654964,0.9670251309899789,0.1283441158885112,0.08568734793187416,88499.22644289241,12.792606929662101,20.5,0,1,0,25,1231.0,33.9,28.5,0.5506993006993007,0.07503622635930923 +EBM (tuned),2866.551095375648,0.04391795855302077,1716.6253639310917,0.042622423373582956,0.8796773591220505,0.7989370532749394,0.18031150115586747,0.13991074423102637,16005.580219570395,2.6200464262605005,20.865384615384617,0.19075999999999999,1826.563945055008,0.031216979026794434,1142.3094021828929,0.02993136666804911,1.0,1.0,0.08520888190451481,0.06958046400412875,15335.47890204938,1.3258183958395588,23.75,0,1,1,24,1227.1,36.6,31.5,0.548513986013986,0.08978323529643507 +EBM (default),11.305762657752403,0.07217896901644193,6.708906519026521,0.08642030214738769,0.8468070324259693,0.8110280220039858,0.18686874456979283,0.14250462388903842,76.4056328740172,4.809914969274192,22.23076923076923,0.19205,8.478393912315369,0.060648202896118164,5.571264059656597,0.07129895504487138,1.0,1.0,0.10192934818149862,0.06122058058323697,66.99828845613939,3.601780880785588,24.75,1,0,0,25,1206.9,28.3,34.8,0.5174825174825175,0.10315367705023558 +FASTAI (tuned + ensemble),2663.3156125178702,9.71036501114185,1626.2333617037298,10.35375213848753,0.7698173487890432,0.7888707552608327,0.19143489263209343,0.11424870338763603,20608.72361153851,575.9084247062348,22.326923076923077,0.18179499999999998,2361.516105532646,9.277374625205994,1350.0449662692658,6.569572430045136,0.9823907390988809,1.0,0.10018841132165368,0.07677125815183776,13001.178549428518,549.1440903510916,24.0,0,0,0,26,1202.1,28.7,33.1,0.5152972027972028,0.08032410389081089 +MNCA (default),29.761946293023918,0.45968284056736874,15.939524477151517,0.3666958147944243,0.8169283513779425,0.8227126311705688,0.19359838536157442,0.10940904253610519,187.22763978272167,23.803143758716253,22.826923076923077,0.19357,23.457369804382324,0.36945486068725586,13.667502577653629,0.33894040253174296,0.9808195818002508,0.9797952351969426,0.13240284359564197,0.08931940966654422,154.11579909730304,19.118442709424343,21.5,1,1,0,24,1187.8,28.8,27.9,0.5039335664335665,0.1019829541933546 +TABDPT (default),72.55373939184042,21.63674608560709,32.1753170626944,27.14316288922542,0.7421770023997575,0.7479465935897589,0.18444900328904643,0.12119989176698268,513.8148563269098,1550.6457261531152,23.48076923076923,0.20388,69.82611036300659,21.28597331047058,28.076698775708525,8.760817737139146,1.0,1.0,0.1080149448719816,0.07851722429401914,429.0853998300959,1143.1582415263433,29.5,1,3,0,22,1178.7,28.4,35.5,0.48907342657342656,0.14118801082755247 +REALMLP (default),110.65054762363434,0.2512630499326266,36.51262336985926,0.304026477962315,0.9170988508971886,0.8257233846816412,0.17480849590114966,0.10965875529591303,716.9172586317429,16.106317028225142,23.634615384615383,0.179885,61.38514173030853,0.19377470016479492,35.26610472383891,0.25260632993704635,1.0,1.0,0.10725973083151863,0.09589271240176966,562.1682174661271,13.084740916081461,26.25,1,0,0,25,1178.7,23.8,30.2,0.4855769230769231,0.09086138282523847 +NN_TORCH (tuned),8269.36125588417,0.1732868873156034,3548.397093003735,0.17387289280012472,0.9141063957909149,0.842287949515361,0.1775257283605692,0.11814031211601564,54687.42205511106,10.378521059679809,24.0,0.184685,7021.069797754288,0.11897110939025879,2792.139673756541,0.13285542411599796,1.0,1.0,0.10704543711737907,0.07835117203069997,44532.50472025028,7.793728816499673,25.0,0,0,0,26,1167.5,37.1,35.1,0.4772727272727273,0.055970780933021255 +XT (tuned + ensemble),723.008487197069,1.6600552705618052,480.9309873883464,1.918436680526551,0.8984673764067701,0.8698563213577479,0.20350469181458591,0.1535920856593196,5917.229990900821,105.19348400260608,24.884615384615383,0.188425,702.5076073408127,1.3369704484939575,378.34316304246136,1.3734038445009942,1.0,1.0,0.1315762721206637,0.09230140454586463,3777.3315625688056,84.73782880976256,28.0,0,0,0,26,1154.8,28.9,35.7,0.4571678321678322,0.059958692919405074 +FASTAI (tuned),2663.3156125178702,0.7872952864720271,1626.2333617037298,0.9244876829128756,0.8865326975991655,0.8432447700175844,0.206866728565722,0.1387333836548531,20608.72361153851,52.86743788138416,25.53846153846154,0.18316,2361.516105532646,0.6897614002227783,1350.0449662692658,0.612778202858813,1.0,1.0,0.1372818526486348,0.08670903152513522,13001.178549428518,36.29076458115673,27.5,0,0,1,25,1140.3,29.4,31.8,0.4423076923076923,0.06820649009803559 +XT (tuned),723.008487197069,0.16853698400350717,480.9309873883464,0.21789332007450063,0.9275522820640911,0.8584728603742081,0.20621433350726176,0.1564465527864028,5917.229990900821,11.278027514695932,26.0,0.189205,702.5076073408127,0.14003515243530273,378.34316304246136,0.15364019245636173,1.0,1.0,0.1505192879837034,0.09210625451126611,3777.3315625688056,8.882695855316769,29.5,0,1,0,25,1128.1,31.0,25.7,0.4318181818181818,0.07221148701643333 +XGB (default),5.406896040989802,0.29052331814399135,3.2395402815762044,0.40571388124316665,0.967663807523489,0.9208621475953046,0.1913484639295636,0.1583121152662077,36.135643016538445,18.079059576557942,26.01923076923077,0.18918000000000001,4.358284831047058,0.2170184850692749,2.659926162500852,0.1702856674842303,1.0,1.0,0.13622845464672223,0.08590682453074228,32.63170368298432,10.069746654781287,24.25,0,0,0,26,1129.9,27.4,31.1,0.4313811188811189,0.04570122330728964 +RF (tuned + ensemble),911.769657428448,1.55073488675631,531.6502580092293,1.7081982422422,0.9428934399339288,0.9018568954164017,0.20973262545633878,0.18169828635432628,6577.080710276883,100.32600883678194,26.96153846153846,0.18825,798.9582680463791,1.2464795112609863,527.2734284753022,1.1554577826899797,1.0,1.0,0.15587829515575163,0.09511037806823772,5029.2193365439925,80.30745598154598,30.0,0,1,0,25,1114.2,27.3,33.7,0.40996503496503495,0.06740230643960834 +GBM (default),5.678787359824548,0.359198652780973,3.357556396858338,0.23593366680622332,0.9701181894525962,0.9244030054522838,0.2009750941665907,0.16096135302268902,43.778662055665606,16.224185451530264,27.557692307692307,0.19459,5.089803218841553,0.34710729122161865,3.0120719035705887,0.17665561056592852,1.0,1.0,0.14940090168048453,0.08991864451874558,30.743592100025186,14.007062707115537,28.0,0,0,0,26,1097.5,31.5,29.5,0.3964160839160839,0.04169946841702806 +RF (tuned),911.769657428448,0.15133640399345985,531.6502580092293,0.19063529883897276,0.9772506936041817,0.9414350435106434,0.21876669694055456,0.18885525418574936,6577.080710276883,9.67186030508054,29.25,0.18775,798.9582680463791,0.13221728801727295,527.2734284753022,0.1275233993660041,1.0,1.0,0.16269556435701765,0.11206528012392108,5029.2193365439925,7.908293435451007,31.5,0,0,0,26,1064.0,29.3,35.0,0.35795454545454547,0.04504401000608687 +NN_TORCH (default),23.867143649321335,0.24185629991384652,13.87033788200038,0.29581404152824126,0.9698948249425869,0.9457939759123197,0.24766534415358382,0.1805126846920292,159.99336582070887,16.359564345871572,31.71153846153846,0.19833499999999998,19.20789933204651,0.18236374855041504,8.931817801759234,0.17241159209016133,1.0,1.0,0.1806992032072448,0.1282818632772945,162.04458259531137,9.195674192084631,33.0,0,0,0,26,1005.9,38.8,30.0,0.30201048951048953,0.03748733782316879 +FASTAI (default),10.92040595641503,0.607048539014963,5.789474679574097,0.6364629667404095,0.9807465319111015,0.9340218157343317,0.2458147993858494,0.18433902398032298,78.41470283692,37.58331718018816,32.15384615384615,0.19146000000000002,9.578287720680237,0.6503585577011108,4.679521651364521,0.5473457672031852,1.0,1.0,0.15595107304948763,0.13206479856027808,61.09577848128312,35.985779724786894,35.0,0,0,0,26,999.9,29.7,31.1,0.291958041958042,0.036046519543766274 +RF (default),1.1224354964036207,0.07520443659562331,0.5160590091828214,0.08504536046248315,0.9951894125278877,0.9662672985507942,0.2733008673941071,0.2607556340607851,7.699488729035172,4.8652515281017985,32.21153846153846,0.213715,0.8405723571777344,0.06134629249572754,0.44070860935344924,0.06492877134601137,1.0,1.0,0.22304594727380544,0.1387641342747614,5.263965350091326,4.042169058695611,35.75,0,0,0,26,1000.0,0.0,0.0,0.2906468531468531,0.04063169893049472 +XT (default),0.7937821791722224,0.076584870998676,0.45161836217627827,0.08730195757590078,0.9851460583683391,0.9723807339815378,0.2928638810006597,0.2925631030122461,5.4850410746785805,4.998284751448414,34.11538461538461,0.22224,0.7121238708496094,0.07150018215179443,0.3597485800421042,0.06793435113493712,1.0,1.0,0.21784002712442435,0.1545266336174043,3.777771179368423,4.378376796908203,38.0,0,0,0,26,949.4,35.6,31.6,0.24737762237762237,0.034356084606725006 +LR (tuned + ensemble),175.34701893879816,0.4311328667860765,114.81546568819135,0.35152858974096185,0.9447815704031849,0.9613573808489063,0.31304194938194396,0.27486950659356457,1394.89696800973,28.50704727597215,34.21153846153846,0.219115,157.3445065021515,0.20991504192352295,89.11614089252916,0.1966045265631563,1.0,1.0,0.260779813420035,0.18354757952857997,1033.8951605639145,9.91761243845308,37.5,1,0,0,25,946.4,32.2,34.1,0.24519230769230768,0.06657835051950235 +LR (tuned),175.34701893879816,0.13578540545243484,114.81546568819135,0.11644774040693313,0.9645430999513104,0.9706124256069096,0.3197153495116481,0.2866149774984877,1394.89696800973,6.834352988282761,34.92307692307692,0.21990500000000002,157.3445065021515,0.06698489189147949,89.11614089252916,0.08340922333653941,1.0,1.0,0.2839064101424172,0.1973963943493853,1033.8951605639145,4.726037355894221,38.75,0,0,1,25,925.0,34.9,43.3,0.229020979020979,0.04041420009021431 +LR (default),4.7979896068573,0.14980417031508225,2.784461238483506,0.15310137710602748,0.9745259999639975,0.9812456463551247,0.3329427914389331,0.32581993219914024,36.357934158034695,9.005429323015887,35.98076923076923,0.227395,4.980341196060181,0.0988316535949707,2.4349667711867333,0.10027059217560387,1.0,1.0,0.283906387231373,0.21846589850078024,26.549049340848278,5.855361941404224,39.0,0,0,0,26,891.8,35.4,28.6,0.20498251748251747,0.030292514887996096 +KNN (tuned + ensemble),16.958975911140442,0.2818152537712684,7.194345160625263,0.20399826229470497,1.0,0.9816350265387093,0.47672042775227436,0.5833708185909443,56.76087640724927,13.193730328032911,39.92307692307692,0.297435,6.915013313293457,0.11628735065460205,3.5025132784274384,0.1592449370444793,1.0,1.0,0.4130987117462378,0.6739246116612398,52.78507887384432,10.328630925855668,43.0,0,0,0,26,745.5,44.9,30.5,0.11538461538461539,0.030206633151293592 +KNN (tuned),16.958975911140442,0.05680100734417255,7.194345160625263,0.04235176068048229,1.0,0.9774922422312161,0.5019983015456773,0.6443397962703555,56.76087640724927,2.623837177847182,41.21153846153846,0.301285,6.915013313293457,0.03549385070800781,3.5025132784274384,0.03322185201916636,1.0,1.0,0.5143946348760557,0.7015107115204589,52.78507887384432,1.9257403822827532,44.0,0,0,1,25,676.5,41.9,47.1,0.08610139860139861,0.035393141891706566 +KNN (default),0.30434812949253964,0.03149266426379864,0.14343197725298135,0.03163346219935531,1.0,1.0,0.5949762374279601,0.9484018378342699,1.002692426657382,1.6405188884998085,44.26923076923077,0.34604,0.17687928676605225,0.01871800422668457,0.0745054444441075,0.021103033819676938,1.0,1.0,0.6151111085811185,1.0,1.0,1.0188814221661033,45.0,0,0,0,26,383.7,60.2,115.9,0.016608391608391608,0.02263422903807308 diff --git a/data/lite/tabpfn-tabicl-cls/tuning-impact-elo-horizontal.pdf b/data/lite/tabpfn-tabicl-cls/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..56df749f4b660ba4f866442ad45f444cc674ff6e Binary files /dev/null and b/data/lite/tabpfn-tabicl-cls/tuning-impact-elo-horizontal.pdf differ diff --git a/data/lite/tabpfn-tabicl-cls/tuning-impact-elo-horizontal.png.zip b/data/lite/tabpfn-tabicl-cls/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..11f6de74fa20ca51951d5e65c8838b9e75acc2bf --- /dev/null +++ b/data/lite/tabpfn-tabicl-cls/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e3537446452cbef6ee98d645ac7ca084376854b8328e0703c07f9a66eb7f233 +size 138602 diff --git a/data/lite/tabpfn-tabicl/figures/critical-diagram.pdf b/data/lite/tabpfn-tabicl/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..63826253ffaa42082dab94252de70a5352dbae95 Binary files /dev/null and b/data/lite/tabpfn-tabicl/figures/critical-diagram.pdf differ diff --git a/data/lite/tabpfn-tabicl/figures/critical-diagram.png.zip b/data/lite/tabpfn-tabicl/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..ee6c1a909c106e11df9c28c59d1ea06ad8d90858 --- /dev/null +++ b/data/lite/tabpfn-tabicl/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a5e1e5d299642f413424cfe6d5802a988af48d82649e3847e843cf9179eb4ef +size 319000 diff --git a/data/lite/tabpfn-tabicl/leaderboard.tex b/data/lite/tabpfn-tabicl/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..470e5847143808188c07aa37c1d5e5b76d097272 --- /dev/null +++ b/data/lite/tabpfn-tabicl/leaderboard.tex @@ -0,0 +1,53 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +TabPFNv2 (T+E) & \textcolor{gold}{\textbf{1507${}_{-45,+32}$}} & \textcolor{gold}{\textbf{0.700}} & \textcolor{gold}{\textbf{8.5}} & \textcolor{gold}{\textbf{2.8}} & \textcolor{gold}{\textbf{6}} & \textcolor{gold}{\textbf{8.0\%}} & 3361.32 & 51.67 \\ +AutoGluon 1.3 (4h) & \textcolor{silver}{\textbf{1434${}_{-31,+33}$}} & 0.440 & \textcolor{silver}{\textbf{11.0}} & 6.4 & 1 & \textcolor{silver}{\textbf{10.4\%}} & 2085.73 & 2.16 \\ +RealMLP (T+E) & \textcolor{bronze}{\textbf{1408${}_{-30,+28}$}} & 0.402 & \textcolor{bronze}{\textbf{12.1}} & 6.9 & 0 & 12.3\% & 5575.69 & 4.58 \\ +TabPFNv2 (T) & 1402${}_{-33,+32}$ & \textcolor{silver}{\textbf{0.544}} & 12.2 & \textcolor{bronze}{\textbf{4.5}} & 1 & 10.6\% & 3361.32 & 0.53 \\ +TabM (T+E) & 1392${}_{-40,+33}$ & 0.483 & 12.8 & 6.2 & 1 & 12.0\% & 2899.02 & 1.46 \\ +TabICL (D) & 1378${}_{-31,+37}$ & \textcolor{bronze}{\textbf{0.543}} & 13.4 & \textcolor{silver}{\textbf{3.9}} & \textcolor{silver}{\textbf{4}} & \textcolor{bronze}{\textbf{10.5\%}} & 9.07 & 2.01 \\ +LightGBM (T+E) & 1349${}_{-33,+29}$ & 0.319 & 14.8 & 11.6 & 0 & 14.1\% & 691.46 & 1.46 \\ +TabPFNv2 (D) & 1334${}_{-35,+35}$ & 0.499 & 15.5 & \textcolor{bronze}{\textbf{4.5}} & \textcolor{bronze}{\textbf{3}} & 12.1\% & 4.28 & 0.51 \\ +CatBoost (T+E) & 1332${}_{-36,+33}$ & 0.271 & 15.8 & 9.8 & 0 & 13.5\% & 1394.45 & 0.62 \\ +TabM (T) & 1330${}_{-31,+33}$ & 0.387 & 15.7 & 7.7 & 0 & 13.1\% & 2899.02 & 0.17 \\ +ModernNCA (T) & 1326${}_{-31,+31}$ & 0.255 & 16.0 & 6.4 & 2 & 12.7\% & 6020.89 & 0.45 \\ +XGBoost (T+E) & 1314${}_{-33,+29}$ & 0.222 & 16.6 & 11.4 & 0 & 15.2\% & 785.90 & 1.28 \\ +CatBoost (D) & 1308${}_{-32,+30}$ & 0.228 & 16.8 & 9.6 & 1 & 14.8\% & 5.74 & 0.14 \\ +CatBoost (T) & 1299${}_{-28,+42}$ & 0.247 & 17.2 & 13.3 & 0 & 14.4\% & 1394.45 & 0.05 \\ +ModernNCA (T+E) & 1292${}_{-35,+27}$ & 0.307 & 17.7 & 9.4 & 0 & 14.4\% & 6020.89 & 8.48 \\ +XGBoost (T) & 1290${}_{-26,+30}$ & 0.181 & 17.8 & 9.8 & 1 & 15.4\% & 785.90 & 0.21 \\ +LightGBM (T) & 1288${}_{-29,+30}$ & 0.262 & 17.6 & 11.9 & 0 & 15.1\% & 691.46 & 0.26 \\ +TabM (D) & 1281${}_{-25,+30}$ & 0.288 & 18.2 & 8.1 & 1 & 16.2\% & 11.60 & 0.15 \\ +EBM (T+E) & 1277${}_{-29,+37}$ & 0.186 & 18.4 & 8.2 & 0 & 17.2\% & 1142.31 & 0.24 \\ +TorchMLP (T+E) & 1247${}_{-28,+31}$ & 0.199 & 19.9 & 12.7 & 0 & 15.7\% & 2792.14 & 2.18 \\ +RealMLP (T) & 1231${}_{-29,+34}$ & 0.201 & 20.8 & 13.3 & 0 & 16.5\% & 5575.69 & 0.24 \\ +EBM (T) & 1227${}_{-32,+37}$ & 0.120 & 20.9 & 11.1 & 0 & 18.0\% & 1142.31 & 0.03 \\ +EBM (D) & 1207${}_{-35,+29}$ & 0.153 & 22.2 & 9.7 & 1 & 18.7\% & 5.57 & 0.07 \\ +FastaiMLP (T+E) & 1202${}_{-34,+29}$ & 0.230 & 22.3 & 12.4 & 0 & 19.1\% & 1350.04 & 6.57 \\ +ModernNCA (D) & 1188${}_{-28,+29}$ & 0.183 & 22.8 & 9.8 & 1 & 19.4\% & 13.67 & 0.34 \\ +TabDPT (D) & 1179${}_{-36,+29}$ & 0.258 & 23.5 & 7.1 & 1 & 18.4\% & 28.08 & 8.76 \\ +RealMLP (D) & 1179${}_{-31,+24}$ & 0.083 & 23.6 & 11.0 & 1 & 17.5\% & 35.27 & 0.25 \\ +TorchMLP (T) & 1168${}_{-36,+38}$ & 0.086 & 24.0 & 17.9 & 0 & 17.8\% & 2792.14 & 0.13 \\ +ExtraTrees (T+E) & 1155${}_{-36,+29}$ & 0.102 & 24.9 & 16.7 & 0 & 20.4\% & 378.34 & 1.37 \\ +FastaiMLP (T) & 1140${}_{-32,+30}$ & 0.113 & 25.5 & 14.7 & 0 & 20.7\% & 1350.04 & 0.61 \\ +XGBoost (D) & 1130${}_{-32,+28}$ & 0.032 & 26.0 & 21.9 & 0 & 19.1\% & 2.66 & 0.17 \\ +ExtraTrees (T) & 1128${}_{-26,+31}$ & 0.072 & 26.0 & 13.8 & 0 & 20.6\% & 378.34 & 0.15 \\ +RandomForest (T+E) & 1114${}_{-34,+28}$ & 0.057 & 27.0 & 14.8 & 0 & 21.0\% & 527.27 & 1.16 \\ +LightGBM (D) & 1098${}_{-30,+32}$ & 0.030 & 27.6 & 24.0 & 0 & 20.1\% & 3.01 & 0.18 \\ +RandomForest (T) & 1064${}_{-35,+30}$ & 0.023 & 29.2 & 22.2 & 0 & 21.9\% & 527.27 & 0.13 \\ +TorchMLP (D) & 1006${}_{-30,+39}$ & 0.030 & 31.7 & 26.7 & 0 & 24.8\% & 8.93 & 0.17 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.005 & 32.2 & 24.6 & 0 & 27.3\% & 0.44 & 0.06 \\ +FastaiMLP (D) & 1000${}_{-32,+30}$ & 0.019 & 32.2 & 27.7 & 0 & 24.6\% & 4.68 & 0.55 \\ +ExtraTrees (D) & 949${}_{-32,+36}$ & 0.015 & 34.1 & 29.1 & 0 & 29.3\% & 0.36 & 0.07 \\ +Linear (T+E) & 946${}_{-35,+33}$ & 0.055 & 34.2 & 15.0 & 1 & 31.3\% & 89.12 & 0.20 \\ +Linear (T) & 925${}_{-44,+35}$ & 0.035 & 34.9 & 24.7 & 0 & 32.0\% & 89.12 & 0.08 \\ +Linear (D) & 892${}_{-29,+36}$ & 0.025 & 36.0 & 33.0 & 0 & 33.3\% & 2.43 & 0.10 \\ +KNN (T+E) & 746${}_{-31,+45}$ & 0.000 & 39.9 & 33.1 & 0 & 47.7\% & 3.50 & 0.16 \\ +KNN (T) & 676${}_{-48,+42}$ & 0.000 & 41.2 & 28.3 & 0 & 50.2\% & 3.50 & 0.03 \\ +KNN (D) & 384${}_{-116,+61}$ & 0.000 & 44.3 & 44.2 & 0 & 59.5\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/lite/tabpfn-tabicl/tabarena_leaderboard.csv b/data/lite/tabpfn-tabicl/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..df059e4121634d1f0f2b668e430505bc14507b44 --- /dev/null +++ b/data/lite/tabpfn-tabicl/tabarena_leaderboard.csv @@ -0,0 +1,46 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +TABPFNV2 (tuned + ensemble),14624.10810300937,166.6936253951146,3837.093570770289,71.22922446033486,0.2999799624009034,0.37900004959687766,0.08031051995518385,0.05455935044977521,73712.17169915051,6326.444952096131,8.461538461538462,0.1749,8447.90291428566,59.9476752281189,3361.317280362242,51.668880171806535,0.21441671773452015,0.3480554663987826,0.03548854933740464,0.027745461982937078,32774.70384527536,4094.471326370238,5.5,6,1,3,16,1506.7,31.8,44.7,0.8304195804195804,0.358094093982103 +AutoGluon 1.3 (4h),5811.540137483524,3.0737958137805643,3715.2599271484114,3.044362316385498,0.5599854226187498,0.5617828950350577,0.10445421398736063,0.0599680756233326,37983.87924037632,165.48830983683717,11.01923076923077,0.17751,5213.819254875183,2.0895845890045166,2085.7270446537464,2.161349128608518,0.5600808158121042,0.5574000428410242,0.06078825021132733,0.06659288302785793,30345.10100676535,118.3021811054546,9.0,1,0,1,24,1433.5,32.8,30.7,0.7722902097902098,0.15518544636191695 +REALMLP (tuned + ensemble),17853.666671056013,5.060677555891184,6069.468633023759,5.352972929535142,0.5983187605703805,0.5697719962134024,0.1225244655490201,0.0697158337547381,114488.59335090281,312.9299756303972,12.115384615384615,0.180765,10652.850141763687,4.7910391092300415,5575.693006637695,4.577857176513236,0.5696681015084324,0.5408431020221902,0.06341551053522204,0.04576778429435464,88499.22644289241,286.2007603461078,11.5,0,1,2,23,1407.7,28.0,30.0,0.7473776223776224,0.1450476689017345 +TABPFNV2 (tuned),14624.10810300937,5.890562671881455,3837.093570770289,2.9216612176257093,0.4562783530638654,0.539616756848536,0.10636474393434797,0.07576264928807884,73712.17169915051,208.74723417223507,12.25,0.19132,8447.90291428566,0.5261704921722412,3361.317280362242,0.52729181819818,0.34109768962419973,0.6143846899224816,0.08017059655741715,0.050216477642017796,32774.70384527536,36.401677545743524,11.0,1,5,2,18,1402.3,31.6,32.9,0.7443181818181818,0.22277582323851844 +TABM (tuned + ensemble),7183.65333878994,2.169150572556716,3570.712955098323,1.861754182648195,0.5166525238887076,0.5534887980632361,0.11956349986158774,0.08309123144106954,42989.483218042995,121.47528176372664,12.846153846153847,0.18310500000000002,6058.230099320412,1.6269241571426392,2899.0220559668596,1.460880880345262,0.47136645123500964,0.5213443194889227,0.07336602828104005,0.040297742432766875,37327.19260107514,109.27107326295308,9.5,1,0,1,24,1392.2,32.7,39.4,0.7307692307692307,0.16156485134544094 +TABICL (default),21.54241045621725,2.6437206268310547,10.219926189931016,1.9528462364644343,0.45732829959408833,0.5557361534084676,0.10499055070178752,0.06310686388674132,140.84761892371782,137.0823191069963,13.365384615384615,0.177865,18.648964643478394,2.1483960151672363,9.073519792448254,2.007502492806867,0.46959481282562443,0.6652418475567421,0.05302697309380272,0.028777901101857722,117.57303754113096,107.76328227971135,13.5,4,2,1,19,1378.5,36.4,30.5,0.7189685314685315,0.2556026489858902 +GBM (tuned + ensemble),1386.6633568268555,2.647538212629465,855.5354588276103,2.6861264004396594,0.6806482110513491,0.7106186510085695,0.141362130997881,0.08782930779014528,10078.423717398238,182.57724180654643,14.788461538461538,0.18084,1317.1929507255554,2.008246660232544,691.4641967411458,1.4553717527055694,0.7005535661954161,0.7652961068739543,0.07965351598262033,0.06332006129013433,9059.644744264673,90.72358893879789,14.75,0,0,1,25,1349.4,28.6,33.0,0.6866258741258742,0.08652272628643233 +TABPFNV2 (default),12.006092300781837,1.2122490681134737,5.3148066442099084,0.6434373636958165,0.5013642769155608,0.6076681115357273,0.12052806276021125,0.08926430649670257,74.57934257049592,50.66248833495304,15.538461538461538,0.194575,9.273962259292603,0.4255213737487793,4.283948847917442,0.5110342254172261,0.3771347187714357,0.8078260737500734,0.07359963710195983,0.07618105155863322,62.366726819804335,28.80821414198816,14.5,3,2,2,19,1333.5,34.9,34.7,0.6695804195804196,0.22455622157365326 +TABM (tuned),7183.65333878994,0.2019361532651461,3570.712955098323,0.2168923191387062,0.6127855914191946,0.6153990479247885,0.130856425990661,0.09537399493661194,42989.483218042995,13.062348029072668,15.692307692307692,0.18589499999999998,6058.230099320412,0.1839456558227539,2899.0220559668596,0.171251855223312,0.5915500043256848,0.6288985041617685,0.09148128983327741,0.05588464476989181,37327.19260107514,9.17800659262771,12.0,0,2,2,22,1329.7,32.2,30.3,0.666083916083916,0.13000506341067208 +CAT (tuned + ensemble),4861.749095384891,1.0365588940106905,3035.7630585976663,0.9271931106606893,0.728920075896456,0.6786642048762535,0.13487992339677388,0.0909217961956513,26561.38119334446,53.29220521181529,15.76923076923077,0.178855,3259.4502482414246,0.9432240724563599,1394.4481643221718,0.6180738220288715,0.7992439551459185,0.7007978686240202,0.07405507598059591,0.051124014208117546,20607.594190900367,47.175332427485074,15.0,0,1,1,24,1331.9,32.8,35.8,0.6643356643356644,0.10243020698237862 +MNCA (tuned),11104.727954295966,0.5286179322462815,5977.187221364461,0.4814995680859316,0.7448490525012423,0.6406839211091188,0.12655502884420408,0.10326715793661159,78433.1838326286,30.191390941699975,16.01923076923077,0.18580999999999998,11212.920736551285,0.4423438310623169,6020.892835479641,0.4537708228509102,0.8177816514691232,0.7061138774961084,0.08668289690877401,0.05913687649157323,67721.7850080898,25.373417518222958,14.25,2,0,0,24,1325.6,30.3,30.3,0.6586538461538461,0.15586362718013336 +XGB (tuned + ensemble),1868.2429302564035,2.459977333362286,1131.3403541498637,3.0908212073715786,0.7784275988369416,0.7326751610286906,0.1521183469738161,0.10487866753851346,11151.77336671448,133.9681639809692,16.576923076923077,0.184395,1441.2213016748428,1.317580223083496,785.8975953484313,1.2809013038285242,0.8316341855574652,0.7341106406501618,0.1144861696628025,0.054690456125919155,11226.485943771408,74.70529109142498,13.5,0,0,2,24,1313.5,28.5,32.1,0.6459790209790209,0.08752950287187947 +CAT (default),173.12264615755814,0.2530022859573364,152.19981148640542,0.1746769705615129,0.7721520925382083,0.7508782741378199,0.14762383159906592,0.09673912909834526,503.5985284620293,11.812145121206303,16.75,0.18153,12.185660362243652,0.2613861560821533,5.736216485552211,0.13945330874004275,0.8708469956136012,0.8764795848440701,0.09472627265148731,0.048694739189140704,101.89620933712393,7.710770185087604,19.25,1,0,0,25,1308.5,29.6,31.7,0.6420454545454546,0.10408056855425275 +CAT (tuned),4861.749095384891,0.10756405500265268,3035.7630585976663,0.11792367197674246,0.7533425274831995,0.7418580085557809,0.14394572275280534,0.09786470048169181,26561.38119334446,7.02054598025311,17.192307692307693,0.179485,3259.4502482414246,0.05912292003631592,1394.4481643221718,0.05275041900206211,0.8504867849422679,0.7781645153943832,0.09050829480547845,0.05413571715045103,20607.594190900367,2.886943526422101,17.0,0,0,0,26,1299.2,41.1,27.8,0.631993006993007,0.07539691390572127 +GBM (tuned),1386.6633568268555,0.6239433655372033,855.5354588276103,0.6651164927916418,0.737683616453633,0.7596688264282848,0.15123309377717353,0.10299158861394081,10078.423717398238,42.45419816933193,17.634615384615383,0.18125,1317.1929507255554,0.32417428493499756,691.4641967411458,0.2643910314838089,0.7826548575969626,0.7981479775650631,0.08157588989660797,0.06910038121186715,9059.644744264673,20.34411810847653,18.5,0,1,0,25,1288.1,29.9,29.0,0.6219405594405595,0.08417253395400366 +MNCA (tuned + ensemble),11104.727954295966,11.626816547833956,5977.187221364461,9.077822725159232,0.6933896363411076,0.6660665157088731,0.1444822450466191,0.11574155496848519,78433.1838326286,625.8156905319904,17.653846153846153,0.196755,11212.920736551285,9.569536089897156,6020.892835479641,8.47783782257851,0.722368050936999,0.6622270828972778,0.09664772425546841,0.06590198674037494,67721.7850080898,462.66839224080115,15.75,0,0,2,24,1292.0,26.5,34.9,0.6215034965034965,0.10668541415880203 +XGB (tuned),1868.2429302564035,0.6344939470291138,1131.3403541498637,0.845858557757576,0.8187393857188691,0.7667685498472205,0.15449551107917392,0.10860550617326425,11151.77336671448,35.07178353015141,17.807692307692307,0.18658,1441.2213016748428,0.2507840394973755,785.8975953484313,0.21279594317419753,0.9017943832788667,0.7453177957281472,0.12200656150282413,0.06269228828209442,11226.485943771408,15.348416723363933,16.5,1,0,0,25,1289.8,29.4,25.3,0.618006993006993,0.10251976073586389 +TABM (default),26.77778566800631,0.20130640726823074,16.983704596070464,0.1991903141757489,0.7120650416184634,0.7086289666607134,0.1622519047697801,0.11661283756574645,212.38711719883958,11.49822311803021,18.153846153846153,0.18572,20.835053324699402,0.1561201810836792,11.598826273977398,0.14759862387674094,0.7417274447397321,0.7810887350534926,0.09137336521284833,0.06275561870896407,109.84095896574044,8.743598466674035,18.0,1,0,1,24,1281.0,29.5,24.9,0.6101398601398601,0.12406889112890061 +EBM (tuned + ensemble),2866.551095375648,0.3536289380146907,1716.6253639310917,0.31694700723366953,0.8144475987903695,0.7364009363908267,0.17180309869898544,0.13289820608093436,16005.580219570395,21.915829440477204,18.423076923076923,0.18934,1826.563945055008,0.30398499965667725,1142.3094021828929,0.2383204663966832,0.9368603191709515,0.9207311313375326,0.07547386841907339,0.05109125672741019,15335.47890204938,12.950523458443941,21.5,0,3,0,23,1276.6,36.7,28.2,0.6040209790209791,0.12124171082532008 +NN_TORCH (tuned + ensemble),8269.36125588417,3.6064914281551657,3548.397093003735,3.1069146452932506,0.8005534385996745,0.7867221786490863,0.15745609955540044,0.10096884964933617,54687.42205511106,206.78432079512532,19.884615384615383,0.186795,7021.069797754288,2.925987720489502,2792.139673756541,2.175346818589723,0.9508009219506811,0.917687560600783,0.0823266490663982,0.0717701052690009,44532.50472025028,168.68500762383525,20.0,0,0,1,25,1247.3,30.8,27.1,0.5708041958041958,0.07895497881555723 +REALMLP (tuned),17853.666671056013,0.2349767226439256,6069.468633023759,0.2967994896112342,0.7992732991174807,0.8048895624469216,0.16530717153664265,0.11169886093388302,114488.59335090281,15.66992951032636,20.76923076923077,0.179705,10652.850141763687,0.22168636322021484,5575.693006637695,0.23576453293502733,0.895201963654964,0.9670251309899789,0.1283441158885112,0.08568734793187416,88499.22644289241,12.792606929662101,20.5,0,1,0,25,1231.0,33.9,28.5,0.5506993006993007,0.07503622635930923 +EBM (tuned),2866.551095375648,0.04391795855302077,1716.6253639310917,0.042622423373582956,0.8796773591220505,0.7989370532749394,0.18031150115586747,0.13991074423102637,16005.580219570395,2.6200464262605005,20.865384615384617,0.19075999999999999,1826.563945055008,0.031216979026794434,1142.3094021828929,0.02993136666804911,1.0,1.0,0.08520888190451481,0.06958046400412875,15335.47890204938,1.3258183958395588,23.75,0,1,1,24,1227.1,36.6,31.5,0.548513986013986,0.08978323529643507 +EBM (default),11.305762657752403,0.07217896901644193,6.708906519026521,0.08642030214738769,0.8468070324259693,0.8110280220039858,0.18686874456979283,0.14250462388903842,76.4056328740172,4.809914969274192,22.23076923076923,0.19205,8.478393912315369,0.060648202896118164,5.571264059656597,0.07129895504487138,1.0,1.0,0.10192934818149862,0.06122058058323697,66.99828845613939,3.601780880785588,24.75,1,0,0,25,1206.9,28.3,34.8,0.5174825174825175,0.10315367705023558 +FASTAI (tuned + ensemble),2663.3156125178702,9.71036501114185,1626.2333617037298,10.35375213848753,0.7698173487890432,0.7888707552608327,0.19143489263209343,0.11424870338763603,20608.72361153851,575.9084247062348,22.326923076923077,0.18179499999999998,2361.516105532646,9.277374625205994,1350.0449662692658,6.569572430045136,0.9823907390988809,1.0,0.10018841132165368,0.07677125815183776,13001.178549428518,549.1440903510916,24.0,0,0,0,26,1202.1,28.7,33.1,0.5152972027972028,0.08032410389081089 +MNCA (default),29.761946293023918,0.45968284056736874,15.939524477151517,0.3666958147944243,0.8169283513779425,0.8227126311705688,0.19359838536157442,0.10940904253610519,187.22763978272167,23.803143758716253,22.826923076923077,0.19357,23.457369804382324,0.36945486068725586,13.667502577653629,0.33894040253174296,0.9808195818002508,0.9797952351969426,0.13240284359564197,0.08931940966654422,154.11579909730304,19.118442709424343,21.5,1,1,0,24,1187.8,28.8,27.9,0.5039335664335665,0.1019829541933546 +TABDPT (default),72.55373939184042,21.63674608560709,32.1753170626944,27.14316288922542,0.7421770023997575,0.7479465935897589,0.18444900328904643,0.12119989176698268,513.8148563269098,1550.6457261531152,23.48076923076923,0.20388,69.82611036300659,21.28597331047058,28.076698775708525,8.760817737139146,1.0,1.0,0.1080149448719816,0.07851722429401914,429.0853998300959,1143.1582415263433,29.5,1,3,0,22,1178.7,28.4,35.5,0.48907342657342656,0.14118801082755247 +REALMLP (default),110.65054762363434,0.2512630499326266,36.51262336985926,0.304026477962315,0.9170988508971886,0.8257233846816412,0.17480849590114966,0.10965875529591303,716.9172586317429,16.106317028225142,23.634615384615383,0.179885,61.38514173030853,0.19377470016479492,35.26610472383891,0.25260632993704635,1.0,1.0,0.10725973083151863,0.09589271240176966,562.1682174661271,13.084740916081461,26.25,1,0,0,25,1178.7,23.8,30.2,0.4855769230769231,0.09086138282523847 +NN_TORCH (tuned),8269.36125588417,0.1732868873156034,3548.397093003735,0.17387289280012472,0.9141063957909149,0.842287949515361,0.1775257283605692,0.11814031211601564,54687.42205511106,10.378521059679809,24.0,0.184685,7021.069797754288,0.11897110939025879,2792.139673756541,0.13285542411599796,1.0,1.0,0.10704543711737907,0.07835117203069997,44532.50472025028,7.793728816499673,25.0,0,0,0,26,1167.5,37.1,35.1,0.4772727272727273,0.055970780933021255 +XT (tuned + ensemble),723.008487197069,1.6600552705618052,480.9309873883464,1.918436680526551,0.8984673764067701,0.8698563213577479,0.20350469181458591,0.1535920856593196,5917.229990900821,105.19348400260608,24.884615384615383,0.188425,702.5076073408127,1.3369704484939575,378.34316304246136,1.3734038445009942,1.0,1.0,0.1315762721206637,0.09230140454586463,3777.3315625688056,84.73782880976256,28.0,0,0,0,26,1154.8,28.9,35.7,0.4571678321678322,0.059958692919405074 +FASTAI (tuned),2663.3156125178702,0.7872952864720271,1626.2333617037298,0.9244876829128756,0.8865326975991655,0.8432447700175844,0.206866728565722,0.1387333836548531,20608.72361153851,52.86743788138416,25.53846153846154,0.18316,2361.516105532646,0.6897614002227783,1350.0449662692658,0.612778202858813,1.0,1.0,0.1372818526486348,0.08670903152513522,13001.178549428518,36.29076458115673,27.5,0,0,1,25,1140.3,29.4,31.8,0.4423076923076923,0.06820649009803559 +XT (tuned),723.008487197069,0.16853698400350717,480.9309873883464,0.21789332007450063,0.9275522820640911,0.8584728603742081,0.20621433350726176,0.1564465527864028,5917.229990900821,11.278027514695932,26.0,0.189205,702.5076073408127,0.14003515243530273,378.34316304246136,0.15364019245636173,1.0,1.0,0.1505192879837034,0.09210625451126611,3777.3315625688056,8.882695855316769,29.5,0,1,0,25,1128.1,31.0,25.7,0.4318181818181818,0.07221148701643333 +XGB (default),5.406896040989802,0.29052331814399135,3.2395402815762044,0.40571388124316665,0.967663807523489,0.9208621475953046,0.1913484639295636,0.1583121152662077,36.135643016538445,18.079059576557942,26.01923076923077,0.18918000000000001,4.358284831047058,0.2170184850692749,2.659926162500852,0.1702856674842303,1.0,1.0,0.13622845464672223,0.08590682453074228,32.63170368298432,10.069746654781287,24.25,0,0,0,26,1129.9,27.4,31.1,0.4313811188811189,0.04570122330728964 +RF (tuned + ensemble),911.769657428448,1.55073488675631,531.6502580092293,1.7081982422422,0.9428934399339288,0.9018568954164017,0.20973262545633878,0.18169828635432628,6577.080710276883,100.32600883678194,26.96153846153846,0.18825,798.9582680463791,1.2464795112609863,527.2734284753022,1.1554577826899797,1.0,1.0,0.15587829515575163,0.09511037806823772,5029.2193365439925,80.30745598154598,30.0,0,1,0,25,1114.2,27.3,33.7,0.40996503496503495,0.06740230643960834 +GBM (default),5.678787359824548,0.359198652780973,3.357556396858338,0.23593366680622332,0.9701181894525962,0.9244030054522838,0.2009750941665907,0.16096135302268902,43.778662055665606,16.224185451530264,27.557692307692307,0.19459,5.089803218841553,0.34710729122161865,3.0120719035705887,0.17665561056592852,1.0,1.0,0.14940090168048453,0.08991864451874558,30.743592100025186,14.007062707115537,28.0,0,0,0,26,1097.5,31.5,29.5,0.3964160839160839,0.04169946841702806 +RF (tuned),911.769657428448,0.15133640399345985,531.6502580092293,0.19063529883897276,0.9772506936041817,0.9414350435106434,0.21876669694055456,0.18885525418574936,6577.080710276883,9.67186030508054,29.25,0.18775,798.9582680463791,0.13221728801727295,527.2734284753022,0.1275233993660041,1.0,1.0,0.16269556435701765,0.11206528012392108,5029.2193365439925,7.908293435451007,31.5,0,0,0,26,1064.0,29.3,35.0,0.35795454545454547,0.04504401000608687 +NN_TORCH (default),23.867143649321335,0.24185629991384652,13.87033788200038,0.29581404152824126,0.9698948249425869,0.9457939759123197,0.24766534415358382,0.1805126846920292,159.99336582070887,16.359564345871572,31.71153846153846,0.19833499999999998,19.20789933204651,0.18236374855041504,8.931817801759234,0.17241159209016133,1.0,1.0,0.1806992032072448,0.1282818632772945,162.04458259531137,9.195674192084631,33.0,0,0,0,26,1005.9,38.8,30.0,0.30201048951048953,0.03748733782316879 +FASTAI (default),10.92040595641503,0.607048539014963,5.789474679574097,0.6364629667404095,0.9807465319111015,0.9340218157343317,0.2458147993858494,0.18433902398032298,78.41470283692,37.58331718018816,32.15384615384615,0.19146000000000002,9.578287720680237,0.6503585577011108,4.679521651364521,0.5473457672031852,1.0,1.0,0.15595107304948763,0.13206479856027808,61.09577848128312,35.985779724786894,35.0,0,0,0,26,999.9,29.7,31.1,0.291958041958042,0.036046519543766274 +RF (default),1.1224354964036207,0.07520443659562331,0.5160590091828214,0.08504536046248315,0.9951894125278877,0.9662672985507942,0.2733008673941071,0.2607556340607851,7.699488729035172,4.8652515281017985,32.21153846153846,0.213715,0.8405723571777344,0.06134629249572754,0.44070860935344924,0.06492877134601137,1.0,1.0,0.22304594727380544,0.1387641342747614,5.263965350091326,4.042169058695611,35.75,0,0,0,26,1000.0,0.0,0.0,0.2906468531468531,0.04063169893049472 +XT (default),0.7937821791722224,0.076584870998676,0.45161836217627827,0.08730195757590078,0.9851460583683391,0.9723807339815378,0.2928638810006597,0.2925631030122461,5.4850410746785805,4.998284751448414,34.11538461538461,0.22224,0.7121238708496094,0.07150018215179443,0.3597485800421042,0.06793435113493712,1.0,1.0,0.21784002712442435,0.1545266336174043,3.777771179368423,4.378376796908203,38.0,0,0,0,26,949.4,35.6,31.6,0.24737762237762237,0.034356084606725006 +LR (tuned + ensemble),175.34701893879816,0.4311328667860765,114.81546568819135,0.35152858974096185,0.9447815704031849,0.9613573808489063,0.31304194938194396,0.27486950659356457,1394.89696800973,28.50704727597215,34.21153846153846,0.219115,157.3445065021515,0.20991504192352295,89.11614089252916,0.1966045265631563,1.0,1.0,0.260779813420035,0.18354757952857997,1033.8951605639145,9.91761243845308,37.5,1,0,0,25,946.4,32.2,34.1,0.24519230769230768,0.06657835051950235 +LR (tuned),175.34701893879816,0.13578540545243484,114.81546568819135,0.11644774040693313,0.9645430999513104,0.9706124256069096,0.3197153495116481,0.2866149774984877,1394.89696800973,6.834352988282761,34.92307692307692,0.21990500000000002,157.3445065021515,0.06698489189147949,89.11614089252916,0.08340922333653941,1.0,1.0,0.2839064101424172,0.1973963943493853,1033.8951605639145,4.726037355894221,38.75,0,0,1,25,925.0,34.9,43.3,0.229020979020979,0.04041420009021431 +LR (default),4.7979896068573,0.14980417031508225,2.784461238483506,0.15310137710602748,0.9745259999639975,0.9812456463551247,0.3329427914389331,0.32581993219914024,36.357934158034695,9.005429323015887,35.98076923076923,0.227395,4.980341196060181,0.0988316535949707,2.4349667711867333,0.10027059217560387,1.0,1.0,0.283906387231373,0.21846589850078024,26.549049340848278,5.855361941404224,39.0,0,0,0,26,891.8,35.4,28.6,0.20498251748251747,0.030292514887996096 +KNN (tuned + ensemble),16.958975911140442,0.2818152537712684,7.194345160625263,0.20399826229470497,1.0,0.9816350265387093,0.47672042775227436,0.5833708185909443,56.76087640724927,13.193730328032911,39.92307692307692,0.297435,6.915013313293457,0.11628735065460205,3.5025132784274384,0.1592449370444793,1.0,1.0,0.4130987117462378,0.6739246116612398,52.78507887384432,10.328630925855668,43.0,0,0,0,26,745.5,44.9,30.5,0.11538461538461539,0.030206633151293592 +KNN (tuned),16.958975911140442,0.05680100734417255,7.194345160625263,0.04235176068048229,1.0,0.9774922422312161,0.5019983015456773,0.6443397962703555,56.76087640724927,2.623837177847182,41.21153846153846,0.301285,6.915013313293457,0.03549385070800781,3.5025132784274384,0.03322185201916636,1.0,1.0,0.5143946348760557,0.7015107115204589,52.78507887384432,1.9257403822827532,44.0,0,0,1,25,676.5,41.9,47.1,0.08610139860139861,0.035393141891706566 +KNN (default),0.30434812949253964,0.03149266426379864,0.14343197725298135,0.03163346219935531,1.0,1.0,0.5949762374279601,0.9484018378342699,1.002692426657382,1.6405188884998085,44.26923076923077,0.34604,0.17687928676605225,0.01871800422668457,0.0745054444441075,0.021103033819676938,1.0,1.0,0.6151111085811185,1.0,1.0,1.0188814221661033,45.0,0,0,0,26,383.7,60.2,115.9,0.016608391608391608,0.02263422903807308 diff --git a/data/lite/tabpfn-tabicl/tuning-impact-elo-horizontal.pdf b/data/lite/tabpfn-tabicl/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..87a84cc29477088afd719cbf70343602077428db Binary files /dev/null and b/data/lite/tabpfn-tabicl/tuning-impact-elo-horizontal.pdf differ diff --git a/data/lite/tabpfn-tabicl/tuning-impact-elo-horizontal.png.zip b/data/lite/tabpfn-tabicl/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..11f6de74fa20ca51951d5e65c8838b9e75acc2bf --- /dev/null +++ b/data/lite/tabpfn-tabicl/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4e3537446452cbef6ee98d645ac7ca084376854b8328e0703c07f9a66eb7f233 +size 138602 diff --git a/data/lite/tabpfn/figures/critical-diagram.pdf b/data/lite/tabpfn/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c0b7e8a88aef3faaf9d90d670ab0d4c9e40e1dea Binary files /dev/null and b/data/lite/tabpfn/figures/critical-diagram.pdf differ diff --git a/data/lite/tabpfn/figures/critical-diagram.png.zip b/data/lite/tabpfn/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..cbd8c146f2380f529762ed99d807e44ba4a91a75 --- /dev/null +++ b/data/lite/tabpfn/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a80f3a070c9b90eb2b3b529b1b5770762faafdcf943b90a0c663fa4bce83f4ba +size 309083 diff --git a/data/lite/tabpfn/leaderboard.tex b/data/lite/tabpfn/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..cacdd671405bca1878d8b988f07657a6572aaf5d --- /dev/null +++ b/data/lite/tabpfn/leaderboard.tex @@ -0,0 +1,52 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +TabPFNv2 (T+E) & \textcolor{gold}{\textbf{1532${}_{-28,+31}$}} & \textcolor{gold}{\textbf{0.712}} & \textcolor{gold}{\textbf{7.8}} & \textcolor{gold}{\textbf{2.2}} & \textcolor{gold}{\textbf{11}} & \textcolor{gold}{\textbf{6.1\%}} & 3650.56 & 52.33 \\ +AutoGluon 1.3 (4h) & \textcolor{silver}{\textbf{1475${}_{-28,+36}$}} & \textcolor{bronze}{\textbf{0.493}} & \textcolor{silver}{\textbf{9.7}} & 5.5 & 1 & \textcolor{bronze}{\textbf{8.9\%}} & 2518.14 & 2.33 \\ +RealMLP (T+E) & \textcolor{bronze}{\textbf{1454${}_{-35,+29}$}} & 0.477 & \textcolor{bronze}{\textbf{10.6}} & 5.8 & 1 & 10.1\% & 6142.88 & 4.52 \\ +TabPFNv2 (T) & 1431${}_{-37,+26}$ & \textcolor{silver}{\textbf{0.540}} & 11.6 & \textcolor{silver}{\textbf{4.2}} & 1 & \textcolor{silver}{\textbf{8.8\%}} & 3650.56 & 0.54 \\ +TabM (T+E) & 1405${}_{-26,+36}$ & 0.484 & 12.4 & 6.5 & 1 & 10.1\% & 3407.57 & 1.45 \\ +LightGBM (T+E) & 1386${}_{-31,+27}$ & 0.361 & 13.4 & 10.6 & 0 & 12.4\% & 780.12 & 2.13 \\ +CatBoost (T+E) & 1374${}_{-32,+26}$ & 0.342 & 14.0 & 9.3 & 0 & 11.7\% & 2234.83 & 0.65 \\ +TabPFNv2 (D) & 1348${}_{-24,+30}$ & 0.483 & 15.1 & \textcolor{bronze}{\textbf{4.3}} & \textcolor{bronze}{\textbf{3}} & 10.4\% & 4.97 & 0.55 \\ +TabM (T) & 1345${}_{-36,+26}$ & 0.382 & 15.3 & 8.2 & 0 & 11.2\% & 3407.57 & 0.17 \\ +CatBoost (T) & 1342${}_{-27,+27}$ & 0.316 & 15.3 & 10.2 & 0 & 12.4\% & 2234.83 & 0.07 \\ +XGBoost (T+E) & 1335${}_{-29,+22}$ & 0.271 & 15.8 & 11.6 & 0 & 13.4\% & 848.99 & 1.87 \\ +CatBoost (D) & 1327${}_{-27,+34}$ & 0.259 & 16.1 & 9.7 & 1 & 13.4\% & 8.13 & 0.13 \\ +ModernNCA (T+E) & 1326${}_{-45,+26}$ & 0.320 & 16.3 & 7.8 & 0 & 13.0\% & 6308.39 & 7.74 \\ +XGBoost (T) & 1317${}_{-33,+27}$ & 0.229 & 16.6 & 10.2 & 1 & 13.6\% & 848.99 & 0.30 \\ +LightGBM (T) & 1314${}_{-28,+30}$ & 0.288 & 16.8 & 12.0 & 0 & 13.5\% & 780.12 & 0.38 \\ +ModernNCA (T) & 1306${}_{-28,+35}$ & 0.228 & 17.2 & 7.2 & 2 & 12.3\% & 6308.39 & 0.48 \\ +TabM (D) & 1287${}_{-39,+27}$ & 0.295 & 18.2 & 8.9 & 1 & 14.2\% & 13.30 & 0.15 \\ +RealMLP (T) & 1260${}_{-25,+32}$ & 0.221 & 19.3 & 13.4 & 0 & 14.1\% & 6142.88 & 0.25 \\ +EBM (T+E) & 1258${}_{-30,+34}$ & 0.186 & 19.5 & 8.6 & 0 & 16.5\% & 1323.39 & 0.21 \\ +TabDPT (D) & 1258${}_{-42,+34}$ & 0.370 & 19.6 & 5.1 & \textcolor{silver}{\textbf{4}} & 15.0\% & 29.28 & 8.88 \\ +TorchMLP (T+E) & 1255${}_{-29,+31}$ & 0.186 & 19.7 & 12.7 & 0 & 14.1\% & 3460.09 & 2.06 \\ +ModernNCA (D) & 1226${}_{-28,+32}$ & 0.157 & 21.1 & 10.3 & 1 & 17.4\% & 14.50 & 0.30 \\ +EBM (T) & 1211${}_{-22,+31}$ & 0.127 & 21.8 & 8.1 & 2 & 17.4\% & 1323.39 & 0.03 \\ +RealMLP (D) & 1204${}_{-33,+28}$ & 0.099 & 22.3 & 11.2 & 1 & 15.2\% & 35.65 & 0.30 \\ +ExtraTrees (T+E) & 1192${}_{-35,+29}$ & 0.131 & 22.8 & 14.3 & 0 & 18.6\% & 418.39 & 0.97 \\ +FastaiMLP (T+E) & 1189${}_{-28,+33}$ & 0.181 & 22.9 & 13.5 & 0 & 17.7\% & 1453.07 & 6.68 \\ +EBM (D) & 1181${}_{-32,+25}$ & 0.136 & 23.4 & 11.0 & 1 & 18.0\% & 5.90 & 0.07 \\ +ExtraTrees (T) & 1176${}_{-27,+28}$ & 0.102 & 23.7 & 12.7 & 0 & 18.9\% & 418.39 & 0.15 \\ +TorchMLP (T) & 1175${}_{-32,+27}$ & 0.097 & 23.5 & 17.9 & 0 & 15.8\% & 3460.09 & 0.12 \\ +FastaiMLP (T) & 1135${}_{-31,+26}$ & 0.089 & 25.7 & 14.9 & 0 & 19.1\% & 1453.07 & 0.64 \\ +XGBoost (D) & 1129${}_{-32,+28}$ & 0.025 & 26.0 & 22.3 & 0 & 17.7\% & 2.86 & 0.19 \\ +RandomForest (T+E) & 1127${}_{-28,+30}$ & 0.071 & 25.9 & 15.2 & 0 & 19.5\% & 563.93 & 1.00 \\ +LightGBM (D) & 1113${}_{-33,+28}$ & 0.027 & 26.7 & 23.8 & 0 & 18.2\% & 3.28 & 0.18 \\ +RandomForest (T) & 1083${}_{-31,+31}$ & 0.036 & 28.1 & 22.1 & 0 & 20.5\% & 563.93 & 0.15 \\ +TorchMLP (D) & 1012${}_{-25,+32}$ & 0.026 & 30.9 & 26.5 & 0 & 22.2\% & 9.39 & 0.15 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.004 & 31.6 & 25.3 & 0 & 25.1\% & 0.48 & 0.07 \\ +FastaiMLP (D) & 1000${}_{-34,+31}$ & 0.015 & 31.6 & 27.8 & 0 & 23.3\% & 5.20 & 0.52 \\ +ExtraTrees (D) & 992${}_{-34,+28}$ & 0.030 & 31.9 & 27.1 & 0 & 26.1\% & 0.38 & 0.07 \\ +Linear (T+E) & 912${}_{-38,+33}$ & 0.044 & 34.7 & 17.1 & 1 & 32.0\% & 97.93 & 0.19 \\ +Linear (T) & 892${}_{-30,+31}$ & 0.028 & 35.3 & 26.5 & 0 & 32.5\% & 97.93 & 0.08 \\ +Linear (D) & 859${}_{-36,+31}$ & 0.020 & 36.4 & 33.7 & 0 & 33.9\% & 2.75 & 0.09 \\ +KNN (T+E) & 727${}_{-37,+33}$ & 0.000 & 39.4 & 33.9 & 0 & 46.1\% & 3.13 & 0.16 \\ +KNN (T) & 652${}_{-46,+46}$ & 0.000 & 40.6 & 29.9 & 0 & 48.2\% & 3.13 & 0.03 \\ +KNN (D) & 360${}_{-80,+57}$ & 0.000 & 43.3 & 43.2 & 0 & 56.3\% & 0.06 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/lite/tabpfn/tabarena_leaderboard.csv b/data/lite/tabpfn/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..fd51604e2c2801e33ac016ad1e93ad724a1ad090 --- /dev/null +++ b/data/lite/tabpfn/tabarena_leaderboard.csv @@ -0,0 +1,45 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +TABPFNV2 (tuned + ensemble),14336.010728077455,156.01344153375337,4382.0533643641575,74.48134283766498,0.28759504252950974,0.3377998072280995,0.061011172759490306,0.042703954608678374,95784.56309893043,6432.006144478144,7.787878787878788,0.21046,7449.2451322078705,56.3117241859436,3650.5588495238276,52.332938254416526,0.16748308582785468,0.25133689839573625,0.02321606859178882,0.01681007850550054,71633.35015738624,4111.6099760549805,5.0,11,3,1,18,1531.8,30.4,27.3,0.8421423537702607,0.4477927627927628 +AutoGluon 1.3 (4h),5959.189094875798,4.367473082108931,3961.9186202053897,3.838659223481077,0.5066593479144164,0.5031130123587579,0.08889336232518867,0.053887108619865974,52768.640134241,245.21168201964974,9.651515151515152,0.22034,5402.2047798633575,2.42671799659729,2518.1361196041107,2.329453593213223,0.4597995521540763,0.4713507311669133,0.0452138062110049,0.03721823610662635,39027.82746427101,131.32895431780238,8.0,1,2,1,29,1474.6,35.2,27.9,0.7988019732205779,0.18205298316711024 +REALMLP (tuned + ensemble),17735.155029759262,4.730990727742513,6433.060002371296,5.4487082681787244,0.5227695654892056,0.5120650322000645,0.10130621837156616,0.06074937134426635,135393.06860073804,314.1828415073118,10.575757575757576,0.21073,10195.217458248138,4.599701642990112,6142.876617706428,4.516099027530876,0.48215030896347355,0.491559756921,0.058321654172537096,0.040913538766118675,111998.40741080203,285.1562934805996,9.0,1,0,4,28,1454.1,28.7,34.4,0.7773079633544749,0.17333136380772182 +TABPFNV2 (tuned),14336.010728077455,6.103090950936982,4382.0533643641575,3.0581431200087663,0.46046523778783577,0.5188209597444956,0.0877527045624097,0.06945004703125181,95784.56309893043,243.34239180847368,11.621212121212121,0.21386,7449.2451322078705,0.4960660934448242,3650.5588495238276,0.5430449730566292,0.34176855083335494,0.5086309060869947,0.0565608520318478,0.0423413312816546,71633.35015738624,33.800294797494224,8.0,1,8,3,21,1430.9,25.8,36.1,0.7529950669485553,0.238682666157629 +TABM (tuned + ensemble),7905.59104330612,1.9562460870453806,4185.709531882815,1.768466932810432,0.5158505640872573,0.5431361751910071,0.10095330849489449,0.07779881667044108,67788.46010990962,114.61474290926614,12.424242424242424,0.21175,6126.394609212875,1.3961477279663086,3407.56780996161,1.4485538005828857,0.47020083976979304,0.52429480819432,0.04867875413017375,0.04512415883221451,49987.707956643724,99.64653279572039,11.0,1,0,2,30,1404.9,35.1,25.3,0.7343199436222692,0.15429987172281395 +GBM (tuned + ensemble),1414.8606616944978,5.3146100116498545,902.1353750352665,3.8285745502016777,0.6392387450549837,0.6631109559408186,0.12435029597091696,0.08029296614256975,13042.108823335611,299.1337158047987,13.378787878787879,0.22026,1314.2295961380005,2.1158533096313477,780.1150358026446,2.128600501785187,0.6693216689945842,0.7087805886338155,0.06385145474270604,0.05353102776704557,13234.598330219944,119.30370400980756,13.5,0,0,1,32,1386.5,26.2,30.9,0.7121212121212122,0.09458883479781313 +CAT (tuned + ensemble),5998.228848081647,1.0770193735758464,3714.832915582639,1.0224930685169833,0.6581825114313259,0.6297945073423992,0.11693394683553834,0.08165622904718,53149.267800713766,63.12902616676368,14.030303030303031,0.21068,3471.3497779369354,0.9156572818756104,2234.828925740415,0.6529485519008803,0.6112565392764306,0.6242104996505731,0.05828476269775196,0.05387373518333481,22918.764355268395,48.89284806833719,13.0,0,1,1,31,1373.5,25.9,31.4,0.696969696969697,0.10785532910191802 +TABPFNV2 (default),11.6943236914548,1.0926416714986165,5.91242680443585,0.666528048525937,0.5171946106727229,0.5791551091135295,0.10381387583615634,0.08868854300049302,96.73088882379422,50.146253518643945,15.06060606060606,0.21791,9.203422546386719,0.43659281730651855,4.965901903461194,0.5452976226806641,0.3939035196311629,0.7094305649336043,0.06280305375012951,0.06015293769345357,75.26381057033151,30.54843958112344,11.0,3,4,4,22,1348.0,29.1,23.9,0.6730091613812544,0.23189064712882612 +CAT (tuned),5998.228848081647,0.1285553267507842,3714.832915582639,0.13007453601113816,0.6842728639559639,0.6885504248819497,0.12418630031972636,0.0896022810762604,53149.267800713766,8.587377880778849,15.333333333333334,0.21493,3471.3497779369354,0.06416535377502441,2234.828925740415,0.07115136708644841,0.6577077283695499,0.679930031599041,0.06876797936712631,0.05609349712267564,22918.764355268395,3.6064895871728715,14.0,0,1,1,31,1341.9,26.7,27.0,0.6666666666666666,0.09834940324674238 +TABM (tuned),7905.59104330612,0.19297228437481503,4185.709531882815,0.20786967739368611,0.6183990992477728,0.6224646452694973,0.11161000779133246,0.09116450110816081,67788.46010990962,12.857810575794538,15.333333333333334,0.21145,6126.394609212875,0.16823339462280273,3407.56780996161,0.17298539846653752,0.5745991505979262,0.6505981572663989,0.06855918585966836,0.057665303848857825,49987.707956643724,9.039602142310635,14.0,0,2,2,29,1344.6,25.8,35.3,0.6666666666666666,0.12208925959238352 +XGB (tuned + ensemble),1888.6873327096303,3.046517429929791,1158.2361646061293,3.757011355737224,0.7293222800450723,0.715820739762068,0.1340997707713229,0.09628802932833917,14952.255554140145,205.22200127699725,15.757575757575758,0.2153,1542.8871898651123,1.3964767456054688,848.994191986563,1.8656665541560775,0.7623744964555048,0.7222843291924909,0.07724634161829114,0.06320753111560178,12569.741812840497,126.36735681634848,14.0,0,0,2,31,1335.0,21.5,28.8,0.656800563777308,0.08594458301303867 +CAT (default),142.80315683104774,0.2447956186352354,124.41934267197115,0.18478647674970256,0.740681350814587,0.7436459210714602,0.134083376286267,0.09414081542188907,500.26431087714354,13.226167413169538,16.106060606060606,0.20726,12.813451290130615,0.1487717628479004,8.126988439617271,0.12587527839504942,0.8332509641822385,0.8434910311549387,0.08032670945111942,0.08404761904761912,120.27803545982526,7.6030997550901045,18.0,1,0,0,32,1327.0,33.4,26.8,0.6486962649753347,0.10326788547349249 +MNCA (tuned + ensemble),11156.128839138782,10.252970204208836,6590.840949619391,8.639408754165267,0.679738245400772,0.6394100270674522,0.13022705034800527,0.1066436921240968,103946.26001990445,584.7921673215841,16.303030303030305,0.2215,10821.423727989197,7.313557863235474,6308.3910004173395,7.737494511886756,0.7185505017354659,0.7125625863826853,0.06054389432338636,0.05812125873658276,85886.71528316107,464.6138316092033,15.5,0,2,2,29,1326.2,25.1,44.6,0.6441155743481325,0.1280991902660478 +XGB (tuned),1888.6873327096303,0.6773105679136334,1158.2361646061293,0.8819951020962133,0.7708905111580848,0.7430258064847394,0.1358951205286783,0.09911686954396351,14952.255554140145,43.913270063701376,16.606060606060606,0.21833,1542.8871898651123,0.2559163570404053,848.994191986563,0.2979882044274592,0.8207592710887125,0.7227971551467122,0.0827897084298348,0.066243759828395,12569.741812840497,16.170469837088785,15.0,1,0,0,32,1316.8,26.7,32.2,0.6370683579985905,0.09847653714118139 +GBM (tuned),1414.8606616944978,0.8018729108752627,902.1353750352665,0.7467808129875216,0.7124399645711881,0.7455920579660587,0.1350827213293653,0.09778475717045013,13042.108823335611,52.68401353539273,16.833333333333332,0.22222,1314.2295961380005,0.2889828681945801,780.1150358026446,0.3811195826851199,0.745365207761696,0.7967516354613091,0.07316130655481845,0.0701999852719947,13234.598330219944,23.12721292618487,17.0,0,1,0,32,1313.6,29.2,27.4,0.6317829457364341,0.08340160310473639 +MNCA (tuned),11156.128839138782,0.488406752095078,6590.840949619391,0.47560948312275564,0.7723596137566766,0.6796356799110586,0.12292244893259613,0.11094274217582878,103946.26001990445,29.77527390400744,17.166666666666668,0.22855,10821.423727989197,0.40321969985961914,6308.3910004173395,0.476673807114561,0.8745571721847745,0.7249265363485337,0.07201319051941868,0.06463247386035899,85886.71528316107,25.730108223021155,16.0,2,0,1,30,1306.0,34.3,27.4,0.624031007751938,0.13943961890940268 +TABM (default),30.478135679707382,0.1787743279428193,20.49279399878298,0.18760878299490344,0.7054228305321909,0.715318593489871,0.1416501573674291,0.1142675609628328,337.0087472221777,10.899849214261154,18.181818181818183,0.2188,24.11390233039856,0.13432693481445312,13.298290032000581,0.1461421244243193,0.6869868540300672,0.7790852226704958,0.08502834278092686,0.06957745058149634,137.0322141490484,7.861401175434497,18.0,1,0,1,31,1286.6,26.3,38.4,0.6004228329809725,0.11248849756004768 +REALMLP (tuned),17735.155029759262,0.22395390452760638,6433.060002371296,0.3029892240506847,0.779232820829222,0.786711402019705,0.14107038725126725,0.10377737683349993,135393.06860073804,16.18572290331018,19.333333333333332,0.22093,10195.217458248138,0.20874404907226562,6142.876617706428,0.2460546194468641,0.885134051632072,0.8907401409990414,0.09238241308793405,0.08023809523809645,111998.40741080203,13.170123583655588,17.0,0,1,0,32,1259.7,31.9,24.4,0.5736434108527132,0.07459406268073102 +EBM (tuned + ensemble),3438.1651705250597,0.30228347489328095,2025.9909898154654,0.278357915961119,0.8143931606450567,0.7605739546960194,0.16494084588356767,0.1424652018968239,28292.283296322228,19.073799904119216,19.454545454545453,0.21701,2006.7757859230042,0.22147798538208008,1323.3940540554784,0.21026261459440876,0.9460248644293228,0.9785295623374001,0.08652042406281557,0.05568300312825867,20828.647605338916,11.089686715499157,22.0,0,3,2,28,1257.9,33.4,29.5,0.5708245243128964,0.11603443862960279 +TABDPT (default),69.51610838283192,21.59643456430146,33.15048249304137,28.806748991917754,0.629987494107437,0.669848589440618,0.1504189894232722,0.10239872734567355,588.2569465373858,1654.1673313074607,19.62121212121212,0.21757,65.70441889762878,21.056044578552246,29.27648859508967,8.8838207817589,0.7203280108909103,0.8000249564975012,0.09511484956324834,0.070909641670269,498.25427608514167,1247.4102886322382,18.0,4,1,0,28,1257.9,33.6,41.7,0.5669485553206484,0.19522552078194996 +NN_TORCH (tuned + ensemble),9157.856176080126,3.1086663116108286,4282.323362629224,2.77024720786038,0.8141011522100134,0.7941988488635939,0.14096018405841734,0.10434619365969294,78848.62406573945,182.409192264201,19.727272727272727,0.21425,7053.366072177887,2.677239179611206,3460.094702877122,2.056839412206837,0.9757817342652233,0.9237194175475067,0.08543218616764037,0.0695951926212325,57294.450258986595,134.63673457491765,20.0,0,0,2,31,1254.6,30.1,28.4,0.5644820295983086,0.07865677645737007 +MNCA (default),30.355341976339165,0.40350833806124603,17.852429615073813,0.34235869991604173,0.8429271223076698,0.8024634797197773,0.1742757260670377,0.10531319376241087,270.4302626154547,22.061972158381582,21.060606060606062,0.23345,24.494569301605225,0.2764425277709961,14.495319221168756,0.3000060482138007,1.0,0.9531612138198724,0.1256505576208179,0.09743889944848891,179.96393060938846,16.251218083714438,20.0,1,1,0,31,1225.8,31.6,27.7,0.5334742776603242,0.09733484070536731 +EBM (tuned),3438.1651705250597,0.03783920316985159,2025.9909898154654,0.037231939130763475,0.8732650510599177,0.8036623595318353,0.17366806534596116,0.15766360268758092,28292.283296322228,2.285569594265516,21.803030303030305,0.22761,2006.7757859230042,0.027222394943237305,1323.3940540554784,0.025386841881354122,1.0,1.0,0.0893728877206168,0.08235501040640528,20828.647605338916,1.1835017506360512,24.0,2,0,1,30,1211.1,30.2,21.9,0.5162085976039464,0.12329523943951919 +REALMLP (default),105.41575461445433,0.23138419064608487,37.06590128185811,0.29773903605674285,0.9013016306032677,0.815323709848827,0.1523971378269817,0.10502477628843246,791.8198164679417,15.953890267817545,22.318181818181817,0.22649,57.062673807144165,0.18715763092041016,35.649430762175584,0.30133283927465565,1.0,1.0,0.10414681647940127,0.09045562976994424,625.7008793991246,13.341950411904335,24.0,1,0,0,32,1204.0,27.1,32.1,0.5042283298097252,0.08959166478898548 +XT (tuned + ensemble),708.1867504842354,1.5957839488983154,501.4565343765227,1.8224398148781864,0.8688873144004177,0.8350075171577747,0.18587020622182546,0.14066221082257024,7205.038232093607,102.24882259176883,22.818181818181817,0.22616,686.9523606300354,1.3263120651245117,418.38755533099174,0.9717198690479619,1.0,1.0,0.12222279775100575,0.0969336948409988,5299.380855419872,85.4887622756034,26.5,0,0,0,33,1192.3,29.0,34.4,0.492600422832981,0.06973746524809589 +FASTAI (tuned + ensemble),2576.8732850045867,8.659171588493116,1678.8968661467622,9.647784447253477,0.8186439717731856,0.8233840342610399,0.17744811445555947,0.12353073291472176,24684.22585148677,537.1797900399015,22.924242424242426,0.22761,2283.895311355591,7.460584878921509,1453.0691261589527,6.677908072211324,1.0,1.0,0.09544576825940065,0.08681294440231768,19729.511337209304,439.3893190512624,26.0,0,0,0,33,1189.2,32.7,27.6,0.4901338971106413,0.07388840038053952 +EBM (default),11.796755761811227,0.06572751565413042,7.163006681498326,0.08347649829212658,0.8635835253158718,0.8429534121799166,0.18042792289811066,0.15909949729002285,102.62018767841327,4.629805865519655,23.424242424242426,0.21297,8.807492733001709,0.05507326126098633,5.903346415390749,0.07107973737946784,1.0,1.0,0.10385145665987439,0.07023809523809632,94.535974439501,3.64412233970632,25.0,1,0,0,32,1180.7,25.0,31.3,0.4785059901338971,0.09056415513048684 +NN_TORCH (tuned),9157.856176080126,0.15434888637427127,4282.323362629224,0.16049520877603024,0.9028056592953255,0.8416227639000304,0.15759149268767128,0.12428490892625065,78848.62406573945,9.627911202663915,23.545454545454547,0.22879,7053.366072177887,0.11448168754577637,3460.094702877122,0.12080316789438152,1.0,1.0,0.100147151360233,0.0767812248241212,57294.450258986595,7.364003194231025,26.0,0,0,0,33,1175.1,26.1,31.1,0.47568710359408034,0.05581669262818803 +XT (tuned),708.1867504842354,0.17320121418346057,501.4565343765227,0.21536245368256726,0.8983919342324714,0.8194714168333243,0.18876898814096105,0.14265005001131126,7205.038232093607,11.521354503327439,23.727272727272727,0.22797,686.9523606300354,0.13843560218811035,418.38755533099174,0.14728453101181402,1.0,1.0,0.1376017223667837,0.08719461637839616,5299.380855419872,9.92312806131416,29.0,0,1,0,32,1176.2,27.4,26.3,0.4714587737843552,0.07889643231090135 +FASTAI (tuned),2576.8732850045867,0.7244977951049805,1678.8968661467622,0.8750374778043916,0.9106015193205547,0.8702717656688002,0.19067319655867174,0.14971818844740778,24684.22585148677,50.26766962756969,25.727272727272727,0.24751,2283.895311355591,0.6439375877380371,1453.0691261589527,0.6368904113769531,1.0,1.0,0.11504721620397895,0.09909086376803719,19729.511337209304,38.24136055200847,28.0,0,1,0,32,1135.4,25.4,30.8,0.4249471458773784,0.06710682416721182 +RF (tuned + ensemble),888.3357556805466,1.4584405927947073,548.982788660307,1.6144500937500732,0.9290486520150778,0.8921110383301076,0.19522228883414436,0.17030886938280962,7986.192963738265,94.79279363134923,25.90909090909091,0.21825,787.3376877307892,1.127267837524414,563.9330272159958,0.9968510626638947,1.0,1.0,0.139739054041219,0.10570563621989555,6864.068149879297,78.90150886133937,29.0,0,1,0,32,1127.1,29.6,27.1,0.42071881606765327,0.06564423024290174 +XGB (default),5.387323235020493,0.2821633526773164,3.3893656086420445,0.3968518697927455,0.9745229998669913,0.9296399987869843,0.17677814512907894,0.15922339773871735,47.27320839563511,19.56191216890218,25.984848484848484,0.22354,4.284018039703369,0.21862435340881348,2.860024247198545,0.18955767154693604,1.0,1.0,0.12943385728195922,0.11358382514493719,45.34698317461044,11.901745335164652,25.0,0,0,0,33,1129.2,27.2,32.0,0.4189570119802678,0.04483168413349101 +GBM (default),6.021721796555952,0.4273146427038944,3.870147482423626,0.35290963126649316,0.9731114574560898,0.9376334778794365,0.18182512474100704,0.1537308330415496,60.70161264382517,26.35196183486977,26.742424242424242,0.2303,5.221571445465088,0.33568358421325684,3.2788508925910955,0.17879690442766463,1.0,1.0,0.13840923735053867,0.10597679216116598,53.27869689108153,15.293676047219583,27.0,0,0,0,33,1112.8,27.5,33.0,0.401338971106413,0.041963556335582776 +RF (tuned),888.3357556805466,0.14608911311987674,548.982788660307,0.1899222961302186,0.963610556213676,0.9266190835825735,0.20495524099504517,0.17970269356115554,7986.192963738265,9.898822250146974,28.12121212121212,0.22167,787.3376877307892,0.12934398651123047,563.9330272159958,0.1492024419788353,1.0,1.0,0.1400874358486981,0.11247571843237328,6864.068149879297,8.186121069044912,31.0,0,0,0,33,1082.8,30.4,30.6,0.3692741367159972,0.04528986332728922 +NN_TORCH (default),27.938863674799602,0.20885326645591043,17.594693580825485,0.25765115579582876,0.9742281682834123,0.9498085655717742,0.2223573038490221,0.18756332476278967,276.3144916310567,14.424726839721131,30.893939393939394,0.22004,19.96923542022705,0.14620447158813477,9.388451067337588,0.15027597806179566,1.0,1.0,0.13014412363246453,0.12823638255534808,185.13016820561077,8.78781289003576,32.0,0,0,0,33,1012.4,31.2,24.7,0.3047921071176885,0.03776804310751238 +RF (default),1.2340893889918472,0.07513474695610278,0.5622938182798829,0.08939393564694124,0.9962098401734872,0.9672887065230135,0.25140766775808776,0.24715201472904136,10.185916203069231,5.20171567420914,31.575757575757574,0.23701,0.7245135307312012,0.060329437255859375,0.4841926739300568,0.0743608035126163,1.0,1.0,0.2215808246703358,0.1728884254431697,8.226093372880863,4.642861593290882,35.0,0,0,0,33,1000.0,0.0,0.0,0.28893587033121915,0.03955153794316757 +FASTAI (default),10.49087964404713,0.5738813010129061,5.992322657632511,0.6140012973845629,0.9848306008996557,0.9441644290803554,0.23284269048954082,0.1966651448852838,93.5145549982301,37.08947361427536,31.606060606060606,0.23988,7.7834436893463135,0.5681214332580566,5.198076387326242,0.522444012045375,1.0,1.0,0.1527465876997327,0.13328152321740847,81.9890544250646,35.934326641384644,34.0,0,0,0,33,999.9,30.5,33.7,0.288231148696265,0.03602082006463107 +XT (default),0.765689365791552,0.07577740062366832,0.4465789210364869,0.09028580254631946,0.9701664672021088,0.9565087495356771,0.26114772709993034,0.26011640953720233,6.382092612032232,5.230500351711613,31.87878787878788,0.23857,0.6523852348327637,0.06471061706542969,0.3792952342205737,0.07494776627979297,1.0,1.0,0.1985320515377561,0.1548442777574803,6.583290644538605,4.555164209010363,36.0,0,0,0,33,992.5,27.6,33.3,0.28188865398167723,0.036874429537043306 +LR (tuned + ensemble),170.2873171820785,0.3659532792640455,120.20623845518064,0.3201269296402589,0.9564945706206911,0.968230584030719,0.3196771170239951,0.33838034029256764,1708.0827888193253,25.164726190305664,34.72727272727273,0.24535,154.81117272377014,0.17154169082641602,97.92847529053688,0.18593118581206958,1.0,1.0,0.2711660718812774,0.2552683896620274,1268.2145563260167,9.250054132082282,39.0,1,0,0,32,911.5,32.1,37.9,0.2156448202959831,0.05844011745944829 +LR (tuned),170.2873171820785,0.1145554528091893,120.20623845518064,0.1042144714095375,0.972064260567699,0.97552603483875,0.32504329837645596,0.3478458566538538,1708.0827888193253,6.107453287725022,35.303030303030305,0.25429,154.81117272377014,0.0533757209777832,97.92847529053688,0.07892496065012959,1.0,1.0,0.2768258727778956,0.2552683896620274,1268.2145563260167,4.226983992546795,39.5,0,0,1,32,892.1,30.8,29.2,0.2022551092318534,0.03779485565304773 +LR (default),4.627540183789803,0.13403422904737067,2.892082672901587,0.14710367960415885,0.9799295757292101,0.9836682619130945,0.33924904946053636,0.39537360179646885,43.81259012975329,8.95784223571136,36.39393939393939,0.25429,4.974438667297363,0.08619952201843262,2.750714412453488,0.09439525477373414,1.0,1.0,0.2768258727778956,0.31807101339614474,41.29590265237998,5.564972891186484,40.0,0,0,0,33,858.8,30.1,35.9,0.17688513037350245,0.029640439524678194 +KNN (tuned + ensemble),14.287633382912839,0.2593685569185199,6.199230307814771,0.19812645438519322,1.0,0.9842561941589855,0.46075029134694007,0.6166126245311254,57.516079843298705,12.788063330642764,39.36363636363637,0.34601,5.862637996673584,0.10712385177612305,3.133583813905716,0.15733957985747313,1.0,1.0,0.42992491292377444,0.7074394089684692,52.1575390239796,10.510779636451476,42.0,0,0,0,33,727.3,32.4,36.7,0.10782241014799154,0.029496960390970647 +KNN (tuned),14.287633382912839,0.05143869284427527,6.199230307814771,0.03996037829183494,1.0,0.9825364908791403,0.48222725453192367,0.6731385428276928,57.516079843298705,2.4985748964581607,40.56060606060606,0.34412,5.862637996673584,0.032462120056152344,3.133583813905716,0.030069828033447266,1.0,1.0,0.5139348134152102,0.7621498558446018,52.1575390239796,1.9337322466384677,43.0,0,0,1,32,651.7,45.4,45.4,0.07998590556730091,0.033406459585363854 +KNN (default),0.25405634533275256,0.029383905006177498,0.12227935577421162,0.030852453478009863,1.0,1.0,0.5634010526029024,0.946484954284205,1.0021213058512708,1.6500080471330536,43.303030303030305,0.40785,0.12821459770202637,0.01713418960571289,0.06314403921930106,0.02475904764267444,1.0,1.0,0.5282115941198384,1.0,1.0,1.2284888285212276,44.0,0,0,0,33,360.5,56.8,79.8,0.01620859760394644,0.023134363057794087 diff --git a/data/lite/tabpfn/tuning-impact-elo-horizontal.pdf b/data/lite/tabpfn/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..ed99681333859006a61dc9555780af8fd60479f7 Binary files /dev/null and b/data/lite/tabpfn/tuning-impact-elo-horizontal.pdf differ diff --git a/data/lite/tabpfn/tuning-impact-elo-horizontal.png.zip b/data/lite/tabpfn/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..8714af302898b55aa7fdaa8b5145e0703276180e --- /dev/null +++ b/data/lite/tabpfn/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6829223d4546befaa590b3203a69a424e6c39c904e7414643036ad35f9bb7f2 +size 135098 diff --git a/data/tabicl-cls/figures/critical-diagram.pdf b/data/tabicl-cls/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0052000636bb76edbc0ee7a9a51b7cc8d9ab5867 Binary files /dev/null and b/data/tabicl-cls/figures/critical-diagram.pdf differ diff --git a/data/tabicl-cls/figures/critical-diagram.png.zip b/data/tabicl-cls/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..eeaf1bd4070bd54d33c25ac9661d9ea7fdfb8040 --- /dev/null +++ b/data/tabicl-cls/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b9ef7a3f66ec78df1118b64a21ebb3aa4fc34c4cddd8998137f78f590184b3 +size 312248 diff --git a/data/tabicl-cls/leaderboard.tex b/data/tabicl-cls/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..f6a356299f52996338ff47cc4a594c8f45ef23e4 --- /dev/null +++ b/data/tabicl-cls/leaderboard.tex @@ -0,0 +1,50 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1645${}_{-36,+32}$}} & \textcolor{gold}{\textbf{0.569}} & \textcolor{gold}{\textbf{7.9}} & \textcolor{silver}{\textbf{2.9}} & \textcolor{silver}{\textbf{7}} & \textcolor{gold}{\textbf{5.4\%}} & 1222.55 & 2.36 \\ +TabM (T+E) & \textcolor{silver}{\textbf{1634${}_{-32,+36}$}} & \textcolor{bronze}{\textbf{0.491}} & \textcolor{silver}{\textbf{8.3}} & \textcolor{bronze}{\textbf{3.4}} & \textcolor{bronze}{\textbf{4}} & \textcolor{bronze}{\textbf{6.7\%}} & 2387.83 & 1.47 \\ +TabICL (D) & \textcolor{bronze}{\textbf{1599${}_{-40,+31}$}} & \textcolor{silver}{\textbf{0.521}} & \textcolor{bronze}{\textbf{9.4}} & \textcolor{gold}{\textbf{2.8}} & \textcolor{gold}{\textbf{8}} & \textcolor{gold}{\textbf{5.4\%}} & 8.68 & 1.81 \\ +RealMLP (T+E) & 1598${}_{-34,+29}$ & 0.418 & 9.5 & 5.5 & 1 & 8.1\% & 5472.30 & 3.06 \\ +LightGBM (T+E) & 1584${}_{-23,+33}$ & 0.389 & 9.9 & 5.7 & 1 & 8.8\% & 374.34 & 1.46 \\ +TabM (T) & 1537${}_{-32,+35}$ & 0.409 & 11.6 & 5.4 & 1 & 7.8\% & 2387.83 & 0.16 \\ +CatBoost (T+E) & 1530${}_{-29,+28}$ & 0.377 & 11.9 & 7.0 & 1 & 7.7\% & 1233.49 & 0.52 \\ +CatBoost (T) & 1522${}_{-27,+30}$ & 0.360 & 12.3 & 6.3 & 1 & 7.9\% & 1233.49 & 0.07 \\ +LightGBM (T) & 1504${}_{-26,+33}$ & 0.302 & 13.0 & 10.9 & 0 & 9.5\% & 374.34 & 0.22 \\ +CatBoost (D) & 1502${}_{-25,+23}$ & 0.343 & 13.0 & 6.4 & 1 & 9.2\% & 5.31 & 0.07 \\ +XGBoost (T+E) & 1497${}_{-31,+30}$ & 0.320 & 13.2 & 9.3 & 0 & 9.6\% & 637.94 & 1.22 \\ +ModernNCA (T) & 1462${}_{-31,+33}$ & 0.275 & 14.7 & 7.1 & 2 & 9.2\% & 4614.64 & 0.52 \\ +ModernNCA (T+E) & 1455${}_{-33,+30}$ & 0.383 & 15.0 & 6.0 & 1 & 9.1\% & 4614.64 & 8.40 \\ +XGBoost (T) & 1450${}_{-30,+33}$ & 0.264 & 15.0 & 12.4 & 0 & 10.0\% & 637.94 & 0.17 \\ +TabM (D) & 1425${}_{-27,+31}$ & 0.272 & 16.3 & 10.0 & 0 & 11.5\% & 9.82 & 0.13 \\ +TorchMLP (T+E) & 1401${}_{-23,+28}$ & 0.228 & 17.1 & 12.2 & 0 & 10.5\% & 2372.22 & 2.09 \\ +RealMLP (T) & 1399${}_{-38,+31}$ & 0.198 & 17.4 & 12.6 & 0 & 11.3\% & 5472.30 & 0.17 \\ +EBM (T+E) & 1391${}_{-27,+26}$ & 0.170 & 17.7 & 10.7 & 0 & 13.9\% & 895.61 & 0.20 \\ +FastaiMLP (T+E) & 1367${}_{-30,+31}$ & 0.207 & 18.8 & 10.6 & 0 & 13.9\% & 582.77 & 4.52 \\ +ModernNCA (D) & 1355${}_{-26,+29}$ & 0.142 & 19.3 & 10.4 & 1 & 13.1\% & 14.53 & 0.34 \\ +EBM (T) & 1328${}_{-36,+26}$ & 0.110 & 20.6 & 15.1 & 0 & 14.7\% & 895.61 & 0.02 \\ +EBM (D) & 1292${}_{-29,+29}$ & 0.119 & 22.1 & 7.9 & 3 & 15.6\% & 3.72 & 0.04 \\ +XGBoost (D) & 1277${}_{-22,+34}$ & 0.102 & 22.6 & 18.0 & 0 & 13.2\% & 1.58 & 0.11 \\ +RealMLP (D) & 1274${}_{-30,+27}$ & 0.095 & 22.9 & 18.8 & 0 & 13.3\% & 35.20 & 0.18 \\ +TabDPT (D) & 1269${}_{-30,+35}$ & 0.186 & 23.0 & 6.2 & 3 & 14.7\% & 20.51 & 8.54 \\ +TorchMLP (T) & 1269${}_{-22,+32}$ & 0.085 & 23.0 & 19.1 & 0 & 13.2\% & 2372.22 & 0.15 \\ +ExtraTrees (T+E) & 1263${}_{-28,+29}$ & 0.082 & 23.3 & 15.9 & 0 & 15.2\% & 182.30 & 0.74 \\ +FastaiMLP (T) & 1259${}_{-29,+29}$ & 0.094 & 23.5 & 18.2 & 0 & 16.0\% & 582.77 & 0.29 \\ +RandomForest (T+E) & 1210${}_{-24,+31}$ & 0.067 & 25.5 & 15.4 & 0 & 16.0\% & 260.01 & 0.74 \\ +LightGBM (D) & 1206${}_{-25,+30}$ & 0.053 & 25.7 & 23.9 & 0 & 14.8\% & 1.41 & 0.12 \\ +ExtraTrees (T) & 1205${}_{-27,+30}$ & 0.056 & 25.8 & 20.7 & 0 & 16.7\% & 182.30 & 0.07 \\ +RandomForest (T) & 1156${}_{-28,+29}$ & 0.045 & 27.7 & 21.1 & 0 & 17.3\% & 260.01 & 0.07 \\ +TorchMLP (D) & 1103${}_{-32,+34}$ & 0.022 & 29.6 & 26.8 & 0 & 18.6\% & 6.03 & 0.13 \\ +FastaiMLP (D) & 1073${}_{-31,+34}$ & 0.027 & 30.6 & 27.5 & 0 & 21.5\% & 2.81 & 0.32 \\ +Linear (T+E) & 1030${}_{-39,+33}$ & 0.044 & 32.0 & 23.1 & 0 & 26.9\% & 44.46 & 0.20 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.003 & 32.8 & 31.3 & 0 & 23.4\% & 0.33 & 0.04 \\ +Linear (T) & 989${}_{-24,+35}$ & 0.028 & 33.1 & 26.3 & 0 & 27.9\% & 44.46 & 0.07 \\ +Linear (D) & 979${}_{-29,+35}$ & 0.020 & 33.4 & 17.4 & 1 & 29.2\% & 1.43 & 0.09 \\ +ExtraTrees (D) & 929${}_{-38,+35}$ & 0.011 & 34.7 & 31.5 & 0 & 26.1\% & 0.24 & 0.04 \\ +KNN (T+E) & 694${}_{-47,+39}$ & 0.000 & 38.8 & 38.5 & 0 & 49.2\% & 3.44 & 0.18 \\ +KNN (T) & 597${}_{-48,+54}$ & 0.000 & 39.8 & 39.7 & 0 & 51.1\% & 3.44 & 0.04 \\ +KNN (D) & 407${}_{-77,+83}$ & 0.000 & 41.2 & 41.1 & 0 & 59.7\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/tabicl-cls/tabarena_leaderboard.csv b/data/tabicl-cls/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..9ff26eed457798f850c0be6a353f123ed8bd3afa --- /dev/null +++ b/data/tabicl-cls/tabarena_leaderboard.csv @@ -0,0 +1,43 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),7750.377155031171,22.84719785201697,2751.5674214709147,2.9633558383567813,0.4305752039617605,0.3953654066045534,0.05350042752511665,0.03292137769247373,32689.64667720609,276.28827431069953,7.916666666666667,0.159155,6752.988276031282,3.4718479580349393,1222.5529738029095,2.35500290690449,0.3593825434326649,0.3493492611774055,0.022091856756724426,0.014502276761886618,26140.06177622487,154.15434639040092,5.0,7,3,2,24,1645.2,31.5,36.0,0.8313008130081301,0.34643609213236404 +TABM (tuned + ensemble),32820.849396629244,7.890166007074309,3102.495096524168,1.8344804606248617,0.5092283925039975,0.4916904001038966,0.06741513792808897,0.0531492276149925,48157.60289475875,128.99098784852723,8.277777777777779,0.17235499999999998,8134.35737352901,2.5847251441743637,2387.831479177676,1.4722284599091153,0.47136645123500964,0.47309371248601884,0.032755260863256086,0.01377708489439205,44088.87353980956,121.43882105497326,5.0,4,6,3,23,1634.2,35.1,31.2,0.8224932249322493,0.29539017627252917 +TABICL (default),112.87139065964723,20.33866377333064,9.733153451045638,2.345033367232273,0.4788051732855598,0.4999558134896783,0.05432482602183145,0.042761048439141636,178.07934402242574,242.8201765656135,9.430555555555555,0.17207,25.732762111557854,3.564396858215332,8.684246340890724,1.808164283651731,0.4992564187967412,0.5554459288972347,0.022981096428129733,0.00939871045261338,151.54793227614402,132.97465082181864,7.0,8,5,1,22,1598.7,30.5,39.9,0.7943766937669376,0.35893420945385096 +REALMLP (tuned + ensemble),78320.7099595922,13.457468075516783,5777.34529914899,4.747200702818298,0.5822217846671746,0.5363650919569951,0.08100170596576858,0.048333408990573846,141412.64388451062,278.6382916983155,9.458333333333334,0.166985,23101.338454975023,5.6439330816268924,5472.29624726055,3.0648418488665286,0.5258237901453162,0.5461812667291126,0.044731996378166494,0.02214701961011789,99490.3161960261,233.98656302124476,8.0,1,1,5,29,1597.7,28.7,33.8,0.7936991869918699,0.18239141458573893 +GBM (tuned + ensemble),2757.3124551578803,12.456019589415302,657.304398009506,2.529125543711048,0.6113071815088934,0.5862746617157221,0.08764117401144472,0.05490427588548946,9019.445489261983,194.08976472235491,9.875,0.16698000000000002,1484.5727322684393,3.4757837878333198,374.34433555986243,1.4573305804667545,0.6534872634143774,0.6218047916068159,0.048374130042081465,0.01740708281579771,8683.556526223521,104.28918180708692,8.5,1,3,1,31,1584.3,32.8,22.7,0.7835365853658537,0.17646098963500922 +TABM (tuned),32820.849396629244,0.8654038354202553,3102.495096524168,0.19252750021110288,0.5912555585094392,0.5498665833450093,0.07844798244845448,0.06744705276522203,48157.60289475875,13.240850473332038,11.569444444444445,0.174175,8134.35737352901,0.26544706026713055,2387.831479177676,0.1626291486781835,0.5877464635288101,0.5841370688586468,0.037275782250363654,0.01907334143513442,44088.87353980956,10.02168824405339,12.0,1,4,3,28,1536.9,34.6,31.2,0.7422086720867209,0.18618211258130268 +CAT (tuned + ensemble),14620.521881076362,2.36174942917294,2269.8595810438947,0.6522846741920234,0.6226459246322733,0.5795784602549343,0.07694002639502172,0.04688218046996814,28484.88552620336,44.38703157771985,11.930555555555555,0.16040500000000002,4812.184866507848,1.2190414004855685,1233.4944988708792,0.5157389806383175,0.6153632608570143,0.6133377930340731,0.04311874094655577,0.022138825584558906,20694.132105891145,37.76547447057217,11.0,1,1,1,33,1529.9,27.9,28.5,0.7334010840108401,0.14347848502260266 +CAT (tuned),14620.521881076362,0.4177007556697469,2269.8595810438947,0.10130234094803403,0.6397756322608608,0.595921365819077,0.07946152837678017,0.046679526112002975,28484.88552620336,6.396687248681741,12.319444444444445,0.161705,4812.184866507848,0.12577376100752088,1233.4944988708792,0.07246540449837217,0.6663747291561838,0.6619777725537788,0.04785266987647563,0.026129509718822636,20694.132105891145,5.269306492183282,12.25,1,3,1,31,1521.7,29.1,26.2,0.7239159891598916,0.15993342501015134 +GBM (tuned),2757.3124551578803,1.9554996257946817,657.304398009506,0.5137681973731403,0.6982904100452502,0.6488862533650219,0.09547546943100074,0.06488642847403936,9019.445489261983,34.03980226584826,12.958333333333334,0.17060999999999998,1484.5727322684393,0.5311000559065078,374.34433555986243,0.2150466969054991,0.7332165833714938,0.6835161575453327,0.05123166148223679,0.024837453552070406,8683.556526223521,16.198247708611177,12.5,0,0,0,36,1503.8,32.8,25.3,0.7083333333333334,0.0919832914612663 +CAT (default),225.3881851879167,0.2547764916478852,113.28441441630724,0.11944997568510514,0.6569717560090025,0.6334563301133419,0.09235672092720176,0.05024888832619162,427.0412955109482,6.664774458427448,13.0,0.16373,17.732768376668293,0.17626664373609757,5.3143473208136776,0.0720373699728269,0.6872314742824219,0.6803142470448004,0.04732883359628853,0.02335001341945147,105.75875711549895,5.435542513489766,14.0,1,3,2,30,1502.1,22.6,24.9,0.7073170731707317,0.15658825936709342 +XGB (tuned + ensemble),5493.491215199232,6.412096421880487,917.6069037638632,2.6250017135715122,0.6804014135907539,0.6480950730376123,0.0961225966782588,0.06599826390387813,11372.296711293906,142.78657719786145,13.180555555555555,0.165745,1589.3805764291023,2.081376380390591,637.9415221019913,1.2169344189421265,0.7126419091535116,0.6677711533451594,0.05643896517815861,0.028686095761617292,9210.443722124051,76.49553505940113,11.0,0,0,1,35,1497.4,29.9,30.7,0.7029132791327913,0.10712949410746893 +MNCA (tuned),57657.94978451405,20.4386415688344,5175.01000177568,1.4870657842617447,0.7251534847407544,0.6345357500361336,0.09177847543835013,0.066116504035111,84656.4189016225,131.8799800913606,14.652777777777779,0.175815,13573.088971928755,0.5870524644851685,4614.640089198099,0.5156200362715774,0.7744612480983963,0.6562641413967298,0.06242848849069588,0.03523361966179468,72619.42076015053,27.073934594746163,13.5,2,0,1,33,1461.5,32.9,30.2,0.6670054200542005,0.14077979219617281 +MNCA (tuned + ensemble),57657.94978451405,541.5858152206297,5175.01000177568,37.18030109053553,0.6174214208973879,0.578573359765691,0.091280224261016,0.07648319937009321,84656.4189016225,3366.8719867776967,14.972222222222221,0.19167499999999998,13573.088971928755,13.54958667092853,4614.640089198099,8.395978282094799,0.6265388929939845,0.5467453849211364,0.06524742082240353,0.02282628933015263,72619.42076015053,536.5448946581089,11.0,1,2,4,29,1454.7,29.8,32.8,0.6592140921409214,0.16725857123574286 +XGB (tuned),5493.491215199232,1.3085634729008617,917.6069037638632,0.6530885382876873,0.7363747567107989,0.6897295053665443,0.1003192246432696,0.07056737035819206,11372.296711293906,29.704007586433832,15.0,0.16848000000000002,1589.3805764291023,0.36310173670450846,637.9415221019913,0.1716870718901738,0.7444722954009122,0.7092778593623184,0.06467959137432433,0.034634993554849815,9210.443722124051,11.556301431859715,14.0,0,0,0,36,1450.4,32.8,29.2,0.6585365853658537,0.08096098336783362 +TABM (default),138.894014670893,0.939259964963536,13.258693701508712,0.18837149821625798,0.7280741356398281,0.7031672840939802,0.11528709718327648,0.08598939671962084,198.59294054356857,12.617551449180137,16.291666666666668,0.175255,25.32820102903578,0.18552133904563056,9.819120211215754,0.1330683562425531,0.8404844924976476,0.7640196748614664,0.05825776091558643,0.026799112086523788,147.95052750875777,10.814297716965921,14.5,0,1,0,35,1425.2,30.7,26.3,0.6270325203252033,0.1003195000204453 +NN_TORCH (tuned + ensemble),25121.675461051862,15.268945999498719,2998.2377676701126,3.008285271799925,0.7723767872546564,0.7329991335924426,0.1049995713196365,0.07342829613614794,59110.75454743922,227.46421879425387,17.13888888888889,0.17071999999999998,8702.23048403528,3.8643554978900485,2372.2159626094954,2.0891774346723144,0.9031626632034824,0.796545307543182,0.06602425212832286,0.04001808164772668,46965.794452564674,173.17543399472862,17.0,0,0,1,35,1401.0,27.8,22.9,0.6063685636856369,0.08227944225449874 +REALMLP (tuned),78320.7099595922,0.5876796141082858,5777.34529914899,0.24772644002890626,0.8023310570642167,0.7196142224519255,0.11271280938723849,0.07745170331430798,141412.64388451062,13.205522206162579,17.38888888888889,0.16874,23101.338454975023,0.26369897921880087,5472.29624726055,0.17084459589124912,0.8787741747777617,0.7325797818153339,0.07388175037437161,0.030556202222580157,99490.3161960261,10.835281307428414,16.0,0,0,1,35,1398.7,31.0,37.3,0.6002710027100271,0.0795807238824226 +EBM (tuned + ensemble),28019.426662537786,1.0875822747195207,2272.8378900325356,0.27343562526003057,0.8303965578080257,0.7998301684426825,0.139104611556999,0.10652863155292883,26169.487179400727,18.583552688722236,17.666666666666668,0.177485,2278.5297676722207,0.3938958803812663,895.6082584208186,0.2030472330989584,0.9326307523783561,0.8571175970768821,0.0823298574360965,0.032353455252280325,16406.757135657386,11.254406005139426,18.5,0,1,1,34,1391.0,25.8,26.8,0.5934959349593496,0.0934328920963366 +FASTAI (tuned + ensemble),7322.968421160292,18.779504420875032,1297.972449752507,8.192088559474888,0.7926888983747619,0.7673069188967362,0.13929056478939184,0.07943566667656245,19993.18321597409,475.72057536620804,18.833333333333332,0.17986000000000002,2947.7962622510063,11.628821227285597,582.7734793353083,4.517300460862554,0.9972327674497973,0.820063334450488,0.07610987251289619,0.051962323371093286,17127.566654953327,460.36404533374946,18.5,0,1,2,33,1367.4,30.8,29.1,0.5650406504065041,0.09441557919127325 +MNCA (default),316.27131853140435,10.342945864171158,16.66944361197823,0.8061333025000202,0.8583270774687776,0.7849274466188256,0.13081067264693186,0.08485561499744637,268.415346120585,74.8577885297529,19.333333333333332,0.18519,30.626362359523775,0.5374451610777113,14.527288647230225,0.336567721078897,1.0,0.8518797436324906,0.06725399583885783,0.039754913635691974,215.5139276781307,21.914790751288265,21.0,1,0,0,35,1354.8,28.5,25.4,0.5528455284552846,0.09575174716206745 +EBM (tuned),28019.426662537786,0.13116049015963518,2272.8378900325356,0.036898209286527296,0.8902973079447266,0.8411638055547851,0.14660895675863106,0.1137392135429118,26169.487179400727,2.281962543329168,20.555555555555557,0.179115,2278.5297676722207,0.04349470535914103,895.6082584208186,0.02459548072380017,1.0,0.903638712948607,0.0875118921642335,0.033230693653085626,16406.757135657386,1.2360778780029142,21.0,0,0,0,36,1327.6,25.2,35.7,0.5230352303523035,0.0663003158876297 +EBM (default),118.50732698676026,0.14401131014765045,8.954596469527505,0.06265208718847394,0.8813849391122723,0.8575746263475605,0.15618392351407046,0.1173829251060243,115.00830556550737,3.283968403256123,22.055555555555557,0.18057,9.348468089103699,0.062386990918053525,3.724321540044417,0.037089624442184096,1.0,0.9382548154739399,0.09278733883112866,0.03641693127266392,64.88241965125457,2.244816344727411,23.5,3,0,0,33,1291.6,28.2,28.5,0.48644986449864497,0.12712641408804534 +XGB (default),11.683355353643865,0.5746506972813311,2.5273290583117265,0.2898449163711814,0.8979377675030106,0.8480945944789343,0.13212736937606528,0.11521166421593966,33.06311547785161,14.058796569155744,22.61111111111111,0.17317,5.340686360994974,0.2856193900108337,1.5761941364945817,0.11450144426278887,1.0,0.91672704407903,0.09869044613168698,0.05725778772571348,33.38866095520069,9.412365893361752,21.0,0,0,0,36,1277.1,33.9,21.8,0.47289972899729,0.05552162629067966 +REALMLP (default),499.37680751445856,0.5946384676444678,36.31037117287357,0.250663200278434,0.9051917890614664,0.8440600984706405,0.13339765969888573,0.09497626322643343,898.4271769217308,13.32830039320175,22.875,0.17601499999999998,147.1676659848955,0.26458012130525377,35.19995410895538,0.1841485669408721,1.0,0.8960472101429542,0.11263887324292665,0.05040272045303727,622.1033556481548,11.425702599260347,23.5,0,0,0,36,1273.5,26.9,30.0,0.46646341463414637,0.05316555619508551 +NN_TORCH (tuned),25121.675461051862,0.8252101601641856,2998.2377676701126,0.18188676084331395,0.9148909672592942,0.8429645197476189,0.13163265867904522,0.0979338033406521,59110.75454743922,12.21885557115889,23.01388888888889,0.17446499999999998,8702.23048403528,0.23337317837609184,2372.2159626094954,0.14653810958067576,1.0,0.8984711644371493,0.08673667098005605,0.06014400941853005,46965.794452564674,9.196372470124006,24.0,0,0,0,36,1268.7,31.8,21.6,0.46307588075880757,0.052482110192258194 +TABDPT (default),176.01594519710835,69.14393293923803,27.198725725356752,23.388429074834338,0.8137945017331583,0.7840494520773492,0.14650365650107092,0.11089852283070799,507.4757748221604,1409.9337088973832,23.02777777777778,0.200525,98.94233159224191,28.93788754940033,20.513119835829798,8.53560138835576,1.0,0.9339842943576007,0.09923475589347508,0.04191194931280873,462.0657004935143,1196.5084355075069,27.0,3,1,2,30,1268.8,34.7,29.1,0.4627371273712737,0.160184662131605 +XT (tuned + ensemble),1031.1996921384775,2.988154384089105,357.27527748345153,1.3397702898173773,0.9176899118580594,0.8688120880193462,0.15241364522714682,0.11524290202486896,4802.300435215387,79.00786738708753,23.305555555555557,0.18232500000000001,744.239438480801,1.8136235740449693,182.30061053451453,0.7431041876698922,1.0,0.941453859089225,0.08575210936900413,0.062337182875290234,3258.869343201663,69.06723555057565,26.5,0,0,0,36,1263.4,28.2,27.4,0.4559620596205962,0.06295015232961548 +FASTAI (tuned),7322.968421160292,1.0651015653286453,1297.972449752507,0.6247631304455121,0.9058648946317567,0.8471459785625569,0.16009499027089993,0.10119416426388964,19993.18321597409,33.55086488448845,23.48611111111111,0.18178,2947.7962622510063,0.8198094805081686,582.7734793353083,0.28744913553656576,1.0,0.9016172166129159,0.08909024228375345,0.06045194430933999,17127.566654953327,27.88231049239245,24.0,0,0,0,36,1259.2,28.2,28.6,0.4515582655826558,0.054895327587024506 +RF (tuned + ensemble),2044.775237460416,2.3872891816092126,416.2783457096328,1.255027602569144,0.932518287859348,0.9003532053450211,0.1595494360924323,0.12881789867366603,5642.134899127388,70.94429168864653,25.52777777777778,0.178315,852.5537050988939,1.9029027620951335,260.0125674942402,0.7428875097152683,1.0,0.9855078952116808,0.09488416408425421,0.07479057874905146,4493.239522943328,63.634247370273194,27.0,0,1,1,34,1210.2,31.0,23.3,0.40176151761517614,0.06479346005504813 +GBM (default),6.758794430523743,0.5789695977428813,2.490182745714093,0.15718383308630232,0.9471769132881858,0.9059712649517301,0.148345200939041,0.11387674425058363,33.5255932823166,11.26499493738597,25.666666666666668,0.18472,5.414635124471452,0.25708606508043075,1.4105395256066293,0.11924102542301018,1.0,0.9314537409293944,0.11068698010353623,0.06510168593553425,28.347402616029974,6.917663985246303,25.0,0,0,0,36,1206.0,29.8,24.3,0.3983739837398374,0.041771634889801394 +XT (tuned),1031.1996921384775,0.29890986717777485,357.27527748345153,0.16424540531360832,0.9439228284777494,0.9022142402747604,0.16661997486773536,0.1253673531425734,4802.300435215387,8.701857547554141,25.75,0.18223,744.239438480801,0.18342396948072645,182.30061053451453,0.07494392763268541,1.0,0.9726085839753669,0.10587648465771732,0.06747774841145451,3258.869343201663,8.07141276727868,29.25,0,0,0,36,1204.9,30.0,26.7,0.39634146341463417,0.04825337807791283 +RF (tuned),2044.775237460416,0.23278873113938317,416.2783457096328,0.1507722695594024,0.9551019371507523,0.9209883678532962,0.17329719554098053,0.14075801263587298,5642.134899127388,7.55687197585497,27.708333333333332,0.18139,852.5537050988939,0.17230602105458576,260.0125674942402,0.06758631242886748,1.0,0.9951901183637382,0.10801648961735072,0.08172079532650084,4493.239522943328,6.782207740325264,29.0,0,0,1,35,1155.7,28.5,27.6,0.3485772357723577,0.04733340788805532 +NN_TORCH (default),48.70578088156971,0.5936917389616555,11.125118656665736,0.17673793806000127,0.978257373569646,0.9461626627360779,0.18617243329422217,0.13932265609762545,163.6050788464746,10.908192435522393,29.59722222222222,0.180355,25.35211862458123,0.2432508071263631,6.0317417768089925,0.12717011148259783,1.0,1.0,0.1315979221742642,0.08392578273701758,147.85412694112986,8.619821917518276,30.0,0,0,0,36,1103.4,33.9,31.4,0.3025067750677507,0.037309167342424246 +FASTAI (default),31.09969735491423,1.10650484436824,4.683882685740998,0.49602131438348224,0.972512597016023,0.937491255300511,0.214763192071676,0.16309292278867282,78.50334203533181,29.20329942994179,30.61111111111111,0.19183499999999998,12.376497785250347,0.7602158255047269,2.8056596594025227,0.32364197153238683,1.0,0.9999615975422427,0.16139709053294848,0.10101015982783891,62.830982755166104,26.31473180904403,33.5,0,0,0,36,1072.9,33.5,31.0,0.2777777777777778,0.03642319913980632 +LR (tuned + ensemble),247.81690773551847,1.5417047552120537,86.9527487524539,0.32565721661614494,0.9561054304859822,0.945372194390879,0.26906892579103764,0.2196796767290125,1145.6529776499187,22.80693330823688,32.02777777777778,0.20382,171.4822693798277,0.2983522944980197,44.45837271402539,0.19711479228248058,1.0,1.0,0.2029910383523567,0.12545333458842928,713.1690352739483,13.25530093456624,35.0,0,0,1,35,1029.6,32.8,38.1,0.24322493224932248,0.04334753037904041 +RF (default),3.0601362299771955,0.1389405298380204,0.4197356009776281,0.0653398591441877,0.9965256868256966,0.9750534412705346,0.23374289847938945,0.22890241001401435,6.380264980433452,3.8863601848700204,32.84722222222222,0.21025,1.1198331514994302,0.08397722244262695,0.3277067685609757,0.03542500892500126,1.0,1.0,0.17586669886220047,0.11692775501298655,5.654732996519019,3.4893450212553976,34.75,0,0,0,36,1000.0,0.0,0.0,0.22323848238482386,0.03196753393423037 +LR (tuned),247.81690773551847,0.4449969635333544,86.9527487524539,0.10396867681281166,0.9723830890370608,0.9526957165635138,0.2793124361704181,0.22914434571494902,1145.6529776499187,6.67768937474542,33.111111111111114,0.20357,171.4822693798277,0.11908173561096191,44.45837271402539,0.07001089403664298,1.0,1.0,0.209915797201436,0.13903865882301508,713.1690352739483,4.422167155622043,36.0,0,0,0,36,989.1,34.1,23.6,0.21680216802168023,0.03806036380315983 +LR (default),6.169420995020572,0.45878614010634244,2.106396664251635,0.12050053609932035,0.9795929927195421,0.9593614170235713,0.2916458967947873,0.2548930671246872,29.155480122785303,7.705503358920759,33.388888888888886,0.20875,5.325402127371894,0.12779696782430014,1.4313534079421033,0.0896510462814927,1.0,1.0,0.2098231228117865,0.1419072705068749,21.98095065107635,4.735441569338942,36.5,1,0,0,35,979.0,34.7,28.5,0.21002710027100271,0.05743750876797652 +XT (default),1.8043978815461381,0.17975940917745048,0.3713595805715374,0.06954273446410252,0.9892721532660226,0.9709853179280655,0.2609714953848282,0.2584310419389792,5.400149437442951,4.422216908920569,34.69444444444444,0.21292,0.9855960739983453,0.08559976683722602,0.24198385110028492,0.03905757784211604,1.0,1.0,0.18424442460299273,0.13284238574591212,4.712255061768779,3.9447928104085292,36.0,0,0,0,36,928.7,34.2,37.2,0.17818428184281843,0.031705419003530376 +KNN (tuned + ensemble),167.8303013191547,12.105770299979199,8.709396553002023,0.6510287601474662,1.0,0.9950381724620512,0.4922467034154276,0.6086097785091938,80.7590848163391,80.9066319119832,38.84722222222222,0.31954499999999997,14.630206929312813,0.18374058273103502,3.4367890290794296,0.18360265549810623,1.0,1.0,0.4552264030449861,0.6664113509907352,59.32150817198775,12.487074071232104,40.0,0,0,0,36,694.5,38.1,46.7,0.0768970189701897,0.025970231106033758 +KNN (tuned),167.8303013191547,1.8655599888460135,8.709396553002023,0.10631861792797924,1.0,0.9973606072760356,0.5109632324100885,0.654584490441368,80.7590848163391,12.961828932017745,39.84722222222222,0.32433,14.630206929312813,0.07810062832302517,3.4367890290794296,0.039402452723266784,1.0,1.0,0.4955223463378593,0.7068928996407389,59.32150817198775,2.342964973116946,41.0,0,0,0,36,596.8,53.8,47.6,0.05250677506775068,0.02518666084837813 +KNN (default),0.8106582238350385,0.22655490967962477,0.11174909219233083,0.031188125017345747,1.0,1.0,0.5965726261878597,0.9589083406549634,1.0058498096362567,2.4347704716843976,41.25,0.392575,0.22842825783623588,0.035103811158074275,0.06799989431884934,0.01944144969012079,1.0,1.0,0.6183072807462191,1.0,1.0,1.2342542936555674,42.0,0,0,0,36,406.8,82.2,76.6,0.018292682926829267,0.02432448428372201 diff --git a/data/tabicl-cls/tuning-impact-elo-horizontal.pdf b/data/tabicl-cls/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c88e63db62070ccf4e12a45f643893c3393e8360 Binary files /dev/null and b/data/tabicl-cls/tuning-impact-elo-horizontal.pdf differ diff --git a/data/tabicl-cls/tuning-impact-elo-horizontal.png.zip b/data/tabicl-cls/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..9ba326e93cb3d1dedb1b9ee2b47b2ff585feedb3 --- /dev/null +++ b/data/tabicl-cls/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e858c95af701d1a8d5d6368f1a8c3669f399c2229854ccff0595e69c3ea3da0d +size 132739 diff --git a/data/tabicl-imputed-cls/figures/critical-diagram.pdf b/data/tabicl-imputed-cls/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..bf843ada801ed3435fd1bddf53b7a79dd4f5e169 Binary files /dev/null and b/data/tabicl-imputed-cls/figures/critical-diagram.pdf differ diff --git a/data/tabicl-imputed-cls/figures/critical-diagram.png.zip b/data/tabicl-imputed-cls/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..dbc8af23f2ed54c53adf16bb2f35a7df366e96ed --- /dev/null +++ b/data/tabicl-imputed-cls/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:735c80d30000ae29ff299d889176259c503a407d4a93ab4fa31fe0124709c252 +size 318459 diff --git a/data/tabicl-imputed-cls/leaderboard.tex b/data/tabicl-imputed-cls/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..c1cecfa9c3986b7c8e4d9a1d998ac552be5d950a --- /dev/null +++ b/data/tabicl-imputed-cls/leaderboard.tex @@ -0,0 +1,53 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1599${}_{-29,+34}$}} & \textcolor{gold}{\textbf{0.569}} & \textcolor{gold}{\textbf{9.4}} & \textcolor{bronze}{\textbf{3.5}} & \textcolor{bronze}{\textbf{5}} & \textcolor{gold}{\textbf{7.0\%}} & 1222.55 & 2.36 \\ +TabM (T+E) & \textcolor{silver}{\textbf{1588${}_{-30,+41}$}} & 0.491 & \textcolor{silver}{\textbf{9.7}} & 4.4 & 3 & \textcolor{bronze}{\textbf{8.6\%}} & 2387.83 & 1.47 \\ +TabICL (D) & \textcolor{bronze}{\textbf{1565${}_{-31,+33}$}} & \textcolor{silver}{\textbf{0.521}} & \textcolor{bronze}{\textbf{10.7}} & \textcolor{silver}{\textbf{3.4}} & \textcolor{silver}{\textbf{6}} & \textcolor{silver}{\textbf{7.3\%}} & 8.68 & 1.81 \\ +RealMLP (T+E) & 1555${}_{-38,+32}$ & 0.418 & 11.1 & 7.5 & 0 & 9.6\% & 5472.30 & 3.06 \\ +LightGBM (T+E) & 1547${}_{-31,+28}$ & 0.389 & 11.4 & 6.4 & 1 & 10.3\% & 374.34 & 1.46 \\ +TabM (T) & 1506${}_{-32,+31}$ & 0.409 & 13.1 & 6.2 & 1 & 9.6\% & 2387.83 & 0.16 \\ +CatBoost (T+E) & 1495${}_{-28,+28}$ & 0.377 & 13.5 & 8.7 & 0 & 9.5\% & 1233.49 & 0.52 \\ +CatBoost (T) & 1487${}_{-32,+35}$ & 0.360 & 13.9 & 6.9 & 1 & 9.7\% & 1233.49 & 0.07 \\ +TabPFNv2 (T+E) & 1484${}_{-35,+31}$ & \textcolor{bronze}{\textbf{0.506}} & 14.0 & \textcolor{gold}{\textbf{2.9}} & \textcolor{gold}{\textbf{8}} & 9.2\% & 3031.01 & 27.04 \\ +LightGBM (T) & 1472${}_{-37,+29}$ & 0.302 & 14.6 & 12.3 & 0 & 11.0\% & 374.34 & 0.22 \\ +CatBoost (D) & 1471${}_{-31,+28}$ & 0.343 & 14.6 & 6.9 & 1 & 10.7\% & 5.31 & 0.07 \\ +XGBoost (T+E) & 1466${}_{-28,+29}$ & 0.320 & 14.8 & 10.1 & 0 & 11.1\% & 637.94 & 1.22 \\ +ModernNCA (T) & 1434${}_{-33,+35}$ & 0.275 & 16.3 & 9.5 & 1 & 10.8\% & 4614.64 & 0.52 \\ +ModernNCA (T+E) & 1428${}_{-33,+23}$ & 0.383 & 16.6 & 7.3 & 0 & 10.8\% & 4614.64 & 8.40 \\ +XGBoost (T) & 1426${}_{-35,+32}$ & 0.264 & 16.8 & 13.7 & 0 & 11.6\% & 637.94 & 0.17 \\ +TabPFNv2 (T) & 1411${}_{-34,+32}$ & 0.393 & 17.3 & 4.9 & 1 & 11.7\% & 3031.01 & 0.59 \\ +TabM (D) & 1400${}_{-27,+26}$ & 0.272 & 18.0 & 11.8 & 0 & 12.9\% & 9.82 & 0.13 \\ +TabPFNv2 (D) & 1391${}_{-38,+28}$ & 0.360 & 18.5 & 4.5 & 4 & 12.7\% & 3.33 & 0.33 \\ +TorchMLP (T+E) & 1381${}_{-30,+28}$ & 0.228 & 18.9 & 14.6 & 0 & 11.9\% & 2372.22 & 2.09 \\ +RealMLP (T) & 1374${}_{-26,+34}$ & 0.198 & 19.2 & 15.0 & 0 & 12.7\% & 5472.30 & 0.17 \\ +EBM (T+E) & 1371${}_{-35,+26}$ & 0.170 & 19.5 & 13.5 & 0 & 15.5\% & 895.61 & 0.20 \\ +FastaiMLP (T+E) & 1347${}_{-32,+32}$ & 0.207 & 20.6 & 12.3 & 0 & 14.9\% & 582.77 & 4.52 \\ +ModernNCA (D) & 1338${}_{-37,+29}$ & 0.142 & 21.1 & 11.9 & 1 & 14.5\% & 14.53 & 0.34 \\ +EBM (T) & 1309${}_{-29,+28}$ & 0.110 & 22.5 & 17.9 & 0 & 16.2\% & 895.61 & 0.02 \\ +EBM (D) & 1274${}_{-27,+35}$ & 0.119 & 24.1 & 11.6 & 1 & 17.0\% & 3.72 & 0.04 \\ +XGBoost (D) & 1266${}_{-30,+28}$ & 0.102 & 24.6 & 19.3 & 0 & 14.6\% & 1.58 & 0.11 \\ +RealMLP (D) & 1262${}_{-24,+34}$ & 0.095 & 24.8 & 20.6 & 0 & 14.7\% & 35.20 & 0.18 \\ +TorchMLP (T) & 1259${}_{-38,+31}$ & 0.085 & 25.0 & 21.6 & 0 & 14.5\% & 2372.22 & 0.15 \\ +TabDPT (D) & 1258${}_{-32,+34}$ & 0.186 & 25.0 & 7.9 & 2 & 15.9\% & 20.51 & 8.54 \\ +ExtraTrees (T+E) & 1254${}_{-30,+32}$ & 0.082 & 25.2 & 18.9 & 0 & 16.5\% & 182.30 & 0.74 \\ +FastaiMLP (T) & 1244${}_{-26,+38}$ & 0.094 & 25.6 & 20.6 & 0 & 16.9\% & 582.77 & 0.29 \\ +RandomForest (T+E) & 1206${}_{-33,+26}$ & 0.067 & 27.5 & 17.4 & 0 & 17.5\% & 260.01 & 0.74 \\ +LightGBM (D) & 1200${}_{-31,+31}$ & 0.053 & 27.7 & 25.8 & 0 & 16.1\% & 1.41 & 0.12 \\ +ExtraTrees (T) & 1196${}_{-36,+29}$ & 0.056 & 27.8 & 23.8 & 0 & 17.9\% & 182.30 & 0.07 \\ +RandomForest (T) & 1153${}_{-34,+27}$ & 0.045 & 29.8 & 22.7 & 0 & 18.7\% & 260.01 & 0.07 \\ +TorchMLP (D) & 1101${}_{-33,+36}$ & 0.022 & 31.8 & 29.0 & 0 & 19.7\% & 6.03 & 0.13 \\ +FastaiMLP (D) & 1070${}_{-35,+35}$ & 0.027 & 32.9 & 29.7 & 0 & 22.3\% & 2.81 & 0.32 \\ +Linear (T+E) & 1029${}_{-33,+28}$ & 0.044 & 34.4 & 24.9 & 0 & 28.0\% & 44.46 & 0.20 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.003 & 35.3 & 33.7 & 0 & 24.4\% & 0.33 & 0.04 \\ +Linear (T) & 991${}_{-29,+34}$ & 0.028 & 35.6 & 30.0 & 0 & 28.9\% & 44.46 & 0.07 \\ +Linear (D) & 984${}_{-37,+30}$ & 0.020 & 35.9 & 27.1 & 0 & 30.0\% & 1.43 & 0.09 \\ +ExtraTrees (D) & 923${}_{-38,+30}$ & 0.011 & 37.5 & 34.5 & 0 & 26.9\% & 0.24 & 0.04 \\ +KNN (T+E) & 686${}_{-48,+31}$ & 0.000 & 41.8 & 41.5 & 0 & 49.7\% & 3.44 & 0.18 \\ +KNN (T) & 597${}_{-47,+39}$ & 0.000 & 42.8 & 42.7 & 0 & 51.5\% & 3.44 & 0.04 \\ +KNN (D) & 410${}_{-78,+56}$ & 0.000 & 44.2 & 44.1 & 0 & 60.0\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/tabicl-imputed-cls/tabarena_leaderboard.csv b/data/tabicl-imputed-cls/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..e24e41bd4a3334eedf84f09b871d0111db4e9ea3 --- /dev/null +++ b/data/tabicl-imputed-cls/tabarena_leaderboard.csv @@ -0,0 +1,46 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),7750.377155031171,22.84719785201697,2751.5674214709147,2.9633558383567813,0.4305752039617605,0.4364232219614402,0.0700098687307868,0.03926886896957941,32689.64667720609,276.28827431069953,9.36111111111111,0.159155,6752.988276031282,3.4718479580349393,1222.5529738029095,2.35500290690449,0.3593825434326649,0.428274830438652,0.03226297629046343,0.018303111934730745,26140.06177622487,154.15434639040092,6.5,5,3,1,27,1599.4,33.1,28.9,0.8099747474747475,0.28804158475055064 +TABM (tuned + ensemble),32820.849396629244,7.890166007074309,3102.495096524168,1.8344804606248617,0.5092283925039975,0.5350797555443023,0.08573855686320273,0.05963111030471803,48157.60289475875,128.99098784852723,9.694444444444445,0.17235499999999998,8134.35737352901,2.5847251441743637,2387.831479177676,1.4722284599091153,0.47136645123500964,0.5540981099375837,0.03899671266638771,0.02990990162448854,44088.87353980956,121.43882105497326,8.0,3,2,2,29,1588.1,40.9,29.4,0.80239898989899,0.22816337166827363 +TABICL (default),112.87139065964723,20.33866377333064,9.733153451045638,2.345033367232273,0.4788051732855598,0.5342388690836984,0.0733517897000551,0.04928098549249302,178.07934402242574,242.8201765656135,10.652777777777779,0.17207,25.732762111557854,3.564396858215332,8.684246340890724,1.808164283651731,0.4992564187967412,0.567554590317891,0.03487584587193143,0.01673206206271185,151.54793227614402,132.97465082181864,9.5,6,4,1,25,1565.3,32.1,30.1,0.7806186868686869,0.2984959355396537 +REALMLP (tuned + ensemble),78320.7099595922,13.457468075516783,5777.34529914899,4.747200702818298,0.5822217846671746,0.5763475950282718,0.09627181319390925,0.054761231537571176,141412.64388451062,278.6382916983155,11.055555555555555,0.166985,23101.338454975023,5.6439330816268924,5472.29624726055,3.0648418488665286,0.5258237901453162,0.5623887498677591,0.051850568481074444,0.029565742001882556,99490.3161960261,233.98656302124476,9.0,0,0,3,33,1554.6,31.9,37.4,0.7714646464646465,0.13261942492574919 +GBM (tuned + ensemble),2757.3124551578803,12.456019589415302,657.304398009506,2.529125543711048,0.6113071815088934,0.6163296836138819,0.10258436279694044,0.06118717781173281,9019.445489261983,194.08976472235491,11.402777777777779,0.16698000000000002,1484.5727322684393,3.4757837878333198,374.34433555986243,1.4573305804667545,0.6534872634143774,0.6349889770045265,0.051567085599247775,0.02199552512560315,8683.556526223521,104.28918180708692,11.0,1,1,3,31,1546.6,27.1,30.9,0.7635732323232324,0.15604065347685897 +TABM (tuned),32820.849396629244,0.8654038354202553,3102.495096524168,0.19252750021110288,0.5912555585094392,0.5882396070204128,0.09626471764112607,0.0738205060529711,48157.60289475875,13.240850473332038,13.069444444444445,0.174175,8134.35737352901,0.26544706026713055,2387.831479177676,0.1626291486781835,0.5877464635288101,0.634980455939971,0.05532096025125255,0.03209545871697756,44088.87353980956,10.02168824405339,13.75,1,3,2,30,1505.7,31.0,31.9,0.7256944444444444,0.16031899840421437 +CAT (tuned + ensemble),14620.521881076362,2.36174942917294,2269.8595810438947,0.6522846741920234,0.6226459246322733,0.6119512748454302,0.09491452657978199,0.05316377757060419,28484.88552620336,44.38703157771985,13.541666666666666,0.16040500000000002,4812.184866507848,1.2190414004855685,1233.4944988708792,0.5157389806383175,0.6153632608570143,0.669054345099479,0.05899249898577069,0.023358363194257874,20694.132105891145,37.76547447057217,12.5,0,1,2,33,1495.4,27.3,27.9,0.7149621212121212,0.11479127151934615 +CAT (tuned),14620.521881076362,0.4177007556697469,2269.8595810438947,0.10130234094803403,0.6397756322608608,0.6261788191167288,0.09729473616020898,0.05295568317374286,28484.88552620336,6.396687248681741,13.902777777777779,0.161705,4812.184866507848,0.12577376100752088,1233.4944988708792,0.07246540449837217,0.6663747291561838,0.6847359221621947,0.058863950439092316,0.027822328565894415,20694.132105891145,5.269306492183282,13.5,1,2,1,32,1487.2,34.3,31.7,0.7067550505050505,0.14526232549284618 +TABPFNV2 (tuned + ensemble),11030.15986506019,107.05828573483008,2810.9122771048533,47.2254791910091,0.49442997284509693,0.5420555152344999,0.09177765662867848,0.07513502873967023,52630.38142536018,3635.7848895640136,14.01388888888889,0.17261500000000002,3494.967008225123,14.569461973508199,3031.00823805294,27.044389802716765,0.46131723763599985,0.5739389565251636,0.04061436251124223,0.029417615348611093,29915.904357304615,1226.8083686763093,7.5,8,4,1,23,1483.5,30.2,34.4,0.704229797979798,0.3421229152339704 +CAT (default),225.3881851879167,0.2547764916478852,113.28441441630724,0.11944997568510514,0.6569717560090025,0.65900799981315,0.10699266309902715,0.056416147276699205,427.0412955109482,6.664774458427448,14.555555555555555,0.16373,17.732768376668293,0.17626664373609757,5.3143473208136776,0.0720373699728269,0.6872314742824219,0.7200990221467047,0.061103005380912956,0.025874497523709752,105.75875711549895,5.435542513489766,16.0,1,3,1,31,1471.4,27.1,30.1,0.6919191919191919,0.14459821669831055 +GBM (tuned),2757.3124551578803,1.9554996257946817,657.304398009506,0.5137681973731403,0.6982904100452502,0.673933401607768,0.11041958078697732,0.0711252717920008,9019.445489261983,34.03980226584826,14.597222222222221,0.17060999999999998,1484.5727322684393,0.5311000559065078,374.34433555986243,0.2150466969054991,0.7332165833714938,0.686475527918081,0.05516713547384128,0.031112468640158004,8683.556526223521,16.198247708611177,14.5,0,0,0,36,1471.7,28.8,36.4,0.6909722222222222,0.08110839951682725 +XGB (tuned + ensemble),5493.491215199232,6.412096421880487,917.6069037638632,2.6250017135715122,0.6804014135907539,0.6707440018597075,0.11134706115124875,0.07209033824084621,11372.296711293906,142.78657719786145,14.777777777777779,0.165745,1589.3805764291023,2.081376380390591,637.9415221019913,1.2169344189421265,0.7126419091535116,0.7253164483987984,0.06110424311659296,0.03060723658382906,9210.443722124051,76.49553505940113,13.5,0,0,1,35,1465.9,29.0,28.0,0.6868686868686869,0.09898002518067026 +MNCA (tuned),57657.94978451405,20.4386415688344,5175.01000177568,1.4870657842617447,0.7251534847407544,0.6598256901305299,0.10801789710267194,0.07238170454580357,84656.4189016225,131.8799800913606,16.34722222222222,0.175815,13573.088971928755,0.5870524644851685,4614.640089198099,0.5156200362715774,0.7744612480983963,0.6781072751281167,0.06755654915093501,0.04344719805270737,72619.42076015053,27.073934594746163,15.5,1,0,0,35,1434.4,34.7,32.3,0.6511994949494949,0.10576974912815088 +MNCA (tuned + ensemble),57657.94978451405,541.5858152206297,5175.01000177568,37.18030109053553,0.6174214208973879,0.6070275906047402,0.10797383295669277,0.0824875593489974,84656.4189016225,3366.8719867776967,16.61111111111111,0.19167499999999998,13573.088971928755,13.54958667092853,4614.640089198099,8.395978282094799,0.6265388929939845,0.5779679694371859,0.07521385499526678,0.03974745118482769,72619.42076015053,536.5448946581089,12.5,0,2,4,30,1428.3,22.6,32.9,0.6452020202020202,0.13670416822906425 +XGB (tuned),5493.491215199232,1.3085634729008617,917.6069037638632,0.6530885382876873,0.7363747567107989,0.7113950131102941,0.1155670799584022,0.07662472696691829,11372.296711293906,29.704007586433832,16.77777777777778,0.16848000000000002,1589.3805764291023,0.36310173670450846,637.9415221019913,0.1716870718901738,0.7444722954009122,0.7540446689272955,0.07553227424104386,0.03514782204325123,9210.443722124051,11.556301431859715,15.0,0,0,0,36,1425.7,31.8,34.8,0.6414141414141414,0.0729104956436391 +TABPFNV2 (tuned),11030.15986506019,3.5877571221487026,2810.9122771048533,1.6323263976196694,0.6073121438794584,0.6281530530145072,0.11748720621877623,0.09038925599249494,52630.38142536018,120.27811182838367,17.291666666666668,0.1868,3494.967008225123,0.5116564194361368,3031.00823805294,0.5868151874902603,0.67505591577672,0.6473273087383917,0.08561151707807774,0.036586305588518116,29915.904357304615,28.32255960244214,10.5,1,8,1,26,1411.2,31.4,33.2,0.6297348484848485,0.20260470036840778 +TABM (default),138.894014670893,0.939259964963536,13.258693701508712,0.18837149821625798,0.7280741356398281,0.7302425630887684,0.12926002829697214,0.09224776399188665,198.59294054356857,12.617551449180137,18.01388888888889,0.175255,25.32820102903578,0.18552133904563056,9.819120211215754,0.1330683562425531,0.8404844924976476,0.7991898878806922,0.06154427249600947,0.03281092722346347,147.95052750875777,10.814297716965921,16.5,0,0,0,36,1400.2,25.6,26.9,0.6133207070707071,0.08493077053880228 +TABPFNV2 (default),10.947083245604126,0.9331302737012321,4.035215198044816,0.47311805105554033,0.6398741999945717,0.6872282155694117,0.12708612946361297,0.09962338191206124,56.66111760729146,31.04552226404085,18.47222222222222,0.1886,7.492631395657857,0.30309558312098184,3.3292708959332433,0.32914197487032515,0.7746032035996981,0.7254516789049502,0.07929605187577571,0.034405727147343024,46.89348887212364,18.74748191899635,17.0,4,1,4,27,1391.3,27.6,37.8,0.6029040404040404,0.22005532484489676 +NN_TORCH (tuned + ensemble),25121.675461051862,15.268945999498719,2998.2377676701126,3.008285271799925,0.7723767872546564,0.7474196520675455,0.11924706433162137,0.07969727765582034,59110.75454743922,227.46421879425387,18.916666666666668,0.17071999999999998,8702.23048403528,3.8643554978900485,2372.2159626094954,2.0891774346723144,0.9031626632034824,0.8115035227779454,0.06782483230561193,0.04484844246885637,46965.794452564674,173.17543399472862,19.5,0,0,0,36,1381.3,27.9,29.7,0.5928030303030303,0.06851852421920986 +REALMLP (tuned),78320.7099595922,0.5876796141082858,5777.34529914899,0.24772644002890626,0.8023310570642167,0.741216248161471,0.12736749165857575,0.08376011570703244,141412.64388451062,13.205522206162579,19.22222222222222,0.16874,23101.338454975023,0.26369897921880087,5472.29624726055,0.17084459589124912,0.8787741747777617,0.7462218843777739,0.07816480714694801,0.04156599671061481,99490.3161960261,10.835281307428414,17.0,0,0,0,36,1373.6,33.9,25.6,0.5858585858585859,0.0664808087477506 +EBM (tuned + ensemble),28019.426662537786,1.0875822747195207,2272.8378900325356,0.27343562526003057,0.8303965578080257,0.816647869111564,0.15463347761353838,0.11176096514887118,26169.487179400727,18.583552688722236,19.47222222222222,0.177485,2278.5297676722207,0.3938958803812663,895.6082584208186,0.2030472330989584,0.9326307523783561,0.8581162937539966,0.0823298574360965,0.040258063765572304,16406.757135657386,11.254406005139426,19.0,0,0,1,35,1370.6,26.0,34.9,0.5801767676767676,0.07417276335502465 +FASTAI (tuned + ensemble),7322.968421160292,18.779504420875032,1297.972449752507,8.192088559474888,0.7926888983747619,0.784027715843069,0.14900317958349352,0.08536599362281201,19993.18321597409,475.72057536620804,20.583333333333332,0.17986000000000002,2947.7962622510063,11.628821227285597,582.7734793353083,4.517300460862554,0.9972327674497973,0.834748128143675,0.08731644523471122,0.05446587011526682,17127.566654953327,460.36404533374946,21.5,0,1,0,35,1346.9,31.4,31.7,0.5549242424242424,0.08155591994782946 +MNCA (default),316.27131853140435,10.342945864171158,16.66944361197823,0.8061333025000202,0.8583270774687776,0.8029408211123655,0.14526299957038255,0.09098389075428723,268.415346120585,74.8577885297529,21.083333333333332,0.18519,30.626362359523775,0.5374451610777113,14.527288647230225,0.336567721078897,1.0,0.8549990387579132,0.07974631950048428,0.04958848985494747,215.5139276781307,21.914790751288265,22.0,1,0,0,35,1337.8,28.3,36.8,0.5435606060606061,0.08412099138581888 +EBM (tuned),28019.426662537786,0.13116049015963518,2272.8378900325356,0.036898209286527296,0.8902973079447266,0.8557790399564313,0.1616382710145804,0.11889562612147969,26169.487179400727,2.281962543329168,22.47222222222222,0.179115,2278.5297676722207,0.04349470535914103,895.6082584208186,0.02459548072380017,1.0,0.8902021091214286,0.08835084225135731,0.04420635043937865,16406.757135657386,1.2360778780029142,23.25,0,0,0,36,1309.0,27.4,28.6,0.5119949494949495,0.05572257387782647 +EBM (default),118.50732698676026,0.14401131014765045,8.954596469527505,0.06265208718847394,0.8813849391122723,0.870854717263904,0.1703532319761959,0.12258014417255342,115.00830556550737,3.283968403256123,24.083333333333332,0.18057,9.348468089103699,0.062386990918053525,3.724321540044417,0.037089624442184096,1.0,0.9291636979284108,0.09554880640873625,0.03755874966541475,64.88241965125457,2.244816344727411,23.5,1,0,2,33,1274.4,34.7,26.5,0.4753787878787879,0.08602897342942367 +XGB (default),11.683355353643865,0.5746506972813311,2.5273290583117265,0.2898449163711814,0.8979377675030106,0.8549642182424325,0.1462630945968126,0.1209765309530816,33.06311547785161,14.058796569155744,24.61111111111111,0.17317,5.340686360994974,0.2856193900108337,1.5761941364945817,0.11450144426278887,1.0,0.9333352642247335,0.10102883170605775,0.06626118331117697,33.38866095520069,9.412365893361752,24.0,0,0,0,36,1265.9,27.9,29.5,0.4633838383838384,0.05169753113821428 +REALMLP (default),499.37680751445856,0.5946384676444678,36.31037117287357,0.250663200278434,0.9051917890614664,0.8497890741111055,0.14747236879393302,0.10088444678422995,898.4271769217308,13.32830039320175,24.819444444444443,0.17601499999999998,147.1676659848955,0.26458012130525377,35.19995410895538,0.1841485669408721,1.0,0.9211958531631512,0.11529538667598471,0.05067504133005549,622.1033556481548,11.425702599260347,25.5,0,0,0,36,1261.6,33.4,24.0,0.4586489898989899,0.04844172183377057 +TABDPT (default),176.01594519710835,69.14393293923803,27.198725725356752,23.388429074834338,0.8137945017331583,0.799744988562006,0.15922158885670307,0.1170304146939885,507.4757748221604,1409.9337088973832,24.97222222222222,0.200525,98.94233159224191,28.93788754940033,20.513119835829798,8.53560138835576,1.0,0.9387874844543951,0.1092338236572048,0.046098002717393685,462.0657004935143,1196.5084355075069,30.0,2,0,3,31,1257.7,33.5,31.9,0.4551767676767677,0.12657617387506367 +NN_TORCH (tuned),25121.675461051862,0.8252101601641856,2998.2377676701126,0.18188676084331395,0.9148909672592942,0.8496725279774323,0.14458100675706576,0.10402677710025271,59110.75454743922,12.21885557115889,24.98611111111111,0.17446499999999998,8702.23048403528,0.23337317837609184,2372.2159626094954,0.14653810958067576,1.0,0.9016439793972164,0.10379368194908523,0.06014400941853005,46965.794452564674,9.196372470124006,25.5,0,0,0,36,1258.8,30.8,37.6,0.4548611111111111,0.04640323044695552 +XT (tuned + ensemble),1031.1996921384775,2.988154384089105,357.27527748345153,1.3397702898173773,0.9176899118580594,0.8794762664358787,0.16523036066445795,0.12115563698636553,4802.300435215387,79.00786738708753,25.194444444444443,0.18232500000000001,744.239438480801,1.8136235740449693,182.30061053451453,0.7431041876698922,1.0,0.9417364460602908,0.09673248729372463,0.0685642717298475,3258.869343201663,69.06723555057565,28.0,0,0,0,36,1254.4,32.0,29.8,0.45012626262626265,0.05277526086197158 +FASTAI (tuned),7322.968421160292,1.0651015653286453,1297.972449752507,0.6247631304455121,0.9058648946317567,0.8597803803405958,0.169495011754614,0.10702555217869385,19993.18321597409,33.55086488448845,25.569444444444443,0.18178,2947.7962622510063,0.8198094805081686,582.7734793353083,0.28744913553656576,1.0,0.9008669648648755,0.09789642437607732,0.06498589353723835,17127.566654953327,27.88231049239245,25.5,0,0,0,36,1243.9,37.2,26.0,0.44160353535353536,0.04861372178212665 +RF (tuned + ensemble),2044.775237460416,2.3872891816092126,416.2783457096328,1.255027602569144,0.932518287859348,0.9026149264767462,0.17454873677687424,0.13450122144811216,5642.134899127388,70.94429168864653,27.47222222222222,0.178315,852.5537050988939,1.9029027620951335,260.0125674942402,0.7428875097152683,1.0,0.9845679509425131,0.11236159795018158,0.07590678210412867,4493.239522943328,63.634247370273194,29.5,0,1,0,35,1206.2,25.9,33.0,0.39835858585858586,0.0573714640639939 +GBM (default),6.758794430523743,0.5789695977428813,2.490182745714093,0.15718383308630232,0.9471769132881858,0.9122903858859188,0.16095597202628661,0.11966196030964046,33.5255932823166,11.26499493738597,27.72222222222222,0.18472,5.414635124471452,0.25708606508043075,1.4105395256066293,0.11924102542301018,1.0,0.9543370010011993,0.11742663255155822,0.07438374059328141,28.347402616029974,6.917663985246303,27.5,0,0,0,36,1199.8,30.8,30.9,0.3926767676767677,0.03882178455915101 +XT (tuned),1031.1996921384775,0.29890986717777485,357.27527748345153,0.16424540531360832,0.9439228284777494,0.9101536244007575,0.1790239881586457,0.1311878003083955,4802.300435215387,8.701857547554141,27.833333333333332,0.18223,744.239438480801,0.18342396948072645,182.30061053451453,0.07494392763268541,1.0,0.9777420353808277,0.10725621698653792,0.07275376605805199,3258.869343201663,8.07141276727868,30.5,0,0,0,36,1195.7,29.0,35.6,0.39015151515151514,0.042100697442121075 +RF (tuned),2044.775237460416,0.23278873113938317,416.2783457096328,0.1507722695594024,0.9551019371507523,0.9237240500821575,0.1867740077419543,0.14637254843975767,5642.134899127388,7.55687197585497,29.75,0.18139,852.5537050988939,0.17230602105458576,260.0125674942402,0.06758631242886748,1.0,0.9977992954936508,0.12445981052443861,0.0841550100370696,4493.239522943328,6.782207740325264,32.0,0,0,1,35,1153.0,27.0,33.4,0.3465909090909091,0.04414933653556944 +NN_TORCH (default),48.70578088156971,0.5936917389616555,11.125118656665736,0.17673793806000127,0.978257373569646,0.9486699848780762,0.19710882479579594,0.14461232370250393,163.6050788464746,10.908192435522393,31.76388888888889,0.180355,25.35211862458123,0.2432508071263631,6.0317417768089925,0.12717011148259783,1.0,0.996424410496547,0.142508632176782,0.09058250288187904,147.85412694112986,8.619821917518276,33.0,0,0,0,36,1101.4,35.8,33.0,0.30082070707070707,0.034537317016541016 +FASTAI (default),31.09969735491423,1.10650484436824,4.683882685740998,0.49602131438348224,0.972512597016023,0.93827750568811,0.22259183987469472,0.1676085097457016,78.50334203533181,29.20329942994179,32.94444444444444,0.19183499999999998,12.376497785250347,0.7602158255047269,2.8056596594025227,0.32364197153238683,1.0,0.9998853211009175,0.16139709053294848,0.10101015982783891,62.830982755166104,26.31473180904403,36.0,0,0,0,36,1069.9,34.4,34.9,0.273989898989899,0.03368733318328171 +LR (tuned + ensemble),247.81690773551847,1.5417047552120537,86.9527487524539,0.32565721661614494,0.9561054304859822,0.9503142970931224,0.2795482007705742,0.22419398362837153,1145.6529776499187,22.80693330823688,34.416666666666664,0.20382,171.4822693798277,0.2983522944980197,44.45837271402539,0.19711479228248058,1.0,1.0,0.2029910383523567,0.12993275981723412,713.1690352739483,13.25530093456624,37.5,0,0,1,35,1029.4,27.5,32.1,0.24053030303030304,0.0401701082123688 +RF (default),3.0601362299771955,0.1389405298380204,0.4197356009776281,0.0653398591441877,0.9965256868256966,0.9784714305124642,0.24439814041789643,0.23332089200953096,6.380264980433452,3.8863601848700204,35.263888888888886,0.21025,1.1198331514994302,0.08397722244262695,0.3277067685609757,0.03542500892500126,1.0,1.0,0.17586669886220047,0.11692775501298655,5.654732996519019,3.4893450212553976,37.25,0,0,0,36,1000.0,0.0,0.0,0.2212752525252525,0.02965019969409202 +LR (tuned),247.81690773551847,0.4449969635333544,86.9527487524539,0.10396867681281166,0.9723830890370608,0.957603281359881,0.2890832368031623,0.2335985252567378,1145.6529776499187,6.67768937474542,35.55555555555556,0.20357,171.4822693798277,0.11908173561096191,44.45837271402539,0.07001089403664298,1.0,1.0,0.209915797201436,0.13996577713587863,713.1690352739483,4.422167155622043,38.0,0,0,0,36,990.7,33.7,28.9,0.21464646464646464,0.03335309519895627 +LR (default),6.169420995020572,0.45878614010634244,2.106396664251635,0.12050053609932035,0.9795929927195421,0.9635148500480292,0.2995361357732522,0.25915777266480783,29.155480122785303,7.705503358920759,35.861111111111114,0.20875,5.325402127371894,0.12779696782430014,1.4313534079421033,0.0896510462814927,1.0,1.0,0.2098231228117865,0.14283438851684813,21.98095065107635,4.735441569338942,39.5,0,0,1,35,984.0,29.4,36.7,0.2077020202020202,0.036834411850132504 +XT (default),1.8043978815461381,0.17975940917745048,0.3713595805715374,0.06954273446410252,0.9892721532660226,0.9741636720839656,0.2693928274585231,0.26259240866880457,5.400149437442951,4.422216908920569,37.47222222222222,0.21292,0.9855960739983453,0.08559976683722602,0.24198385110028492,0.03905757784211604,1.0,1.0,0.18424442460299273,0.1366333464189622,4.712255061768779,3.9447928104085292,39.0,0,0,0,36,922.9,29.7,37.5,0.1710858585858586,0.028997498234233365 +KNN (tuned + ensemble),167.8303013191547,12.105770299979199,8.709396553002023,0.6510287601474662,1.0,0.9960654060232148,0.49677010357074014,0.6120942109628191,80.7590848163391,80.9066319119832,41.80555555555556,0.31954499999999997,14.630206929312813,0.18374058273103502,3.4367890290794296,0.18360265549810623,1.0,1.0,0.4552264030449861,0.6664113509907352,59.32150817198775,12.487074071232104,43.0,0,0,0,36,685.5,30.3,47.5,0.0726010101010101,0.024106030242002884 +KNN (tuned),167.8303013191547,1.8655599888460135,8.709396553002023,0.10631861792797924,1.0,0.9975847139031918,0.5153228769885266,0.6579120926382002,80.7590848163391,12.961828932017745,42.80555555555556,0.32433,14.630206929312813,0.07810062832302517,3.4367890290794296,0.039402452723266784,1.0,1.0,0.4955223463378593,0.7068928996407389,59.32150817198775,2.342964973116946,44.0,0,0,0,36,596.6,38.1,46.2,0.049873737373737376,0.02344196684773788 +KNN (default),0.8106582238350385,0.22655490967962477,0.11174909219233083,0.031188125017345747,1.0,1.0,0.5999865293650463,0.9596846471567678,1.0058498096362567,2.4347704716843976,44.208333333333336,0.392575,0.22842825783623588,0.035103811158074275,0.06799989431884934,0.01944144969012079,1.0,1.0,0.619455920315494,1.0,1.0,1.2342542936555674,45.0,0,0,0,36,409.8,56.0,77.2,0.017992424242424244,0.0226951547710286 diff --git a/data/tabicl-imputed-cls/tuning-impact-elo-horizontal.pdf b/data/tabicl-imputed-cls/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..2744db91c9ad47cd90aee02f30bc56b5a6f89687 Binary files /dev/null and b/data/tabicl-imputed-cls/tuning-impact-elo-horizontal.pdf differ diff --git a/data/tabicl-imputed-cls/tuning-impact-elo-horizontal.png.zip b/data/tabicl-imputed-cls/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..239598955be83ce2de1491934bc83495b5eb0560 --- /dev/null +++ b/data/tabicl-imputed-cls/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69cec3024c196362180fc485a00abd771d989a4cea1aa97071eb9351886ced78 +size 152921 diff --git a/data/tabicl-imputed/figures/critical-diagram.pdf b/data/tabicl-imputed/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..f32dec8a66eb31cc62912db42102455e71e2c287 Binary files /dev/null and b/data/tabicl-imputed/figures/critical-diagram.pdf differ diff --git a/data/tabicl-imputed/figures/critical-diagram.png.zip b/data/tabicl-imputed/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..dbc8af23f2ed54c53adf16bb2f35a7df366e96ed --- /dev/null +++ b/data/tabicl-imputed/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:735c80d30000ae29ff299d889176259c503a407d4a93ab4fa31fe0124709c252 +size 318459 diff --git a/data/tabicl-imputed/leaderboard.tex b/data/tabicl-imputed/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..c1cecfa9c3986b7c8e4d9a1d998ac552be5d950a --- /dev/null +++ b/data/tabicl-imputed/leaderboard.tex @@ -0,0 +1,53 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1599${}_{-29,+34}$}} & \textcolor{gold}{\textbf{0.569}} & \textcolor{gold}{\textbf{9.4}} & \textcolor{bronze}{\textbf{3.5}} & \textcolor{bronze}{\textbf{5}} & \textcolor{gold}{\textbf{7.0\%}} & 1222.55 & 2.36 \\ +TabM (T+E) & \textcolor{silver}{\textbf{1588${}_{-30,+41}$}} & 0.491 & \textcolor{silver}{\textbf{9.7}} & 4.4 & 3 & \textcolor{bronze}{\textbf{8.6\%}} & 2387.83 & 1.47 \\ +TabICL (D) & \textcolor{bronze}{\textbf{1565${}_{-31,+33}$}} & \textcolor{silver}{\textbf{0.521}} & \textcolor{bronze}{\textbf{10.7}} & \textcolor{silver}{\textbf{3.4}} & \textcolor{silver}{\textbf{6}} & \textcolor{silver}{\textbf{7.3\%}} & 8.68 & 1.81 \\ +RealMLP (T+E) & 1555${}_{-38,+32}$ & 0.418 & 11.1 & 7.5 & 0 & 9.6\% & 5472.30 & 3.06 \\ +LightGBM (T+E) & 1547${}_{-31,+28}$ & 0.389 & 11.4 & 6.4 & 1 & 10.3\% & 374.34 & 1.46 \\ +TabM (T) & 1506${}_{-32,+31}$ & 0.409 & 13.1 & 6.2 & 1 & 9.6\% & 2387.83 & 0.16 \\ +CatBoost (T+E) & 1495${}_{-28,+28}$ & 0.377 & 13.5 & 8.7 & 0 & 9.5\% & 1233.49 & 0.52 \\ +CatBoost (T) & 1487${}_{-32,+35}$ & 0.360 & 13.9 & 6.9 & 1 & 9.7\% & 1233.49 & 0.07 \\ +TabPFNv2 (T+E) & 1484${}_{-35,+31}$ & \textcolor{bronze}{\textbf{0.506}} & 14.0 & \textcolor{gold}{\textbf{2.9}} & \textcolor{gold}{\textbf{8}} & 9.2\% & 3031.01 & 27.04 \\ +LightGBM (T) & 1472${}_{-37,+29}$ & 0.302 & 14.6 & 12.3 & 0 & 11.0\% & 374.34 & 0.22 \\ +CatBoost (D) & 1471${}_{-31,+28}$ & 0.343 & 14.6 & 6.9 & 1 & 10.7\% & 5.31 & 0.07 \\ +XGBoost (T+E) & 1466${}_{-28,+29}$ & 0.320 & 14.8 & 10.1 & 0 & 11.1\% & 637.94 & 1.22 \\ +ModernNCA (T) & 1434${}_{-33,+35}$ & 0.275 & 16.3 & 9.5 & 1 & 10.8\% & 4614.64 & 0.52 \\ +ModernNCA (T+E) & 1428${}_{-33,+23}$ & 0.383 & 16.6 & 7.3 & 0 & 10.8\% & 4614.64 & 8.40 \\ +XGBoost (T) & 1426${}_{-35,+32}$ & 0.264 & 16.8 & 13.7 & 0 & 11.6\% & 637.94 & 0.17 \\ +TabPFNv2 (T) & 1411${}_{-34,+32}$ & 0.393 & 17.3 & 4.9 & 1 & 11.7\% & 3031.01 & 0.59 \\ +TabM (D) & 1400${}_{-27,+26}$ & 0.272 & 18.0 & 11.8 & 0 & 12.9\% & 9.82 & 0.13 \\ +TabPFNv2 (D) & 1391${}_{-38,+28}$ & 0.360 & 18.5 & 4.5 & 4 & 12.7\% & 3.33 & 0.33 \\ +TorchMLP (T+E) & 1381${}_{-30,+28}$ & 0.228 & 18.9 & 14.6 & 0 & 11.9\% & 2372.22 & 2.09 \\ +RealMLP (T) & 1374${}_{-26,+34}$ & 0.198 & 19.2 & 15.0 & 0 & 12.7\% & 5472.30 & 0.17 \\ +EBM (T+E) & 1371${}_{-35,+26}$ & 0.170 & 19.5 & 13.5 & 0 & 15.5\% & 895.61 & 0.20 \\ +FastaiMLP (T+E) & 1347${}_{-32,+32}$ & 0.207 & 20.6 & 12.3 & 0 & 14.9\% & 582.77 & 4.52 \\ +ModernNCA (D) & 1338${}_{-37,+29}$ & 0.142 & 21.1 & 11.9 & 1 & 14.5\% & 14.53 & 0.34 \\ +EBM (T) & 1309${}_{-29,+28}$ & 0.110 & 22.5 & 17.9 & 0 & 16.2\% & 895.61 & 0.02 \\ +EBM (D) & 1274${}_{-27,+35}$ & 0.119 & 24.1 & 11.6 & 1 & 17.0\% & 3.72 & 0.04 \\ +XGBoost (D) & 1266${}_{-30,+28}$ & 0.102 & 24.6 & 19.3 & 0 & 14.6\% & 1.58 & 0.11 \\ +RealMLP (D) & 1262${}_{-24,+34}$ & 0.095 & 24.8 & 20.6 & 0 & 14.7\% & 35.20 & 0.18 \\ +TorchMLP (T) & 1259${}_{-38,+31}$ & 0.085 & 25.0 & 21.6 & 0 & 14.5\% & 2372.22 & 0.15 \\ +TabDPT (D) & 1258${}_{-32,+34}$ & 0.186 & 25.0 & 7.9 & 2 & 15.9\% & 20.51 & 8.54 \\ +ExtraTrees (T+E) & 1254${}_{-30,+32}$ & 0.082 & 25.2 & 18.9 & 0 & 16.5\% & 182.30 & 0.74 \\ +FastaiMLP (T) & 1244${}_{-26,+38}$ & 0.094 & 25.6 & 20.6 & 0 & 16.9\% & 582.77 & 0.29 \\ +RandomForest (T+E) & 1206${}_{-33,+26}$ & 0.067 & 27.5 & 17.4 & 0 & 17.5\% & 260.01 & 0.74 \\ +LightGBM (D) & 1200${}_{-31,+31}$ & 0.053 & 27.7 & 25.8 & 0 & 16.1\% & 1.41 & 0.12 \\ +ExtraTrees (T) & 1196${}_{-36,+29}$ & 0.056 & 27.8 & 23.8 & 0 & 17.9\% & 182.30 & 0.07 \\ +RandomForest (T) & 1153${}_{-34,+27}$ & 0.045 & 29.8 & 22.7 & 0 & 18.7\% & 260.01 & 0.07 \\ +TorchMLP (D) & 1101${}_{-33,+36}$ & 0.022 & 31.8 & 29.0 & 0 & 19.7\% & 6.03 & 0.13 \\ +FastaiMLP (D) & 1070${}_{-35,+35}$ & 0.027 & 32.9 & 29.7 & 0 & 22.3\% & 2.81 & 0.32 \\ +Linear (T+E) & 1029${}_{-33,+28}$ & 0.044 & 34.4 & 24.9 & 0 & 28.0\% & 44.46 & 0.20 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.003 & 35.3 & 33.7 & 0 & 24.4\% & 0.33 & 0.04 \\ +Linear (T) & 991${}_{-29,+34}$ & 0.028 & 35.6 & 30.0 & 0 & 28.9\% & 44.46 & 0.07 \\ +Linear (D) & 984${}_{-37,+30}$ & 0.020 & 35.9 & 27.1 & 0 & 30.0\% & 1.43 & 0.09 \\ +ExtraTrees (D) & 923${}_{-38,+30}$ & 0.011 & 37.5 & 34.5 & 0 & 26.9\% & 0.24 & 0.04 \\ +KNN (T+E) & 686${}_{-48,+31}$ & 0.000 & 41.8 & 41.5 & 0 & 49.7\% & 3.44 & 0.18 \\ +KNN (T) & 597${}_{-47,+39}$ & 0.000 & 42.8 & 42.7 & 0 & 51.5\% & 3.44 & 0.04 \\ +KNN (D) & 410${}_{-78,+56}$ & 0.000 & 44.2 & 44.1 & 0 & 60.0\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/tabicl-imputed/tabarena_leaderboard.csv b/data/tabicl-imputed/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..e24e41bd4a3334eedf84f09b871d0111db4e9ea3 --- /dev/null +++ b/data/tabicl-imputed/tabarena_leaderboard.csv @@ -0,0 +1,46 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),7750.377155031171,22.84719785201697,2751.5674214709147,2.9633558383567813,0.4305752039617605,0.4364232219614402,0.0700098687307868,0.03926886896957941,32689.64667720609,276.28827431069953,9.36111111111111,0.159155,6752.988276031282,3.4718479580349393,1222.5529738029095,2.35500290690449,0.3593825434326649,0.428274830438652,0.03226297629046343,0.018303111934730745,26140.06177622487,154.15434639040092,6.5,5,3,1,27,1599.4,33.1,28.9,0.8099747474747475,0.28804158475055064 +TABM (tuned + ensemble),32820.849396629244,7.890166007074309,3102.495096524168,1.8344804606248617,0.5092283925039975,0.5350797555443023,0.08573855686320273,0.05963111030471803,48157.60289475875,128.99098784852723,9.694444444444445,0.17235499999999998,8134.35737352901,2.5847251441743637,2387.831479177676,1.4722284599091153,0.47136645123500964,0.5540981099375837,0.03899671266638771,0.02990990162448854,44088.87353980956,121.43882105497326,8.0,3,2,2,29,1588.1,40.9,29.4,0.80239898989899,0.22816337166827363 +TABICL (default),112.87139065964723,20.33866377333064,9.733153451045638,2.345033367232273,0.4788051732855598,0.5342388690836984,0.0733517897000551,0.04928098549249302,178.07934402242574,242.8201765656135,10.652777777777779,0.17207,25.732762111557854,3.564396858215332,8.684246340890724,1.808164283651731,0.4992564187967412,0.567554590317891,0.03487584587193143,0.01673206206271185,151.54793227614402,132.97465082181864,9.5,6,4,1,25,1565.3,32.1,30.1,0.7806186868686869,0.2984959355396537 +REALMLP (tuned + ensemble),78320.7099595922,13.457468075516783,5777.34529914899,4.747200702818298,0.5822217846671746,0.5763475950282718,0.09627181319390925,0.054761231537571176,141412.64388451062,278.6382916983155,11.055555555555555,0.166985,23101.338454975023,5.6439330816268924,5472.29624726055,3.0648418488665286,0.5258237901453162,0.5623887498677591,0.051850568481074444,0.029565742001882556,99490.3161960261,233.98656302124476,9.0,0,0,3,33,1554.6,31.9,37.4,0.7714646464646465,0.13261942492574919 +GBM (tuned + ensemble),2757.3124551578803,12.456019589415302,657.304398009506,2.529125543711048,0.6113071815088934,0.6163296836138819,0.10258436279694044,0.06118717781173281,9019.445489261983,194.08976472235491,11.402777777777779,0.16698000000000002,1484.5727322684393,3.4757837878333198,374.34433555986243,1.4573305804667545,0.6534872634143774,0.6349889770045265,0.051567085599247775,0.02199552512560315,8683.556526223521,104.28918180708692,11.0,1,1,3,31,1546.6,27.1,30.9,0.7635732323232324,0.15604065347685897 +TABM (tuned),32820.849396629244,0.8654038354202553,3102.495096524168,0.19252750021110288,0.5912555585094392,0.5882396070204128,0.09626471764112607,0.0738205060529711,48157.60289475875,13.240850473332038,13.069444444444445,0.174175,8134.35737352901,0.26544706026713055,2387.831479177676,0.1626291486781835,0.5877464635288101,0.634980455939971,0.05532096025125255,0.03209545871697756,44088.87353980956,10.02168824405339,13.75,1,3,2,30,1505.7,31.0,31.9,0.7256944444444444,0.16031899840421437 +CAT (tuned + ensemble),14620.521881076362,2.36174942917294,2269.8595810438947,0.6522846741920234,0.6226459246322733,0.6119512748454302,0.09491452657978199,0.05316377757060419,28484.88552620336,44.38703157771985,13.541666666666666,0.16040500000000002,4812.184866507848,1.2190414004855685,1233.4944988708792,0.5157389806383175,0.6153632608570143,0.669054345099479,0.05899249898577069,0.023358363194257874,20694.132105891145,37.76547447057217,12.5,0,1,2,33,1495.4,27.3,27.9,0.7149621212121212,0.11479127151934615 +CAT (tuned),14620.521881076362,0.4177007556697469,2269.8595810438947,0.10130234094803403,0.6397756322608608,0.6261788191167288,0.09729473616020898,0.05295568317374286,28484.88552620336,6.396687248681741,13.902777777777779,0.161705,4812.184866507848,0.12577376100752088,1233.4944988708792,0.07246540449837217,0.6663747291561838,0.6847359221621947,0.058863950439092316,0.027822328565894415,20694.132105891145,5.269306492183282,13.5,1,2,1,32,1487.2,34.3,31.7,0.7067550505050505,0.14526232549284618 +TABPFNV2 (tuned + ensemble),11030.15986506019,107.05828573483008,2810.9122771048533,47.2254791910091,0.49442997284509693,0.5420555152344999,0.09177765662867848,0.07513502873967023,52630.38142536018,3635.7848895640136,14.01388888888889,0.17261500000000002,3494.967008225123,14.569461973508199,3031.00823805294,27.044389802716765,0.46131723763599985,0.5739389565251636,0.04061436251124223,0.029417615348611093,29915.904357304615,1226.8083686763093,7.5,8,4,1,23,1483.5,30.2,34.4,0.704229797979798,0.3421229152339704 +CAT (default),225.3881851879167,0.2547764916478852,113.28441441630724,0.11944997568510514,0.6569717560090025,0.65900799981315,0.10699266309902715,0.056416147276699205,427.0412955109482,6.664774458427448,14.555555555555555,0.16373,17.732768376668293,0.17626664373609757,5.3143473208136776,0.0720373699728269,0.6872314742824219,0.7200990221467047,0.061103005380912956,0.025874497523709752,105.75875711549895,5.435542513489766,16.0,1,3,1,31,1471.4,27.1,30.1,0.6919191919191919,0.14459821669831055 +GBM (tuned),2757.3124551578803,1.9554996257946817,657.304398009506,0.5137681973731403,0.6982904100452502,0.673933401607768,0.11041958078697732,0.0711252717920008,9019.445489261983,34.03980226584826,14.597222222222221,0.17060999999999998,1484.5727322684393,0.5311000559065078,374.34433555986243,0.2150466969054991,0.7332165833714938,0.686475527918081,0.05516713547384128,0.031112468640158004,8683.556526223521,16.198247708611177,14.5,0,0,0,36,1471.7,28.8,36.4,0.6909722222222222,0.08110839951682725 +XGB (tuned + ensemble),5493.491215199232,6.412096421880487,917.6069037638632,2.6250017135715122,0.6804014135907539,0.6707440018597075,0.11134706115124875,0.07209033824084621,11372.296711293906,142.78657719786145,14.777777777777779,0.165745,1589.3805764291023,2.081376380390591,637.9415221019913,1.2169344189421265,0.7126419091535116,0.7253164483987984,0.06110424311659296,0.03060723658382906,9210.443722124051,76.49553505940113,13.5,0,0,1,35,1465.9,29.0,28.0,0.6868686868686869,0.09898002518067026 +MNCA (tuned),57657.94978451405,20.4386415688344,5175.01000177568,1.4870657842617447,0.7251534847407544,0.6598256901305299,0.10801789710267194,0.07238170454580357,84656.4189016225,131.8799800913606,16.34722222222222,0.175815,13573.088971928755,0.5870524644851685,4614.640089198099,0.5156200362715774,0.7744612480983963,0.6781072751281167,0.06755654915093501,0.04344719805270737,72619.42076015053,27.073934594746163,15.5,1,0,0,35,1434.4,34.7,32.3,0.6511994949494949,0.10576974912815088 +MNCA (tuned + ensemble),57657.94978451405,541.5858152206297,5175.01000177568,37.18030109053553,0.6174214208973879,0.6070275906047402,0.10797383295669277,0.0824875593489974,84656.4189016225,3366.8719867776967,16.61111111111111,0.19167499999999998,13573.088971928755,13.54958667092853,4614.640089198099,8.395978282094799,0.6265388929939845,0.5779679694371859,0.07521385499526678,0.03974745118482769,72619.42076015053,536.5448946581089,12.5,0,2,4,30,1428.3,22.6,32.9,0.6452020202020202,0.13670416822906425 +XGB (tuned),5493.491215199232,1.3085634729008617,917.6069037638632,0.6530885382876873,0.7363747567107989,0.7113950131102941,0.1155670799584022,0.07662472696691829,11372.296711293906,29.704007586433832,16.77777777777778,0.16848000000000002,1589.3805764291023,0.36310173670450846,637.9415221019913,0.1716870718901738,0.7444722954009122,0.7540446689272955,0.07553227424104386,0.03514782204325123,9210.443722124051,11.556301431859715,15.0,0,0,0,36,1425.7,31.8,34.8,0.6414141414141414,0.0729104956436391 +TABPFNV2 (tuned),11030.15986506019,3.5877571221487026,2810.9122771048533,1.6323263976196694,0.6073121438794584,0.6281530530145072,0.11748720621877623,0.09038925599249494,52630.38142536018,120.27811182838367,17.291666666666668,0.1868,3494.967008225123,0.5116564194361368,3031.00823805294,0.5868151874902603,0.67505591577672,0.6473273087383917,0.08561151707807774,0.036586305588518116,29915.904357304615,28.32255960244214,10.5,1,8,1,26,1411.2,31.4,33.2,0.6297348484848485,0.20260470036840778 +TABM (default),138.894014670893,0.939259964963536,13.258693701508712,0.18837149821625798,0.7280741356398281,0.7302425630887684,0.12926002829697214,0.09224776399188665,198.59294054356857,12.617551449180137,18.01388888888889,0.175255,25.32820102903578,0.18552133904563056,9.819120211215754,0.1330683562425531,0.8404844924976476,0.7991898878806922,0.06154427249600947,0.03281092722346347,147.95052750875777,10.814297716965921,16.5,0,0,0,36,1400.2,25.6,26.9,0.6133207070707071,0.08493077053880228 +TABPFNV2 (default),10.947083245604126,0.9331302737012321,4.035215198044816,0.47311805105554033,0.6398741999945717,0.6872282155694117,0.12708612946361297,0.09962338191206124,56.66111760729146,31.04552226404085,18.47222222222222,0.1886,7.492631395657857,0.30309558312098184,3.3292708959332433,0.32914197487032515,0.7746032035996981,0.7254516789049502,0.07929605187577571,0.034405727147343024,46.89348887212364,18.74748191899635,17.0,4,1,4,27,1391.3,27.6,37.8,0.6029040404040404,0.22005532484489676 +NN_TORCH (tuned + ensemble),25121.675461051862,15.268945999498719,2998.2377676701126,3.008285271799925,0.7723767872546564,0.7474196520675455,0.11924706433162137,0.07969727765582034,59110.75454743922,227.46421879425387,18.916666666666668,0.17071999999999998,8702.23048403528,3.8643554978900485,2372.2159626094954,2.0891774346723144,0.9031626632034824,0.8115035227779454,0.06782483230561193,0.04484844246885637,46965.794452564674,173.17543399472862,19.5,0,0,0,36,1381.3,27.9,29.7,0.5928030303030303,0.06851852421920986 +REALMLP (tuned),78320.7099595922,0.5876796141082858,5777.34529914899,0.24772644002890626,0.8023310570642167,0.741216248161471,0.12736749165857575,0.08376011570703244,141412.64388451062,13.205522206162579,19.22222222222222,0.16874,23101.338454975023,0.26369897921880087,5472.29624726055,0.17084459589124912,0.8787741747777617,0.7462218843777739,0.07816480714694801,0.04156599671061481,99490.3161960261,10.835281307428414,17.0,0,0,0,36,1373.6,33.9,25.6,0.5858585858585859,0.0664808087477506 +EBM (tuned + ensemble),28019.426662537786,1.0875822747195207,2272.8378900325356,0.27343562526003057,0.8303965578080257,0.816647869111564,0.15463347761353838,0.11176096514887118,26169.487179400727,18.583552688722236,19.47222222222222,0.177485,2278.5297676722207,0.3938958803812663,895.6082584208186,0.2030472330989584,0.9326307523783561,0.8581162937539966,0.0823298574360965,0.040258063765572304,16406.757135657386,11.254406005139426,19.0,0,0,1,35,1370.6,26.0,34.9,0.5801767676767676,0.07417276335502465 +FASTAI (tuned + ensemble),7322.968421160292,18.779504420875032,1297.972449752507,8.192088559474888,0.7926888983747619,0.784027715843069,0.14900317958349352,0.08536599362281201,19993.18321597409,475.72057536620804,20.583333333333332,0.17986000000000002,2947.7962622510063,11.628821227285597,582.7734793353083,4.517300460862554,0.9972327674497973,0.834748128143675,0.08731644523471122,0.05446587011526682,17127.566654953327,460.36404533374946,21.5,0,1,0,35,1346.9,31.4,31.7,0.5549242424242424,0.08155591994782946 +MNCA (default),316.27131853140435,10.342945864171158,16.66944361197823,0.8061333025000202,0.8583270774687776,0.8029408211123655,0.14526299957038255,0.09098389075428723,268.415346120585,74.8577885297529,21.083333333333332,0.18519,30.626362359523775,0.5374451610777113,14.527288647230225,0.336567721078897,1.0,0.8549990387579132,0.07974631950048428,0.04958848985494747,215.5139276781307,21.914790751288265,22.0,1,0,0,35,1337.8,28.3,36.8,0.5435606060606061,0.08412099138581888 +EBM (tuned),28019.426662537786,0.13116049015963518,2272.8378900325356,0.036898209286527296,0.8902973079447266,0.8557790399564313,0.1616382710145804,0.11889562612147969,26169.487179400727,2.281962543329168,22.47222222222222,0.179115,2278.5297676722207,0.04349470535914103,895.6082584208186,0.02459548072380017,1.0,0.8902021091214286,0.08835084225135731,0.04420635043937865,16406.757135657386,1.2360778780029142,23.25,0,0,0,36,1309.0,27.4,28.6,0.5119949494949495,0.05572257387782647 +EBM (default),118.50732698676026,0.14401131014765045,8.954596469527505,0.06265208718847394,0.8813849391122723,0.870854717263904,0.1703532319761959,0.12258014417255342,115.00830556550737,3.283968403256123,24.083333333333332,0.18057,9.348468089103699,0.062386990918053525,3.724321540044417,0.037089624442184096,1.0,0.9291636979284108,0.09554880640873625,0.03755874966541475,64.88241965125457,2.244816344727411,23.5,1,0,2,33,1274.4,34.7,26.5,0.4753787878787879,0.08602897342942367 +XGB (default),11.683355353643865,0.5746506972813311,2.5273290583117265,0.2898449163711814,0.8979377675030106,0.8549642182424325,0.1462630945968126,0.1209765309530816,33.06311547785161,14.058796569155744,24.61111111111111,0.17317,5.340686360994974,0.2856193900108337,1.5761941364945817,0.11450144426278887,1.0,0.9333352642247335,0.10102883170605775,0.06626118331117697,33.38866095520069,9.412365893361752,24.0,0,0,0,36,1265.9,27.9,29.5,0.4633838383838384,0.05169753113821428 +REALMLP (default),499.37680751445856,0.5946384676444678,36.31037117287357,0.250663200278434,0.9051917890614664,0.8497890741111055,0.14747236879393302,0.10088444678422995,898.4271769217308,13.32830039320175,24.819444444444443,0.17601499999999998,147.1676659848955,0.26458012130525377,35.19995410895538,0.1841485669408721,1.0,0.9211958531631512,0.11529538667598471,0.05067504133005549,622.1033556481548,11.425702599260347,25.5,0,0,0,36,1261.6,33.4,24.0,0.4586489898989899,0.04844172183377057 +TABDPT (default),176.01594519710835,69.14393293923803,27.198725725356752,23.388429074834338,0.8137945017331583,0.799744988562006,0.15922158885670307,0.1170304146939885,507.4757748221604,1409.9337088973832,24.97222222222222,0.200525,98.94233159224191,28.93788754940033,20.513119835829798,8.53560138835576,1.0,0.9387874844543951,0.1092338236572048,0.046098002717393685,462.0657004935143,1196.5084355075069,30.0,2,0,3,31,1257.7,33.5,31.9,0.4551767676767677,0.12657617387506367 +NN_TORCH (tuned),25121.675461051862,0.8252101601641856,2998.2377676701126,0.18188676084331395,0.9148909672592942,0.8496725279774323,0.14458100675706576,0.10402677710025271,59110.75454743922,12.21885557115889,24.98611111111111,0.17446499999999998,8702.23048403528,0.23337317837609184,2372.2159626094954,0.14653810958067576,1.0,0.9016439793972164,0.10379368194908523,0.06014400941853005,46965.794452564674,9.196372470124006,25.5,0,0,0,36,1258.8,30.8,37.6,0.4548611111111111,0.04640323044695552 +XT (tuned + ensemble),1031.1996921384775,2.988154384089105,357.27527748345153,1.3397702898173773,0.9176899118580594,0.8794762664358787,0.16523036066445795,0.12115563698636553,4802.300435215387,79.00786738708753,25.194444444444443,0.18232500000000001,744.239438480801,1.8136235740449693,182.30061053451453,0.7431041876698922,1.0,0.9417364460602908,0.09673248729372463,0.0685642717298475,3258.869343201663,69.06723555057565,28.0,0,0,0,36,1254.4,32.0,29.8,0.45012626262626265,0.05277526086197158 +FASTAI (tuned),7322.968421160292,1.0651015653286453,1297.972449752507,0.6247631304455121,0.9058648946317567,0.8597803803405958,0.169495011754614,0.10702555217869385,19993.18321597409,33.55086488448845,25.569444444444443,0.18178,2947.7962622510063,0.8198094805081686,582.7734793353083,0.28744913553656576,1.0,0.9008669648648755,0.09789642437607732,0.06498589353723835,17127.566654953327,27.88231049239245,25.5,0,0,0,36,1243.9,37.2,26.0,0.44160353535353536,0.04861372178212665 +RF (tuned + ensemble),2044.775237460416,2.3872891816092126,416.2783457096328,1.255027602569144,0.932518287859348,0.9026149264767462,0.17454873677687424,0.13450122144811216,5642.134899127388,70.94429168864653,27.47222222222222,0.178315,852.5537050988939,1.9029027620951335,260.0125674942402,0.7428875097152683,1.0,0.9845679509425131,0.11236159795018158,0.07590678210412867,4493.239522943328,63.634247370273194,29.5,0,1,0,35,1206.2,25.9,33.0,0.39835858585858586,0.0573714640639939 +GBM (default),6.758794430523743,0.5789695977428813,2.490182745714093,0.15718383308630232,0.9471769132881858,0.9122903858859188,0.16095597202628661,0.11966196030964046,33.5255932823166,11.26499493738597,27.72222222222222,0.18472,5.414635124471452,0.25708606508043075,1.4105395256066293,0.11924102542301018,1.0,0.9543370010011993,0.11742663255155822,0.07438374059328141,28.347402616029974,6.917663985246303,27.5,0,0,0,36,1199.8,30.8,30.9,0.3926767676767677,0.03882178455915101 +XT (tuned),1031.1996921384775,0.29890986717777485,357.27527748345153,0.16424540531360832,0.9439228284777494,0.9101536244007575,0.1790239881586457,0.1311878003083955,4802.300435215387,8.701857547554141,27.833333333333332,0.18223,744.239438480801,0.18342396948072645,182.30061053451453,0.07494392763268541,1.0,0.9777420353808277,0.10725621698653792,0.07275376605805199,3258.869343201663,8.07141276727868,30.5,0,0,0,36,1195.7,29.0,35.6,0.39015151515151514,0.042100697442121075 +RF (tuned),2044.775237460416,0.23278873113938317,416.2783457096328,0.1507722695594024,0.9551019371507523,0.9237240500821575,0.1867740077419543,0.14637254843975767,5642.134899127388,7.55687197585497,29.75,0.18139,852.5537050988939,0.17230602105458576,260.0125674942402,0.06758631242886748,1.0,0.9977992954936508,0.12445981052443861,0.0841550100370696,4493.239522943328,6.782207740325264,32.0,0,0,1,35,1153.0,27.0,33.4,0.3465909090909091,0.04414933653556944 +NN_TORCH (default),48.70578088156971,0.5936917389616555,11.125118656665736,0.17673793806000127,0.978257373569646,0.9486699848780762,0.19710882479579594,0.14461232370250393,163.6050788464746,10.908192435522393,31.76388888888889,0.180355,25.35211862458123,0.2432508071263631,6.0317417768089925,0.12717011148259783,1.0,0.996424410496547,0.142508632176782,0.09058250288187904,147.85412694112986,8.619821917518276,33.0,0,0,0,36,1101.4,35.8,33.0,0.30082070707070707,0.034537317016541016 +FASTAI (default),31.09969735491423,1.10650484436824,4.683882685740998,0.49602131438348224,0.972512597016023,0.93827750568811,0.22259183987469472,0.1676085097457016,78.50334203533181,29.20329942994179,32.94444444444444,0.19183499999999998,12.376497785250347,0.7602158255047269,2.8056596594025227,0.32364197153238683,1.0,0.9998853211009175,0.16139709053294848,0.10101015982783891,62.830982755166104,26.31473180904403,36.0,0,0,0,36,1069.9,34.4,34.9,0.273989898989899,0.03368733318328171 +LR (tuned + ensemble),247.81690773551847,1.5417047552120537,86.9527487524539,0.32565721661614494,0.9561054304859822,0.9503142970931224,0.2795482007705742,0.22419398362837153,1145.6529776499187,22.80693330823688,34.416666666666664,0.20382,171.4822693798277,0.2983522944980197,44.45837271402539,0.19711479228248058,1.0,1.0,0.2029910383523567,0.12993275981723412,713.1690352739483,13.25530093456624,37.5,0,0,1,35,1029.4,27.5,32.1,0.24053030303030304,0.0401701082123688 +RF (default),3.0601362299771955,0.1389405298380204,0.4197356009776281,0.0653398591441877,0.9965256868256966,0.9784714305124642,0.24439814041789643,0.23332089200953096,6.380264980433452,3.8863601848700204,35.263888888888886,0.21025,1.1198331514994302,0.08397722244262695,0.3277067685609757,0.03542500892500126,1.0,1.0,0.17586669886220047,0.11692775501298655,5.654732996519019,3.4893450212553976,37.25,0,0,0,36,1000.0,0.0,0.0,0.2212752525252525,0.02965019969409202 +LR (tuned),247.81690773551847,0.4449969635333544,86.9527487524539,0.10396867681281166,0.9723830890370608,0.957603281359881,0.2890832368031623,0.2335985252567378,1145.6529776499187,6.67768937474542,35.55555555555556,0.20357,171.4822693798277,0.11908173561096191,44.45837271402539,0.07001089403664298,1.0,1.0,0.209915797201436,0.13996577713587863,713.1690352739483,4.422167155622043,38.0,0,0,0,36,990.7,33.7,28.9,0.21464646464646464,0.03335309519895627 +LR (default),6.169420995020572,0.45878614010634244,2.106396664251635,0.12050053609932035,0.9795929927195421,0.9635148500480292,0.2995361357732522,0.25915777266480783,29.155480122785303,7.705503358920759,35.861111111111114,0.20875,5.325402127371894,0.12779696782430014,1.4313534079421033,0.0896510462814927,1.0,1.0,0.2098231228117865,0.14283438851684813,21.98095065107635,4.735441569338942,39.5,0,0,1,35,984.0,29.4,36.7,0.2077020202020202,0.036834411850132504 +XT (default),1.8043978815461381,0.17975940917745048,0.3713595805715374,0.06954273446410252,0.9892721532660226,0.9741636720839656,0.2693928274585231,0.26259240866880457,5.400149437442951,4.422216908920569,37.47222222222222,0.21292,0.9855960739983453,0.08559976683722602,0.24198385110028492,0.03905757784211604,1.0,1.0,0.18424442460299273,0.1366333464189622,4.712255061768779,3.9447928104085292,39.0,0,0,0,36,922.9,29.7,37.5,0.1710858585858586,0.028997498234233365 +KNN (tuned + ensemble),167.8303013191547,12.105770299979199,8.709396553002023,0.6510287601474662,1.0,0.9960654060232148,0.49677010357074014,0.6120942109628191,80.7590848163391,80.9066319119832,41.80555555555556,0.31954499999999997,14.630206929312813,0.18374058273103502,3.4367890290794296,0.18360265549810623,1.0,1.0,0.4552264030449861,0.6664113509907352,59.32150817198775,12.487074071232104,43.0,0,0,0,36,685.5,30.3,47.5,0.0726010101010101,0.024106030242002884 +KNN (tuned),167.8303013191547,1.8655599888460135,8.709396553002023,0.10631861792797924,1.0,0.9975847139031918,0.5153228769885266,0.6579120926382002,80.7590848163391,12.961828932017745,42.80555555555556,0.32433,14.630206929312813,0.07810062832302517,3.4367890290794296,0.039402452723266784,1.0,1.0,0.4955223463378593,0.7068928996407389,59.32150817198775,2.342964973116946,44.0,0,0,0,36,596.6,38.1,46.2,0.049873737373737376,0.02344196684773788 +KNN (default),0.8106582238350385,0.22655490967962477,0.11174909219233083,0.031188125017345747,1.0,1.0,0.5999865293650463,0.9596846471567678,1.0058498096362567,2.4347704716843976,44.208333333333336,0.392575,0.22842825783623588,0.035103811158074275,0.06799989431884934,0.01944144969012079,1.0,1.0,0.619455920315494,1.0,1.0,1.2342542936555674,45.0,0,0,0,36,409.8,56.0,77.2,0.017992424242424244,0.0226951547710286 diff --git a/data/tabicl-imputed/tuning-impact-elo-horizontal.pdf b/data/tabicl-imputed/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..49cf6a8ead3cc58d08a5720949edacbda39de184 Binary files /dev/null and b/data/tabicl-imputed/tuning-impact-elo-horizontal.pdf differ diff --git a/data/tabicl-imputed/tuning-impact-elo-horizontal.png.zip b/data/tabicl-imputed/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..239598955be83ce2de1491934bc83495b5eb0560 --- /dev/null +++ b/data/tabicl-imputed/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:69cec3024c196362180fc485a00abd771d989a4cea1aa97071eb9351886ced78 +size 152921 diff --git a/data/tabicl/figures/critical-diagram.pdf b/data/tabicl/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b17532e0edcd586e793714208586b2412d92e694 Binary files /dev/null and b/data/tabicl/figures/critical-diagram.pdf differ diff --git a/data/tabicl/figures/critical-diagram.png.zip b/data/tabicl/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..eeaf1bd4070bd54d33c25ac9661d9ea7fdfb8040 --- /dev/null +++ b/data/tabicl/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a1b9ef7a3f66ec78df1118b64a21ebb3aa4fc34c4cddd8998137f78f590184b3 +size 312248 diff --git a/data/tabicl/leaderboard.tex b/data/tabicl/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..f6a356299f52996338ff47cc4a594c8f45ef23e4 --- /dev/null +++ b/data/tabicl/leaderboard.tex @@ -0,0 +1,50 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +AutoGluon 1.3 (4h) & \textcolor{gold}{\textbf{1645${}_{-36,+32}$}} & \textcolor{gold}{\textbf{0.569}} & \textcolor{gold}{\textbf{7.9}} & \textcolor{silver}{\textbf{2.9}} & \textcolor{silver}{\textbf{7}} & \textcolor{gold}{\textbf{5.4\%}} & 1222.55 & 2.36 \\ +TabM (T+E) & \textcolor{silver}{\textbf{1634${}_{-32,+36}$}} & \textcolor{bronze}{\textbf{0.491}} & \textcolor{silver}{\textbf{8.3}} & \textcolor{bronze}{\textbf{3.4}} & \textcolor{bronze}{\textbf{4}} & \textcolor{bronze}{\textbf{6.7\%}} & 2387.83 & 1.47 \\ +TabICL (D) & \textcolor{bronze}{\textbf{1599${}_{-40,+31}$}} & \textcolor{silver}{\textbf{0.521}} & \textcolor{bronze}{\textbf{9.4}} & \textcolor{gold}{\textbf{2.8}} & \textcolor{gold}{\textbf{8}} & \textcolor{gold}{\textbf{5.4\%}} & 8.68 & 1.81 \\ +RealMLP (T+E) & 1598${}_{-34,+29}$ & 0.418 & 9.5 & 5.5 & 1 & 8.1\% & 5472.30 & 3.06 \\ +LightGBM (T+E) & 1584${}_{-23,+33}$ & 0.389 & 9.9 & 5.7 & 1 & 8.8\% & 374.34 & 1.46 \\ +TabM (T) & 1537${}_{-32,+35}$ & 0.409 & 11.6 & 5.4 & 1 & 7.8\% & 2387.83 & 0.16 \\ +CatBoost (T+E) & 1530${}_{-29,+28}$ & 0.377 & 11.9 & 7.0 & 1 & 7.7\% & 1233.49 & 0.52 \\ +CatBoost (T) & 1522${}_{-27,+30}$ & 0.360 & 12.3 & 6.3 & 1 & 7.9\% & 1233.49 & 0.07 \\ +LightGBM (T) & 1504${}_{-26,+33}$ & 0.302 & 13.0 & 10.9 & 0 & 9.5\% & 374.34 & 0.22 \\ +CatBoost (D) & 1502${}_{-25,+23}$ & 0.343 & 13.0 & 6.4 & 1 & 9.2\% & 5.31 & 0.07 \\ +XGBoost (T+E) & 1497${}_{-31,+30}$ & 0.320 & 13.2 & 9.3 & 0 & 9.6\% & 637.94 & 1.22 \\ +ModernNCA (T) & 1462${}_{-31,+33}$ & 0.275 & 14.7 & 7.1 & 2 & 9.2\% & 4614.64 & 0.52 \\ +ModernNCA (T+E) & 1455${}_{-33,+30}$ & 0.383 & 15.0 & 6.0 & 1 & 9.1\% & 4614.64 & 8.40 \\ +XGBoost (T) & 1450${}_{-30,+33}$ & 0.264 & 15.0 & 12.4 & 0 & 10.0\% & 637.94 & 0.17 \\ +TabM (D) & 1425${}_{-27,+31}$ & 0.272 & 16.3 & 10.0 & 0 & 11.5\% & 9.82 & 0.13 \\ +TorchMLP (T+E) & 1401${}_{-23,+28}$ & 0.228 & 17.1 & 12.2 & 0 & 10.5\% & 2372.22 & 2.09 \\ +RealMLP (T) & 1399${}_{-38,+31}$ & 0.198 & 17.4 & 12.6 & 0 & 11.3\% & 5472.30 & 0.17 \\ +EBM (T+E) & 1391${}_{-27,+26}$ & 0.170 & 17.7 & 10.7 & 0 & 13.9\% & 895.61 & 0.20 \\ +FastaiMLP (T+E) & 1367${}_{-30,+31}$ & 0.207 & 18.8 & 10.6 & 0 & 13.9\% & 582.77 & 4.52 \\ +ModernNCA (D) & 1355${}_{-26,+29}$ & 0.142 & 19.3 & 10.4 & 1 & 13.1\% & 14.53 & 0.34 \\ +EBM (T) & 1328${}_{-36,+26}$ & 0.110 & 20.6 & 15.1 & 0 & 14.7\% & 895.61 & 0.02 \\ +EBM (D) & 1292${}_{-29,+29}$ & 0.119 & 22.1 & 7.9 & 3 & 15.6\% & 3.72 & 0.04 \\ +XGBoost (D) & 1277${}_{-22,+34}$ & 0.102 & 22.6 & 18.0 & 0 & 13.2\% & 1.58 & 0.11 \\ +RealMLP (D) & 1274${}_{-30,+27}$ & 0.095 & 22.9 & 18.8 & 0 & 13.3\% & 35.20 & 0.18 \\ +TabDPT (D) & 1269${}_{-30,+35}$ & 0.186 & 23.0 & 6.2 & 3 & 14.7\% & 20.51 & 8.54 \\ +TorchMLP (T) & 1269${}_{-22,+32}$ & 0.085 & 23.0 & 19.1 & 0 & 13.2\% & 2372.22 & 0.15 \\ +ExtraTrees (T+E) & 1263${}_{-28,+29}$ & 0.082 & 23.3 & 15.9 & 0 & 15.2\% & 182.30 & 0.74 \\ +FastaiMLP (T) & 1259${}_{-29,+29}$ & 0.094 & 23.5 & 18.2 & 0 & 16.0\% & 582.77 & 0.29 \\ +RandomForest (T+E) & 1210${}_{-24,+31}$ & 0.067 & 25.5 & 15.4 & 0 & 16.0\% & 260.01 & 0.74 \\ +LightGBM (D) & 1206${}_{-25,+30}$ & 0.053 & 25.7 & 23.9 & 0 & 14.8\% & 1.41 & 0.12 \\ +ExtraTrees (T) & 1205${}_{-27,+30}$ & 0.056 & 25.8 & 20.7 & 0 & 16.7\% & 182.30 & 0.07 \\ +RandomForest (T) & 1156${}_{-28,+29}$ & 0.045 & 27.7 & 21.1 & 0 & 17.3\% & 260.01 & 0.07 \\ +TorchMLP (D) & 1103${}_{-32,+34}$ & 0.022 & 29.6 & 26.8 & 0 & 18.6\% & 6.03 & 0.13 \\ +FastaiMLP (D) & 1073${}_{-31,+34}$ & 0.027 & 30.6 & 27.5 & 0 & 21.5\% & 2.81 & 0.32 \\ +Linear (T+E) & 1030${}_{-39,+33}$ & 0.044 & 32.0 & 23.1 & 0 & 26.9\% & 44.46 & 0.20 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.003 & 32.8 & 31.3 & 0 & 23.4\% & 0.33 & 0.04 \\ +Linear (T) & 989${}_{-24,+35}$ & 0.028 & 33.1 & 26.3 & 0 & 27.9\% & 44.46 & 0.07 \\ +Linear (D) & 979${}_{-29,+35}$ & 0.020 & 33.4 & 17.4 & 1 & 29.2\% & 1.43 & 0.09 \\ +ExtraTrees (D) & 929${}_{-38,+35}$ & 0.011 & 34.7 & 31.5 & 0 & 26.1\% & 0.24 & 0.04 \\ +KNN (T+E) & 694${}_{-47,+39}$ & 0.000 & 38.8 & 38.5 & 0 & 49.2\% & 3.44 & 0.18 \\ +KNN (T) & 597${}_{-48,+54}$ & 0.000 & 39.8 & 39.7 & 0 & 51.1\% & 3.44 & 0.04 \\ +KNN (D) & 407${}_{-77,+83}$ & 0.000 & 41.2 & 41.1 & 0 & 59.7\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/tabicl/tabarena_leaderboard.csv b/data/tabicl/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..9ff26eed457798f850c0be6a353f123ed8bd3afa --- /dev/null +++ b/data/tabicl/tabarena_leaderboard.csv @@ -0,0 +1,43 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +AutoGluon 1.3 (4h),7750.377155031171,22.84719785201697,2751.5674214709147,2.9633558383567813,0.4305752039617605,0.3953654066045534,0.05350042752511665,0.03292137769247373,32689.64667720609,276.28827431069953,7.916666666666667,0.159155,6752.988276031282,3.4718479580349393,1222.5529738029095,2.35500290690449,0.3593825434326649,0.3493492611774055,0.022091856756724426,0.014502276761886618,26140.06177622487,154.15434639040092,5.0,7,3,2,24,1645.2,31.5,36.0,0.8313008130081301,0.34643609213236404 +TABM (tuned + ensemble),32820.849396629244,7.890166007074309,3102.495096524168,1.8344804606248617,0.5092283925039975,0.4916904001038966,0.06741513792808897,0.0531492276149925,48157.60289475875,128.99098784852723,8.277777777777779,0.17235499999999998,8134.35737352901,2.5847251441743637,2387.831479177676,1.4722284599091153,0.47136645123500964,0.47309371248601884,0.032755260863256086,0.01377708489439205,44088.87353980956,121.43882105497326,5.0,4,6,3,23,1634.2,35.1,31.2,0.8224932249322493,0.29539017627252917 +TABICL (default),112.87139065964723,20.33866377333064,9.733153451045638,2.345033367232273,0.4788051732855598,0.4999558134896783,0.05432482602183145,0.042761048439141636,178.07934402242574,242.8201765656135,9.430555555555555,0.17207,25.732762111557854,3.564396858215332,8.684246340890724,1.808164283651731,0.4992564187967412,0.5554459288972347,0.022981096428129733,0.00939871045261338,151.54793227614402,132.97465082181864,7.0,8,5,1,22,1598.7,30.5,39.9,0.7943766937669376,0.35893420945385096 +REALMLP (tuned + ensemble),78320.7099595922,13.457468075516783,5777.34529914899,4.747200702818298,0.5822217846671746,0.5363650919569951,0.08100170596576858,0.048333408990573846,141412.64388451062,278.6382916983155,9.458333333333334,0.166985,23101.338454975023,5.6439330816268924,5472.29624726055,3.0648418488665286,0.5258237901453162,0.5461812667291126,0.044731996378166494,0.02214701961011789,99490.3161960261,233.98656302124476,8.0,1,1,5,29,1597.7,28.7,33.8,0.7936991869918699,0.18239141458573893 +GBM (tuned + ensemble),2757.3124551578803,12.456019589415302,657.304398009506,2.529125543711048,0.6113071815088934,0.5862746617157221,0.08764117401144472,0.05490427588548946,9019.445489261983,194.08976472235491,9.875,0.16698000000000002,1484.5727322684393,3.4757837878333198,374.34433555986243,1.4573305804667545,0.6534872634143774,0.6218047916068159,0.048374130042081465,0.01740708281579771,8683.556526223521,104.28918180708692,8.5,1,3,1,31,1584.3,32.8,22.7,0.7835365853658537,0.17646098963500922 +TABM (tuned),32820.849396629244,0.8654038354202553,3102.495096524168,0.19252750021110288,0.5912555585094392,0.5498665833450093,0.07844798244845448,0.06744705276522203,48157.60289475875,13.240850473332038,11.569444444444445,0.174175,8134.35737352901,0.26544706026713055,2387.831479177676,0.1626291486781835,0.5877464635288101,0.5841370688586468,0.037275782250363654,0.01907334143513442,44088.87353980956,10.02168824405339,12.0,1,4,3,28,1536.9,34.6,31.2,0.7422086720867209,0.18618211258130268 +CAT (tuned + ensemble),14620.521881076362,2.36174942917294,2269.8595810438947,0.6522846741920234,0.6226459246322733,0.5795784602549343,0.07694002639502172,0.04688218046996814,28484.88552620336,44.38703157771985,11.930555555555555,0.16040500000000002,4812.184866507848,1.2190414004855685,1233.4944988708792,0.5157389806383175,0.6153632608570143,0.6133377930340731,0.04311874094655577,0.022138825584558906,20694.132105891145,37.76547447057217,11.0,1,1,1,33,1529.9,27.9,28.5,0.7334010840108401,0.14347848502260266 +CAT (tuned),14620.521881076362,0.4177007556697469,2269.8595810438947,0.10130234094803403,0.6397756322608608,0.595921365819077,0.07946152837678017,0.046679526112002975,28484.88552620336,6.396687248681741,12.319444444444445,0.161705,4812.184866507848,0.12577376100752088,1233.4944988708792,0.07246540449837217,0.6663747291561838,0.6619777725537788,0.04785266987647563,0.026129509718822636,20694.132105891145,5.269306492183282,12.25,1,3,1,31,1521.7,29.1,26.2,0.7239159891598916,0.15993342501015134 +GBM (tuned),2757.3124551578803,1.9554996257946817,657.304398009506,0.5137681973731403,0.6982904100452502,0.6488862533650219,0.09547546943100074,0.06488642847403936,9019.445489261983,34.03980226584826,12.958333333333334,0.17060999999999998,1484.5727322684393,0.5311000559065078,374.34433555986243,0.2150466969054991,0.7332165833714938,0.6835161575453327,0.05123166148223679,0.024837453552070406,8683.556526223521,16.198247708611177,12.5,0,0,0,36,1503.8,32.8,25.3,0.7083333333333334,0.0919832914612663 +CAT (default),225.3881851879167,0.2547764916478852,113.28441441630724,0.11944997568510514,0.6569717560090025,0.6334563301133419,0.09235672092720176,0.05024888832619162,427.0412955109482,6.664774458427448,13.0,0.16373,17.732768376668293,0.17626664373609757,5.3143473208136776,0.0720373699728269,0.6872314742824219,0.6803142470448004,0.04732883359628853,0.02335001341945147,105.75875711549895,5.435542513489766,14.0,1,3,2,30,1502.1,22.6,24.9,0.7073170731707317,0.15658825936709342 +XGB (tuned + ensemble),5493.491215199232,6.412096421880487,917.6069037638632,2.6250017135715122,0.6804014135907539,0.6480950730376123,0.0961225966782588,0.06599826390387813,11372.296711293906,142.78657719786145,13.180555555555555,0.165745,1589.3805764291023,2.081376380390591,637.9415221019913,1.2169344189421265,0.7126419091535116,0.6677711533451594,0.05643896517815861,0.028686095761617292,9210.443722124051,76.49553505940113,11.0,0,0,1,35,1497.4,29.9,30.7,0.7029132791327913,0.10712949410746893 +MNCA (tuned),57657.94978451405,20.4386415688344,5175.01000177568,1.4870657842617447,0.7251534847407544,0.6345357500361336,0.09177847543835013,0.066116504035111,84656.4189016225,131.8799800913606,14.652777777777779,0.175815,13573.088971928755,0.5870524644851685,4614.640089198099,0.5156200362715774,0.7744612480983963,0.6562641413967298,0.06242848849069588,0.03523361966179468,72619.42076015053,27.073934594746163,13.5,2,0,1,33,1461.5,32.9,30.2,0.6670054200542005,0.14077979219617281 +MNCA (tuned + ensemble),57657.94978451405,541.5858152206297,5175.01000177568,37.18030109053553,0.6174214208973879,0.578573359765691,0.091280224261016,0.07648319937009321,84656.4189016225,3366.8719867776967,14.972222222222221,0.19167499999999998,13573.088971928755,13.54958667092853,4614.640089198099,8.395978282094799,0.6265388929939845,0.5467453849211364,0.06524742082240353,0.02282628933015263,72619.42076015053,536.5448946581089,11.0,1,2,4,29,1454.7,29.8,32.8,0.6592140921409214,0.16725857123574286 +XGB (tuned),5493.491215199232,1.3085634729008617,917.6069037638632,0.6530885382876873,0.7363747567107989,0.6897295053665443,0.1003192246432696,0.07056737035819206,11372.296711293906,29.704007586433832,15.0,0.16848000000000002,1589.3805764291023,0.36310173670450846,637.9415221019913,0.1716870718901738,0.7444722954009122,0.7092778593623184,0.06467959137432433,0.034634993554849815,9210.443722124051,11.556301431859715,14.0,0,0,0,36,1450.4,32.8,29.2,0.6585365853658537,0.08096098336783362 +TABM (default),138.894014670893,0.939259964963536,13.258693701508712,0.18837149821625798,0.7280741356398281,0.7031672840939802,0.11528709718327648,0.08598939671962084,198.59294054356857,12.617551449180137,16.291666666666668,0.175255,25.32820102903578,0.18552133904563056,9.819120211215754,0.1330683562425531,0.8404844924976476,0.7640196748614664,0.05825776091558643,0.026799112086523788,147.95052750875777,10.814297716965921,14.5,0,1,0,35,1425.2,30.7,26.3,0.6270325203252033,0.1003195000204453 +NN_TORCH (tuned + ensemble),25121.675461051862,15.268945999498719,2998.2377676701126,3.008285271799925,0.7723767872546564,0.7329991335924426,0.1049995713196365,0.07342829613614794,59110.75454743922,227.46421879425387,17.13888888888889,0.17071999999999998,8702.23048403528,3.8643554978900485,2372.2159626094954,2.0891774346723144,0.9031626632034824,0.796545307543182,0.06602425212832286,0.04001808164772668,46965.794452564674,173.17543399472862,17.0,0,0,1,35,1401.0,27.8,22.9,0.6063685636856369,0.08227944225449874 +REALMLP (tuned),78320.7099595922,0.5876796141082858,5777.34529914899,0.24772644002890626,0.8023310570642167,0.7196142224519255,0.11271280938723849,0.07745170331430798,141412.64388451062,13.205522206162579,17.38888888888889,0.16874,23101.338454975023,0.26369897921880087,5472.29624726055,0.17084459589124912,0.8787741747777617,0.7325797818153339,0.07388175037437161,0.030556202222580157,99490.3161960261,10.835281307428414,16.0,0,0,1,35,1398.7,31.0,37.3,0.6002710027100271,0.0795807238824226 +EBM (tuned + ensemble),28019.426662537786,1.0875822747195207,2272.8378900325356,0.27343562526003057,0.8303965578080257,0.7998301684426825,0.139104611556999,0.10652863155292883,26169.487179400727,18.583552688722236,17.666666666666668,0.177485,2278.5297676722207,0.3938958803812663,895.6082584208186,0.2030472330989584,0.9326307523783561,0.8571175970768821,0.0823298574360965,0.032353455252280325,16406.757135657386,11.254406005139426,18.5,0,1,1,34,1391.0,25.8,26.8,0.5934959349593496,0.0934328920963366 +FASTAI (tuned + ensemble),7322.968421160292,18.779504420875032,1297.972449752507,8.192088559474888,0.7926888983747619,0.7673069188967362,0.13929056478939184,0.07943566667656245,19993.18321597409,475.72057536620804,18.833333333333332,0.17986000000000002,2947.7962622510063,11.628821227285597,582.7734793353083,4.517300460862554,0.9972327674497973,0.820063334450488,0.07610987251289619,0.051962323371093286,17127.566654953327,460.36404533374946,18.5,0,1,2,33,1367.4,30.8,29.1,0.5650406504065041,0.09441557919127325 +MNCA (default),316.27131853140435,10.342945864171158,16.66944361197823,0.8061333025000202,0.8583270774687776,0.7849274466188256,0.13081067264693186,0.08485561499744637,268.415346120585,74.8577885297529,19.333333333333332,0.18519,30.626362359523775,0.5374451610777113,14.527288647230225,0.336567721078897,1.0,0.8518797436324906,0.06725399583885783,0.039754913635691974,215.5139276781307,21.914790751288265,21.0,1,0,0,35,1354.8,28.5,25.4,0.5528455284552846,0.09575174716206745 +EBM (tuned),28019.426662537786,0.13116049015963518,2272.8378900325356,0.036898209286527296,0.8902973079447266,0.8411638055547851,0.14660895675863106,0.1137392135429118,26169.487179400727,2.281962543329168,20.555555555555557,0.179115,2278.5297676722207,0.04349470535914103,895.6082584208186,0.02459548072380017,1.0,0.903638712948607,0.0875118921642335,0.033230693653085626,16406.757135657386,1.2360778780029142,21.0,0,0,0,36,1327.6,25.2,35.7,0.5230352303523035,0.0663003158876297 +EBM (default),118.50732698676026,0.14401131014765045,8.954596469527505,0.06265208718847394,0.8813849391122723,0.8575746263475605,0.15618392351407046,0.1173829251060243,115.00830556550737,3.283968403256123,22.055555555555557,0.18057,9.348468089103699,0.062386990918053525,3.724321540044417,0.037089624442184096,1.0,0.9382548154739399,0.09278733883112866,0.03641693127266392,64.88241965125457,2.244816344727411,23.5,3,0,0,33,1291.6,28.2,28.5,0.48644986449864497,0.12712641408804534 +XGB (default),11.683355353643865,0.5746506972813311,2.5273290583117265,0.2898449163711814,0.8979377675030106,0.8480945944789343,0.13212736937606528,0.11521166421593966,33.06311547785161,14.058796569155744,22.61111111111111,0.17317,5.340686360994974,0.2856193900108337,1.5761941364945817,0.11450144426278887,1.0,0.91672704407903,0.09869044613168698,0.05725778772571348,33.38866095520069,9.412365893361752,21.0,0,0,0,36,1277.1,33.9,21.8,0.47289972899729,0.05552162629067966 +REALMLP (default),499.37680751445856,0.5946384676444678,36.31037117287357,0.250663200278434,0.9051917890614664,0.8440600984706405,0.13339765969888573,0.09497626322643343,898.4271769217308,13.32830039320175,22.875,0.17601499999999998,147.1676659848955,0.26458012130525377,35.19995410895538,0.1841485669408721,1.0,0.8960472101429542,0.11263887324292665,0.05040272045303727,622.1033556481548,11.425702599260347,23.5,0,0,0,36,1273.5,26.9,30.0,0.46646341463414637,0.05316555619508551 +NN_TORCH (tuned),25121.675461051862,0.8252101601641856,2998.2377676701126,0.18188676084331395,0.9148909672592942,0.8429645197476189,0.13163265867904522,0.0979338033406521,59110.75454743922,12.21885557115889,23.01388888888889,0.17446499999999998,8702.23048403528,0.23337317837609184,2372.2159626094954,0.14653810958067576,1.0,0.8984711644371493,0.08673667098005605,0.06014400941853005,46965.794452564674,9.196372470124006,24.0,0,0,0,36,1268.7,31.8,21.6,0.46307588075880757,0.052482110192258194 +TABDPT (default),176.01594519710835,69.14393293923803,27.198725725356752,23.388429074834338,0.8137945017331583,0.7840494520773492,0.14650365650107092,0.11089852283070799,507.4757748221604,1409.9337088973832,23.02777777777778,0.200525,98.94233159224191,28.93788754940033,20.513119835829798,8.53560138835576,1.0,0.9339842943576007,0.09923475589347508,0.04191194931280873,462.0657004935143,1196.5084355075069,27.0,3,1,2,30,1268.8,34.7,29.1,0.4627371273712737,0.160184662131605 +XT (tuned + ensemble),1031.1996921384775,2.988154384089105,357.27527748345153,1.3397702898173773,0.9176899118580594,0.8688120880193462,0.15241364522714682,0.11524290202486896,4802.300435215387,79.00786738708753,23.305555555555557,0.18232500000000001,744.239438480801,1.8136235740449693,182.30061053451453,0.7431041876698922,1.0,0.941453859089225,0.08575210936900413,0.062337182875290234,3258.869343201663,69.06723555057565,26.5,0,0,0,36,1263.4,28.2,27.4,0.4559620596205962,0.06295015232961548 +FASTAI (tuned),7322.968421160292,1.0651015653286453,1297.972449752507,0.6247631304455121,0.9058648946317567,0.8471459785625569,0.16009499027089993,0.10119416426388964,19993.18321597409,33.55086488448845,23.48611111111111,0.18178,2947.7962622510063,0.8198094805081686,582.7734793353083,0.28744913553656576,1.0,0.9016172166129159,0.08909024228375345,0.06045194430933999,17127.566654953327,27.88231049239245,24.0,0,0,0,36,1259.2,28.2,28.6,0.4515582655826558,0.054895327587024506 +RF (tuned + ensemble),2044.775237460416,2.3872891816092126,416.2783457096328,1.255027602569144,0.932518287859348,0.9003532053450211,0.1595494360924323,0.12881789867366603,5642.134899127388,70.94429168864653,25.52777777777778,0.178315,852.5537050988939,1.9029027620951335,260.0125674942402,0.7428875097152683,1.0,0.9855078952116808,0.09488416408425421,0.07479057874905146,4493.239522943328,63.634247370273194,27.0,0,1,1,34,1210.2,31.0,23.3,0.40176151761517614,0.06479346005504813 +GBM (default),6.758794430523743,0.5789695977428813,2.490182745714093,0.15718383308630232,0.9471769132881858,0.9059712649517301,0.148345200939041,0.11387674425058363,33.5255932823166,11.26499493738597,25.666666666666668,0.18472,5.414635124471452,0.25708606508043075,1.4105395256066293,0.11924102542301018,1.0,0.9314537409293944,0.11068698010353623,0.06510168593553425,28.347402616029974,6.917663985246303,25.0,0,0,0,36,1206.0,29.8,24.3,0.3983739837398374,0.041771634889801394 +XT (tuned),1031.1996921384775,0.29890986717777485,357.27527748345153,0.16424540531360832,0.9439228284777494,0.9022142402747604,0.16661997486773536,0.1253673531425734,4802.300435215387,8.701857547554141,25.75,0.18223,744.239438480801,0.18342396948072645,182.30061053451453,0.07494392763268541,1.0,0.9726085839753669,0.10587648465771732,0.06747774841145451,3258.869343201663,8.07141276727868,29.25,0,0,0,36,1204.9,30.0,26.7,0.39634146341463417,0.04825337807791283 +RF (tuned),2044.775237460416,0.23278873113938317,416.2783457096328,0.1507722695594024,0.9551019371507523,0.9209883678532962,0.17329719554098053,0.14075801263587298,5642.134899127388,7.55687197585497,27.708333333333332,0.18139,852.5537050988939,0.17230602105458576,260.0125674942402,0.06758631242886748,1.0,0.9951901183637382,0.10801648961735072,0.08172079532650084,4493.239522943328,6.782207740325264,29.0,0,0,1,35,1155.7,28.5,27.6,0.3485772357723577,0.04733340788805532 +NN_TORCH (default),48.70578088156971,0.5936917389616555,11.125118656665736,0.17673793806000127,0.978257373569646,0.9461626627360779,0.18617243329422217,0.13932265609762545,163.6050788464746,10.908192435522393,29.59722222222222,0.180355,25.35211862458123,0.2432508071263631,6.0317417768089925,0.12717011148259783,1.0,1.0,0.1315979221742642,0.08392578273701758,147.85412694112986,8.619821917518276,30.0,0,0,0,36,1103.4,33.9,31.4,0.3025067750677507,0.037309167342424246 +FASTAI (default),31.09969735491423,1.10650484436824,4.683882685740998,0.49602131438348224,0.972512597016023,0.937491255300511,0.214763192071676,0.16309292278867282,78.50334203533181,29.20329942994179,30.61111111111111,0.19183499999999998,12.376497785250347,0.7602158255047269,2.8056596594025227,0.32364197153238683,1.0,0.9999615975422427,0.16139709053294848,0.10101015982783891,62.830982755166104,26.31473180904403,33.5,0,0,0,36,1072.9,33.5,31.0,0.2777777777777778,0.03642319913980632 +LR (tuned + ensemble),247.81690773551847,1.5417047552120537,86.9527487524539,0.32565721661614494,0.9561054304859822,0.945372194390879,0.26906892579103764,0.2196796767290125,1145.6529776499187,22.80693330823688,32.02777777777778,0.20382,171.4822693798277,0.2983522944980197,44.45837271402539,0.19711479228248058,1.0,1.0,0.2029910383523567,0.12545333458842928,713.1690352739483,13.25530093456624,35.0,0,0,1,35,1029.6,32.8,38.1,0.24322493224932248,0.04334753037904041 +RF (default),3.0601362299771955,0.1389405298380204,0.4197356009776281,0.0653398591441877,0.9965256868256966,0.9750534412705346,0.23374289847938945,0.22890241001401435,6.380264980433452,3.8863601848700204,32.84722222222222,0.21025,1.1198331514994302,0.08397722244262695,0.3277067685609757,0.03542500892500126,1.0,1.0,0.17586669886220047,0.11692775501298655,5.654732996519019,3.4893450212553976,34.75,0,0,0,36,1000.0,0.0,0.0,0.22323848238482386,0.03196753393423037 +LR (tuned),247.81690773551847,0.4449969635333544,86.9527487524539,0.10396867681281166,0.9723830890370608,0.9526957165635138,0.2793124361704181,0.22914434571494902,1145.6529776499187,6.67768937474542,33.111111111111114,0.20357,171.4822693798277,0.11908173561096191,44.45837271402539,0.07001089403664298,1.0,1.0,0.209915797201436,0.13903865882301508,713.1690352739483,4.422167155622043,36.0,0,0,0,36,989.1,34.1,23.6,0.21680216802168023,0.03806036380315983 +LR (default),6.169420995020572,0.45878614010634244,2.106396664251635,0.12050053609932035,0.9795929927195421,0.9593614170235713,0.2916458967947873,0.2548930671246872,29.155480122785303,7.705503358920759,33.388888888888886,0.20875,5.325402127371894,0.12779696782430014,1.4313534079421033,0.0896510462814927,1.0,1.0,0.2098231228117865,0.1419072705068749,21.98095065107635,4.735441569338942,36.5,1,0,0,35,979.0,34.7,28.5,0.21002710027100271,0.05743750876797652 +XT (default),1.8043978815461381,0.17975940917745048,0.3713595805715374,0.06954273446410252,0.9892721532660226,0.9709853179280655,0.2609714953848282,0.2584310419389792,5.400149437442951,4.422216908920569,34.69444444444444,0.21292,0.9855960739983453,0.08559976683722602,0.24198385110028492,0.03905757784211604,1.0,1.0,0.18424442460299273,0.13284238574591212,4.712255061768779,3.9447928104085292,36.0,0,0,0,36,928.7,34.2,37.2,0.17818428184281843,0.031705419003530376 +KNN (tuned + ensemble),167.8303013191547,12.105770299979199,8.709396553002023,0.6510287601474662,1.0,0.9950381724620512,0.4922467034154276,0.6086097785091938,80.7590848163391,80.9066319119832,38.84722222222222,0.31954499999999997,14.630206929312813,0.18374058273103502,3.4367890290794296,0.18360265549810623,1.0,1.0,0.4552264030449861,0.6664113509907352,59.32150817198775,12.487074071232104,40.0,0,0,0,36,694.5,38.1,46.7,0.0768970189701897,0.025970231106033758 +KNN (tuned),167.8303013191547,1.8655599888460135,8.709396553002023,0.10631861792797924,1.0,0.9973606072760356,0.5109632324100885,0.654584490441368,80.7590848163391,12.961828932017745,39.84722222222222,0.32433,14.630206929312813,0.07810062832302517,3.4367890290794296,0.039402452723266784,1.0,1.0,0.4955223463378593,0.7068928996407389,59.32150817198775,2.342964973116946,41.0,0,0,0,36,596.8,53.8,47.6,0.05250677506775068,0.02518666084837813 +KNN (default),0.8106582238350385,0.22655490967962477,0.11174909219233083,0.031188125017345747,1.0,1.0,0.5965726261878597,0.9589083406549634,1.0058498096362567,2.4347704716843976,41.25,0.392575,0.22842825783623588,0.035103811158074275,0.06799989431884934,0.01944144969012079,1.0,1.0,0.6183072807462191,1.0,1.0,1.2342542936555674,42.0,0,0,0,36,406.8,82.2,76.6,0.018292682926829267,0.02432448428372201 diff --git a/data/tabicl/tuning-impact-elo-horizontal.pdf b/data/tabicl/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..ea328edb768a938cc1cd91dd4625f5cfeeba78ea Binary files /dev/null and b/data/tabicl/tuning-impact-elo-horizontal.pdf differ diff --git a/data/tabicl/tuning-impact-elo-horizontal.png.zip b/data/tabicl/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..9ba326e93cb3d1dedb1b9ee2b47b2ff585feedb3 --- /dev/null +++ b/data/tabicl/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e858c95af701d1a8d5d6368f1a8c3669f399c2229854ccff0595e69c3ea3da0d +size 132739 diff --git a/data/tabpfn-cls/figures/critical-diagram.pdf b/data/tabpfn-cls/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..949ed98e2d1001003a66751c800bfd5dddce1761 Binary files /dev/null and b/data/tabpfn-cls/figures/critical-diagram.pdf differ diff --git a/data/tabpfn-cls/figures/critical-diagram.png.zip b/data/tabpfn-cls/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..c621877d3cafbab56387efc63b4f34b1b84d9942 --- /dev/null +++ b/data/tabpfn-cls/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5777d71a73c53eaab6e2d3e915fddcdb926df5cae2f76e45b5ac8305230a8a2b +size 315193 diff --git a/data/tabpfn-cls/leaderboard.tex b/data/tabpfn-cls/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..35e6057280fc43f5c059298d0300adb0d291476f --- /dev/null +++ b/data/tabpfn-cls/leaderboard.tex @@ -0,0 +1,52 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +TabPFNv2 (T+E) & \textcolor{gold}{\textbf{1730${}_{-37,+55}$}} & \textcolor{gold}{\textbf{0.700}} & \textcolor{gold}{\textbf{5.4}} & \textcolor{gold}{\textbf{2.0}} & \textcolor{gold}{\textbf{9}} & \textcolor{gold}{\textbf{4.1\%}} & 3445.60 & 48.24 \\ +TabM (T+E) & \textcolor{silver}{\textbf{1613${}_{-39,+44}$}} & 0.483 & \textcolor{silver}{\textbf{8.7}} & 4.2 & \textcolor{bronze}{\textbf{2}} & \textcolor{bronze}{\textbf{8.7\%}} & 2828.45 & 1.60 \\ +TabPFNv2 (T) & \textcolor{bronze}{\textbf{1576${}_{-30,+46}$}} & \textcolor{silver}{\textbf{0.544}} & \textcolor{bronze}{\textbf{9.9}} & \textcolor{bronze}{\textbf{3.5}} & 1 & \textcolor{silver}{\textbf{7.7\%}} & 3445.60 & 1.00 \\ +RealMLP (T+E) & 1568${}_{-29,+35}$ & 0.402 & 10.3 & 6.5 & 0 & 9.8\% & 5786.69 & 4.27 \\ +AutoGluon 1.3 (4h) & 1541${}_{-35,+42}$ & 0.440 & 11.3 & 4.9 & \textcolor{bronze}{\textbf{2}} & 8.9\% & 2309.21 & 2.55 \\ +TabPFNv2 (D) & 1540${}_{-42,+45}$ & \textcolor{bronze}{\textbf{0.499}} & 11.5 & \textcolor{silver}{\textbf{3.1}} & \textcolor{silver}{\textbf{5}} & 9.0\% & 4.06 & 0.44 \\ +LightGBM (T+E) & 1516${}_{-28,+51}$ & 0.319 & 12.2 & 6.9 & 1 & 11.2\% & 647.56 & 1.72 \\ +TabM (T) & 1510${}_{-31,+38}$ & 0.387 & 12.6 & 7.1 & 0 & 9.9\% & 2828.45 & 0.22 \\ +LightGBM (T) & 1463${}_{-33,+36}$ & 0.262 & 14.6 & 12.6 & 0 & 11.6\% & 647.56 & 0.28 \\ +CatBoost (T+E) & 1453${}_{-31,+33}$ & 0.271 & 15.1 & 11.2 & 0 & 11.1\% & 1465.86 & 0.69 \\ +CatBoost (T) & 1434${}_{-34,+48}$ & 0.247 & 15.8 & 9.9 & 0 & 11.4\% & 1465.86 & 0.09 \\ +CatBoost (D) & 1424${}_{-35,+44}$ & 0.228 & 16.3 & 8.2 & 1 & 12.8\% & 5.72 & 0.11 \\ +ModernNCA (T) & 1422${}_{-23,+43}$ & 0.255 & 16.3 & 8.3 & 1 & 11.1\% & 5944.88 & 0.52 \\ +TabM (D) & 1420${}_{-31,+39}$ & 0.288 & 16.3 & 10.9 & 0 & 13.2\% & 10.42 & 0.15 \\ +XGBoost (T+E) & 1412${}_{-30,+40}$ & 0.222 & 16.7 & 13.2 & 0 & 12.6\% & 766.06 & 1.92 \\ +EBM (T+E) & 1381${}_{-33,+33}$ & 0.186 & 18.3 & 12.0 & 0 & 15.2\% & 1109.06 & 0.23 \\ +XGBoost (T) & 1378${}_{-30,+36}$ & 0.181 & 18.4 & 16.1 & 0 & 12.8\% & 766.06 & 0.28 \\ +ModernNCA (T+E) & 1374${}_{-37,+43}$ & 0.307 & 18.7 & 7.7 & 0 & 12.2\% & 5944.88 & 8.40 \\ +RealMLP (T) & 1372${}_{-32,+39}$ & 0.201 & 18.6 & 14.3 & 0 & 12.9\% & 5786.69 & 0.25 \\ +ModernNCA (D) & 1366${}_{-28,+41}$ & 0.183 & 18.9 & 9.5 & 1 & 14.6\% & 14.80 & 0.34 \\ +TorchMLP (T+E) & 1356${}_{-35,+37}$ & 0.199 & 19.4 & 13.9 & 0 & 12.8\% & 2862.05 & 2.16 \\ +FastaiMLP (T+E) & 1354${}_{-36,+44}$ & 0.230 & 19.5 & 10.9 & 0 & 15.1\% & 1358.63 & 8.07 \\ +TabDPT (D) & 1318${}_{-38,+39}$ & 0.258 & 21.2 & 5.6 & \textcolor{bronze}{\textbf{2}} & 14.3\% & 27.49 & 8.86 \\ +EBM (T) & 1316${}_{-33,+30}$ & 0.120 & 21.4 & 16.7 & 0 & 16.0\% & 1109.06 & 0.03 \\ +EBM (D) & 1302${}_{-45,+29}$ & 0.153 & 22.2 & 9.5 & 1 & 16.7\% & 5.28 & 0.08 \\ +FastaiMLP (T) & 1265${}_{-36,+37}$ & 0.113 & 23.6 & 18.5 & 0 & 16.7\% & 1358.63 & 0.90 \\ +ExtraTrees (T+E) & 1260${}_{-45,+41}$ & 0.102 & 24.0 & 16.0 & 0 & 17.7\% & 370.85 & 1.47 \\ +TorchMLP (T) & 1239${}_{-32,+48}$ & 0.086 & 24.9 & 20.9 & 0 & 15.4\% & 2862.05 & 0.15 \\ +RealMLP (D) & 1232${}_{-33,+38}$ & 0.083 & 25.1 & 21.0 & 0 & 15.4\% & 36.24 & 0.28 \\ +XGBoost (D) & 1212${}_{-31,+32}$ & 0.032 & 26.1 & 23.8 & 0 & 16.2\% & 2.40 & 0.22 \\ +ExtraTrees (T) & 1201${}_{-36,+42}$ & 0.072 & 26.5 & 21.5 & 0 & 18.9\% & 370.85 & 0.16 \\ +RandomForest (T+E) & 1156${}_{-35,+40}$ & 0.057 & 28.5 & 20.9 & 0 & 19.0\% & 527.42 & 1.39 \\ +LightGBM (D) & 1152${}_{-34,+31}$ & 0.030 & 28.6 & 26.7 & 0 & 17.2\% & 2.90 & 0.13 \\ +RandomForest (T) & 1095${}_{-33,+30}$ & 0.023 & 30.9 & 27.9 & 0 & 20.3\% & 527.42 & 0.12 \\ +TorchMLP (D) & 1086${}_{-40,+33}$ & 0.030 & 31.4 & 27.9 & 0 & 20.9\% & 10.38 & 0.19 \\ +FastaiMLP (D) & 1045${}_{-43,+44}$ & 0.019 & 32.7 & 30.1 & 0 & 22.5\% & 4.73 & 0.62 \\ +Linear (T+E) & 1028${}_{-38,+37}$ & 0.055 & 33.3 & 22.1 & 0 & 27.9\% & 88.63 & 0.26 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.005 & 34.3 & 32.3 & 0 & 25.3\% & 0.45 & 0.07 \\ +Linear (T) & 995${}_{-43,+52}$ & 0.035 & 34.2 & 26.9 & 0 & 28.6\% & 88.63 & 0.09 \\ +Linear (D) & 987${}_{-47,+35}$ & 0.025 & 34.7 & 24.5 & 0 & 30.1\% & 2.27 & 0.11 \\ +ExtraTrees (D) & 948${}_{-42,+37}$ & 0.015 & 35.7 & 31.7 & 0 & 27.1\% & 0.40 & 0.07 \\ +KNN (T+E) & 709${}_{-55,+44}$ & 0.000 & 40.6 & 40.2 & 0 & 47.6\% & 3.71 & 0.17 \\ +KNN (T) & 607${}_{-58,+48}$ & 0.000 & 41.7 & 41.5 & 0 & 49.7\% & 3.71 & 0.04 \\ +KNN (D) & 404${}_{-91,+71}$ & 0.000 & 43.2 & 43.1 & 0 & 58.9\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/tabpfn-cls/tabarena_leaderboard.csv b/data/tabpfn-cls/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..341da3321da137fc4baf9ec8a0416700852d8104 --- /dev/null +++ b/data/tabpfn-cls/tabarena_leaderboard.csv @@ -0,0 +1,45 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +TABPFNV2 (tuned + ensemble),14614.639715819583,148.11643430860633,3877.419600731425,65.38306906011915,0.2999799624009034,0.3508180057529843,0.04072444608572467,0.02249127064828583,72438.91792383758,5032.936436246271,5.4423076923076925,0.17261500000000002,8173.816975453165,60.12680508957969,3445.602606086448,48.23597351862546,0.21441671773452015,0.4001087758331838,0.012939467867123522,0.0036359777404727134,43032.16406688419,2931.913108083719,2.5,9,4,2,11,1730.2,54.3,37.0,0.896690518783542,0.4957843880257673 +TABM (tuned + ensemble),7080.034926002046,2.290320276704609,3436.5218404284533,2.1079046445931953,0.5166525238887076,0.527521976123192,0.086800842708324,0.043258299937220426,43123.835967101855,114.22499982092596,8.73076923076923,0.17379499999999998,6405.6722231189415,1.880999751885732,2828.4486752645407,1.5957955528425738,0.47136645123500964,0.5421040878145871,0.02983493982634744,0.030759586434290852,44088.87353980956,109.46959390379024,7.5,2,2,2,20,1613.1,43.3,38.8,0.8202146690518783,0.2396584139862172 +TABPFNV2 (tuned),14614.639715819583,4.849548537201351,3877.419600731425,2.254088269272259,0.4562783530638654,0.475811679601056,0.0766403575120324,0.0442810758124181,72438.91792383758,165.31166707386078,9.903846153846153,0.1868,8173.816975453165,0.8871031337314181,3445.602606086448,0.9952991538273057,0.34109768962419973,0.4593895408835382,0.05280536936566166,0.02327176321840322,43032.16406688419,48.819771718398094,6.0,1,9,1,15,1576.1,45.8,29.6,0.7929338103756708,0.2823376982617133 +REALMLP (tuned + ensemble),17809.543566583365,5.343982117807764,5995.031437071161,5.920653730744878,0.5983187605703805,0.5678322254877531,0.09839564063585614,0.04590879055951824,112219.73072981052,290.95193637081104,10.26923076923077,0.17424,10344.939689996507,4.694504770967695,5786.691668992368,4.267078452745884,0.5696681015084324,0.5465586836857503,0.04527754696701686,0.029565742001882556,87871.6182808383,267.1363813998378,8.5,0,1,2,23,1567.9,34.8,28.4,0.7844364937388193,0.15429723445201868 +AutoGluon 1.3 (4h),5931.009838958377,3.1656774007357082,3673.8246676849526,3.0250106370782723,0.5599854226187498,0.5337145991261737,0.08907011998592504,0.04615933213503606,39712.28933278582,151.42916243848958,11.307692307692308,0.17691,5020.2655313505065,2.664059625731574,2309.213100138395,2.5543334551748154,0.5600808158121042,0.5584517906521018,0.03780698624823603,0.02506476981872757,35357.34871646478,128.98489015193604,8.0,2,1,1,22,1540.7,41.5,34.3,0.7602862254025045,0.20613108296931826 +TABPFNV2 (default),11.884418644151115,1.1739113624279316,5.514520630749067,0.6490305586450037,0.5013642769155608,0.5568562487011248,0.09004575912430655,0.05754087492207787,76.29505964660768,41.758850754001486,11.5,0.1886,8.093020015292698,0.44081105126274955,4.062007578219401,0.4356807523460313,0.3771347187714357,0.5208009960396708,0.04756401740069338,0.01844263956439219,58.62232186481185,25.39697585954705,5.0,5,0,4,17,1539.6,44.8,41.4,0.7558139534883721,0.31863358201013 +GBM (tuned + ensemble),1379.7589400635827,3.041326452829899,860.8328230742596,2.952036385308545,0.6806482110513491,0.6645460444945642,0.11216818462718134,0.05545122740079456,10455.248725429481,156.11534656534064,12.25,0.174085,1321.8940714677174,2.4085231754514904,647.5566852470606,1.7204157994123173,0.7005535661954161,0.6945287029937819,0.04657388172920551,0.03466366526779107,9480.825611975059,103.35126359750211,11.5,1,0,2,23,1516.5,50.5,27.3,0.7383720930232558,0.14542288002842635 +TABM (tuned),7080.034926002046,0.2169825582422762,3436.5218404284533,0.22059896519012676,0.6127855914191946,0.5828601382328022,0.0986473816088485,0.05699936952152234,43123.835967101855,11.305450805467302,12.615384615384615,0.17596499999999998,6405.6722231189415,0.19745506313112046,2828.4486752645407,0.21564835018398254,0.5915500043256848,0.6198210550262315,0.040413461666905515,0.03591570011140373,44088.87353980956,9.589800419015791,12.0,0,2,1,23,1510.5,37.4,30.1,0.7298747763864043,0.14075728057706524 +GBM (tuned),1379.7589400635827,0.5707490142594036,860.8328230742596,0.6449583363337497,0.737683616453633,0.6877138223822994,0.1164568051993735,0.06053872953879877,10455.248725429481,28.289876597893098,14.596153846153847,0.17371999999999999,1321.8940714677174,0.34941615396075776,647.5566852470606,0.28350492153737616,0.7826548575969626,0.7037614176458671,0.050617586147917504,0.04159812146185618,9480.825611975059,16.480248719877835,14.5,0,0,0,26,1463.0,35.1,32.1,0.6838103756708408,0.0790708424448326 +CAT (tuned + ensemble),4698.536897969144,0.9597997341400538,2797.035390366479,0.7965719365801648,0.728920075896456,0.6962542564704413,0.11135355062114688,0.0642805547471566,27448.91121504087,44.04781428274726,15.076923076923077,0.1771,3111.985966463884,0.7725222905476887,1465.8584785724734,0.6919304562740058,0.7992439551459185,0.7183554115324284,0.058921425875253564,0.05124012090762392,21194.263639925673,39.04121549244424,15.5,0,0,1,25,1452.7,32.8,30.9,0.6726296958855098,0.08947798086606794 +CAT (tuned),4698.536897969144,0.11917537287769155,2797.035390366479,0.12051762325590008,0.7533425274831995,0.7126283655491066,0.11423172031671344,0.06447862450345623,27448.91121504087,5.822055889865127,15.807692307692308,0.177215,3111.985966463884,0.10264339711931017,1465.8584785724734,0.09138547661664378,0.8504867849422679,0.7732524933611578,0.06056740960266216,0.04927421846630764,21194.263639925673,5.623705460666335,15.5,0,1,1,24,1433.8,47.8,33.6,0.6556350626118068,0.10141113999651746 +CAT (default),184.70535022315818,0.14151572040003588,154.6142383995702,0.1555426094856078,0.7721520925382083,0.7558591506768357,0.127512601975712,0.06848520945739753,477.2608484422213,7.423108461919998,16.307692307692307,0.176625,11.6375067697631,0.16093741522894967,5.723546572951673,0.1101036800878527,0.8708469956136012,0.8157510033149784,0.06821441929234145,0.03610517615857824,107.22813859237792,5.910285969899903,16.5,1,1,0,24,1423.6,43.5,34.5,0.6440071556350626,0.12237694076494982 +MNCA (tuned),10872.434398398032,0.5841771599573967,5614.838977261846,0.5087025474163123,0.7448490525012423,0.6528141060417343,0.1114751577750499,0.06770686853786761,77920.67944321278,26.83390437537825,16.326923076923077,0.17598,10294.846671289868,0.4166409836875068,5944.878874246984,0.5156200362715774,0.8177816514691232,0.7153665203287911,0.06458218247386,0.057542536642744974,66956.02864547497,22.936826653686147,15.5,1,0,0,25,1421.6,42.1,22.8,0.6435599284436494,0.12097192012782881 +TABM (default),25.031941110761757,0.20188171669968172,14.494913436183246,0.20961960333163143,0.7120650416184634,0.7012823721451672,0.1315744004943588,0.07173661631718417,189.57549613749592,9.978425427673205,16.346153846153847,0.175255,19.491187883747948,0.16552388005786473,10.418743379746541,0.15495242186411928,0.7417274447397321,0.699650704977647,0.054824582544583556,0.04040915077838865,140.5904555433362,9.536419533323254,13.0,0,0,0,26,1419.6,38.1,31.0,0.6431127012522362,0.0919897125166928 +XGB (tuned + ensemble),1872.6912781718452,2.683822184852046,1147.2290814465296,3.3619425077092306,0.7784275988369416,0.7424706630813358,0.12568008952118312,0.07381567883855328,11800.394028929062,144.45205787298235,16.71153846153846,0.18004,1385.052251373397,1.6176902174949646,766.0569170086173,1.9172343251389494,0.8316341855574652,0.760927037785122,0.06960606637692168,0.04644957387374121,11520.40078776449,81.51508510452376,14.75,0,0,0,26,1411.8,39.9,29.3,0.6346153846153846,0.07589822431189361 +EBM (tuned + ensemble),2781.1579704396745,0.37791664172441536,1628.507747090886,0.31990997744824035,0.8144475987903695,0.8064005354583312,0.15206696850383009,0.1089393941831432,16692.42562947248,18.328617946180266,18.346153846153847,0.190645,1861.6872274941868,0.27221977710723877,1109.0589152421817,0.22935467594400044,0.9368603191709515,0.8247783608208548,0.08037688157371176,0.043314330342592314,17695.477225433653,12.287120931350431,17.5,0,0,1,25,1381.4,32.7,32.2,0.5966010733452594,0.08314119511743266 +XGB (tuned),1872.6912781718452,0.6264070270407913,1147.2290814465296,0.8619192527074968,0.8187393857188691,0.7659164198132343,0.12822901252324154,0.07642912196651949,11800.394028929062,30.013383573853858,18.384615384615383,0.18088500000000002,1385.052251373397,0.25049915578630233,766.0569170086173,0.27754517145625424,0.9017943832788666,0.7605470316924886,0.07118581565039656,0.047811824528513944,11520.40078776449,11.544680214373663,18.5,0,0,0,26,1378.5,35.5,29.7,0.5957066189624329,0.06222405662509155 +REALMLP (tuned),17809.543566583365,0.2565666730587299,5995.031437071161,0.31613979844114337,0.7992732991174807,0.7227589974238805,0.12863822388924467,0.07027163136044182,112219.73072981052,14.318352639712598,18.576923076923077,0.174605,10344.939689996507,0.2056242651409573,5786.691668992368,0.2497109023068299,0.895201963654964,0.743649597152144,0.07110517080171636,0.061570080954222194,87871.6182808383,11.529830439504863,16.0,0,0,0,26,1372.4,39.0,31.6,0.5912343470483006,0.06992561021902978 +MNCA (tuned + ensemble),10872.434398398032,12.131162534310267,5614.838977261846,9.225622394556064,0.6933896363411076,0.6536687085391636,0.12185573096277677,0.08889701740350106,77920.67944321278,542.9013751751211,18.73076923076923,0.193055,10294.846671289868,9.331144248114692,5944.878874246984,8.395978282094799,0.722368050936999,0.6950921280587661,0.07664837244770173,0.04377006890431604,66956.02864547497,426.41545163369784,15.0,0,2,3,21,1373.8,42.4,36.6,0.5876565295169947,0.13070704479997633 +MNCA (default),28.48231661941251,0.466951452768766,14.20837500082612,0.38564885096222307,0.8169283513779425,0.7530542333525589,0.14585143948141835,0.0773932895220121,190.09277232716437,21.03504265137771,18.923076923076923,0.18519,24.325043747160173,0.36861725648244226,14.804303881798361,0.336567721078897,0.9808195818002508,0.7628608576740223,0.07974631950048428,0.051103226670839136,190.7747999331715,15.595042220247258,20.0,1,0,0,25,1366.3,40.7,27.8,0.5831842576028623,0.10524965098772952 +NN_TORCH (tuned + ensemble),8367.9645569837,3.8136142372066137,3603.347061143873,3.319891821445254,0.8005534385996745,0.7498215376019142,0.12794206929043583,0.07081910589005783,56068.072795326945,181.3894555134842,19.442307692307693,0.17949500000000002,6973.094145007928,2.944306871626112,2862.0511040893566,2.157502904371376,0.9508009219506811,0.862511041490974,0.06569550468767094,0.05827028045864932,51500.257380537965,155.60936197975602,19.5,0,0,0,26,1356.4,36.3,34.2,0.5711091234347049,0.07215298611135 +FASTAI (tuned + ensemble),2658.981896154697,9.680411353478066,1616.869681108774,10.454986295420108,0.7698173487890432,0.7557846698844588,0.15139579697694264,0.08366040852235096,20955.495033608,514.94962880429,19.48076923076923,0.17986000000000002,2313.697349058257,8.905639794137743,1358.6299051921596,8.066833347447606,0.9823907390988809,0.8091414898926652,0.0708883333873106,0.048679664768711495,17602.335588839425,531.7232414230566,19.0,0,1,0,25,1353.9,44.0,35.2,0.5702146690518783,0.09205987720395913 +TABDPT (default),73.6718590354308,22.101120046978323,33.70000421461804,29.140721170745643,0.7421770023997575,0.7245133560975703,0.14275646776738934,0.08635255976525731,529.9323761499634,1297.090837487081,21.192307692307693,0.204185,71.26209372944302,21.38572289016512,27.488664901286818,8.862313494979123,1.0,0.8616283663494628,0.06641392139095825,0.04534492007639533,513.7039796057306,1113.0844664803024,24.5,2,2,1,21,1318.4,38.4,37.3,0.5304114490161002,0.17900459142867556 +EBM (tuned),2781.1579704396745,0.04655793772803412,1628.507747090886,0.04424123436436782,0.8796773591220505,0.8434250327546724,0.15992716879761423,0.11675795280528733,16692.42562947248,2.268478500812272,21.403846153846153,0.19155,1861.6872274941868,0.0338150527742174,1109.0589152421817,0.02733291425041831,1.0,0.8873986658397226,0.0825732355632331,0.06121376901972641,17695.477225433653,1.252412448907763,23.0,0,0,0,26,1316.0,29.2,32.3,0.5254919499105546,0.06003476090570715 +EBM (default),10.549292183941246,0.06438207361433242,5.791308777159165,0.0798605102452594,0.8468070324259693,0.8438586931313998,0.16745694305480527,0.118805198129475,75.26102802648397,3.652629917649085,22.192307692307693,0.19248500000000002,8.052384217580158,0.051854162746005586,5.279257749622374,0.07769986864408396,1.0,0.8873152505729808,0.09267785222886898,0.043293158323819234,77.97362526017017,3.340101142403973,22.5,1,0,2,23,1301.9,28.1,44.5,0.5071556350626119,0.10554087510979013 +FASTAI (tuned),2658.981896154697,0.7253259826929142,1616.869681108774,0.8254771149356669,0.8865326975991655,0.8245875961206832,0.16677744125129712,0.09954936435915986,20955.495033608,39.61972184578173,23.634615384615383,0.18178,2313.697349058257,0.7597291602028741,1358.6299051921596,0.8969521438633071,1.0,0.8497666658809411,0.08449517974726001,0.06498589353723835,17602.335588839425,37.14448504861147,22.0,0,0,0,26,1265.1,36.2,35.8,0.473613595706619,0.05399738090101664 +XT (tuned + ensemble),714.8477392964893,1.554570463987497,476.1600651525857,1.7304383155841179,0.8984673764067701,0.8564702160947721,0.17712340098435098,0.11828689054002459,6037.128774148354,86.31947081771706,24.03846153846154,0.18718,684.9222148127026,1.3082488920953539,370.85408017752667,1.4664534567412004,1.0,0.9269961626847824,0.09673248729372463,0.07108643848262665,5339.627074654447,77.0823275943278,27.25,0,0,0,26,1259.5,40.3,44.2,0.46422182468694095,0.0626754356821208 +NN_TORCH (tuned),8367.9645569837,0.19940724566451504,3603.347061143873,0.20335754670694137,0.9141063957909149,0.8428893261232141,0.1541380745677305,0.09377895227382137,56068.072795326945,10.018943215736572,24.865384615384617,0.18092,6973.094145007928,0.144207231203715,2862.0511040893566,0.15177921475257505,1.0,0.9069473524345909,0.10357459936302066,0.09268904724445692,51500.257380537965,8.681540922799902,26.0,0,0,0,26,1239.0,47.3,31.9,0.44499105545617174,0.04779008142697727 +REALMLP (default),111.4785790004282,0.2675422726533352,36.9875135803688,0.319519629673943,0.9170988508971886,0.847622483837925,0.15413061134430597,0.09267449638027586,704.4596577225865,14.367018773597605,25.134615384615383,0.17819000000000002,62.036723497178826,0.20994164678785537,36.23724475975503,0.27878779609060306,1.0,0.913284574364005,0.10647523868579317,0.06746878542921386,536.9122103749642,12.116250573587863,27.5,0,0,0,26,1232.1,37.9,32.9,0.4387298747763864,0.04760521459930727 +XGB (default),4.970388249658113,0.2682310309165563,3.142391352663509,0.37694382359999684,0.967663807523489,0.898756262689123,0.16237433024745374,0.11714370521843961,37.42364826794432,14.932177592236862,26.134615384615383,0.18894,4.433991021580166,0.20984046989017063,2.395188706947506,0.2182544724645258,1.0,0.941194849755949,0.11516738963641299,0.07681752222912633,34.313637563138,10.099890730056913,25.25,0,0,0,26,1211.9,31.4,30.2,0.415474060822898,0.04201478681096431 +XT (tuned),714.8477392964893,0.1734743657275143,476.1600651525857,0.2154329300724878,0.9275522820640911,0.887564141555515,0.18857913605129123,0.1280797397016669,6037.128774148354,9.800458404720944,26.53846153846154,0.19,684.9222148127026,0.15512712796529132,370.85408017752667,0.16132775528274945,1.0,0.964865842827751,0.10725621698653792,0.07275376605805199,5339.627074654447,8.581565117405717,30.5,0,0,0,26,1201.4,41.6,35.9,0.40608228980322003,0.04648638701936883 +RF (tuned + ensemble),915.5438805149151,1.5748089445961848,532.5626464695567,1.640386311447312,0.9428934399339288,0.9039926301302649,0.19018684011327747,0.14314298208469173,6794.043055663636,81.1610891109226,28.5,0.18906,789.1687051984999,1.122593025366465,527.4239458868619,1.3899910445458383,1.0,0.9907748729003225,0.11236159795018158,0.08535641713443423,6269.6136687159,75.55960345229795,30.5,0,0,0,26,1156.5,39.9,34.3,0.36046511627906974,0.0478491858038661 +GBM (default),5.376282313848153,0.20799484711426958,3.3489543497095178,0.19728412961063485,0.9701181894525962,0.915742956827577,0.17152938547123983,0.11373807327831383,42.11806124941037,9.792256635961557,28.615384615384617,0.188175,5.033887876404656,0.22240020169152153,2.8984772023300502,0.13282292956587455,1.0,0.9478076331189742,0.11000494261371879,0.09964492878261602,38.718264562373335,7.411495765404641,28.5,0,0,0,26,1152.4,30.5,33.4,0.35778175313059035,0.037403999294090134 +RF (tuned),915.5438805149151,0.159982283706339,532.5626464695567,0.2003027211432199,0.9772506936041817,0.9347987177498762,0.20326325909664036,0.15286823505889485,6794.043055663636,8.85724716182767,30.923076923076923,0.190815,789.1687051984999,0.14185967445373537,527.4239458868619,0.12279197881507195,1.0,0.9952004355621868,0.12445981052443861,0.1007290587509927,6269.6136687159,8.226247917103098,32.0,0,0,0,26,1095.3,29.9,32.8,0.3041144901610018,0.03581014291575876 +NN_TORCH (default),27.39151145983965,0.20185868648382335,14.560395769408618,0.21573681790796437,0.9698948249425869,0.9351186482653575,0.20936255847720464,0.14226141280882956,188.3149665224302,10.435316060204023,31.365384615384617,0.183195,20.22722778055403,0.14770235617955524,10.376930987013111,0.18792402145583464,1.0,0.9906922804683864,0.13026011561239426,0.10007970888609818,197.09431521312473,8.356657050571908,32.0,0,0,0,26,1086.1,32.5,39.6,0.2938282647584973,0.03578144428976621 +FASTAI (default),10.397479128328143,0.6249765678348704,5.689409993948898,0.6426262937267089,0.9807465319111015,0.9376036240168233,0.22531136152799336,0.1728530866328165,79.38940070919118,32.314469844151965,32.69230769230769,0.19183499999999998,9.91800790362888,0.5629585729704962,4.729717448807326,0.6226443216085578,1.0,0.9989204657402586,0.15475454502602404,0.11423976777713259,63.05950085745769,32.34956133934325,35.5,0,0,0,26,1044.7,43.2,42.3,0.2629695885509839,0.03317460343409044 +LR (tuned + ensemble),175.85719627702338,0.42670365847074065,115.79203038641191,0.374819481777715,0.9447815704031849,0.9360140028124351,0.27869382005542503,0.24408605062730823,1448.7019616321465,20.41052886468638,33.26923076923077,0.216585,158.39292872746785,0.19311302105585734,88.63237206036187,0.25697546561814155,1.0,1.0,0.19775557962068557,0.1779543124949825,1246.7830478036524,13.25530093456624,37.0,0,0,1,25,1028.5,36.1,38.0,0.24955277280858676,0.04515944314429413 +LR (tuned),175.85719627702338,0.1364539579448537,115.79203038641191,0.12123112010680681,0.9645430999513104,0.9423513282510686,0.28577393687991265,0.2536463681984359,1448.7019616321465,6.009867470278231,34.25,0.21681,158.39292872746785,0.07476819356282552,88.63237206036187,0.08838151119168902,1.0,1.0,0.20539417502158785,0.18171290050416328,1246.7830478036524,4.016028876859062,37.0,0,0,0,26,994.9,51.4,43.0,0.22674418604651161,0.037212525548754094 +RF (default),0.9640304686676744,0.07426402477117686,0.5084719578868067,0.0844146006139,0.9951894125278877,0.9674741068203553,0.25319346313286606,0.2448343620876502,6.6754175478812146,4.153857105918795,34.28846153846154,0.213105,0.8910642200046115,0.05825435982810126,0.447836563000179,0.06594795127086661,1.0,1.0,0.17586669886220047,0.11508233454921707,5.654732996519019,3.502947097147378,36.5,0,0,0,26,1000.0,0.0,0.0,0.22584973166368516,0.030931987921747967 +LR (default),4.707631549162742,0.1483236100938585,2.804518891637732,0.1437661673750307,0.9745259999639975,0.9505629021800448,0.3011073713268812,0.28932043078797587,36.73103788870433,7.308777223189359,34.71153846153846,0.22103499999999998,4.746849238872528,0.08430976470311483,2.2657084486770183,0.10642460584640503,1.0,1.0,0.22353997609762993,0.22370793213879017,31.50980214431158,4.703818974402271,38.5,0,0,1,25,987.0,34.2,46.6,0.21601073345259392,0.040796023063587754 +XT (default),0.8462119445841536,0.0803421718442542,0.47456795510527094,0.08795126020679146,0.9851460583683391,0.9609016392969725,0.27096026301478127,0.27365502000930814,6.0304600748852675,4.406716139948246,35.65384615384615,0.215405,0.7635703219307794,0.06673479080200195,0.40395335630596185,0.07007418015238884,1.0,1.0,0.18129521821713152,0.14553182686734578,5.148113375558061,3.7424721126192138,38.0,0,0,0,26,948.4,36.6,41.3,0.19409660107334525,0.03158544652624897 +KNN (tuned + ensemble),16.440937072497146,0.2739423143558013,7.036183733958932,0.19964653999651732,1.0,0.9942158538470305,0.4761836733018972,0.5893872313062054,57.836601086323085,11.285407568076662,40.57692307692308,0.31118999999999997,6.799899099932777,0.1030390567249722,3.707274221912526,0.16730320161416595,1.0,1.0,0.4357561775644644,0.6183873456591064,55.06974575171796,8.266636963631449,42.0,0,0,0,26,708.8,43.4,54.7,0.07960644007155634,0.02489112819208369 +KNN (tuned),16.440937072497146,0.06075953357240074,7.036183733958932,0.04304950843110991,1.0,0.9966827264588594,0.4973658893922803,0.6435244474284593,57.836601086323085,2.5027008595459077,41.69230769230769,0.31467999999999996,6.799899099932777,0.036990099483066134,3.707274221912526,0.039402452723266784,1.0,1.0,0.4955223463378593,0.6729856958219811,55.06974575171796,2.091430487051362,43.0,0,0,0,26,606.6,47.2,57.4,0.05366726296958855,0.024070134689831643 +KNN (default),0.2899092884145231,0.0317453768518236,0.13426353967394633,0.031065835648244968,1.0,1.0,0.5891670346049616,0.9490733069136094,1.00065410372272,1.4527699655062962,43.25,0.34582,0.1258228341738383,0.02117468251122369,0.07457399441809831,0.021006283652748647,1.0,1.0,0.6134116269473473,1.0,1.0,1.0057335151210618,44.0,0,0,0,26,404.4,70.3,90.7,0.01744186046511628,0.02319085778775572 diff --git a/data/tabpfn-cls/tuning-impact-elo-horizontal.pdf b/data/tabpfn-cls/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..d8f8e5e7bfab42820d989c4d6c23c26f152c88a9 Binary files /dev/null and b/data/tabpfn-cls/tuning-impact-elo-horizontal.pdf differ diff --git a/data/tabpfn-cls/tuning-impact-elo-horizontal.png.zip b/data/tabpfn-cls/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..5eb5025d2eec888d186b09eb6c45746e09b7076a --- /dev/null +++ b/data/tabpfn-cls/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e1edd5fcf6f47ea36a683f43b329716a9ae9358b12b1ee57dfd78dd4cac854ee +size 135646 diff --git a/data/tabpfn-imputed-cls/figures/critical-diagram.pdf b/data/tabpfn-imputed-cls/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..4f5ac9425f25cf258b0de0477760c1fa95cdc716 Binary files /dev/null and b/data/tabpfn-imputed-cls/figures/critical-diagram.pdf differ diff --git a/data/tabpfn-imputed-cls/figures/critical-diagram.png.zip b/data/tabpfn-imputed-cls/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..24e08506593fee7a8727aefce7940752470d6e76 --- /dev/null +++ b/data/tabpfn-imputed-cls/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09fd50f8530f0264c02045700850c01bf4b212bcd6257fa3321f09e02e249a62 +size 319568 diff --git a/data/tabpfn-imputed-cls/leaderboard.tex b/data/tabpfn-imputed-cls/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..b41ff65b5428b5f7a450717fe825914a05f0b501 --- /dev/null +++ b/data/tabpfn-imputed-cls/leaderboard.tex @@ -0,0 +1,53 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +TabPFNv2 (T+E) & \textcolor{gold}{\textbf{1736${}_{-45,+46}$}} & \textcolor{gold}{\textbf{0.700}} & \textcolor{gold}{\textbf{5.8}} & \textcolor{gold}{\textbf{2.2}} & \textcolor{gold}{\textbf{8}} & \textcolor{gold}{\textbf{4.6\%}} & 3445.60 & 48.24 \\ +TabM (T+E) & \textcolor{silver}{\textbf{1612${}_{-37,+35}$}} & 0.483 & \textcolor{silver}{\textbf{9.3}} & 4.5 & 2 & 9.2\% & 2828.45 & 1.60 \\ +TabICL (D) & \textcolor{bronze}{\textbf{1608${}_{-34,+36}$}} & \textcolor{bronze}{\textbf{0.543}} & \textcolor{bronze}{\textbf{9.5}} & \textcolor{silver}{\textbf{3.3}} & \textcolor{silver}{\textbf{4}} & \textcolor{silver}{\textbf{7.1\%}} & 8.89 & 1.74 \\ +TabPFNv2 (T) & 1588${}_{-42,+32}$ & \textcolor{silver}{\textbf{0.544}} & 10.3 & 3.7 & 1 & \textcolor{bronze}{\textbf{8.2\%}} & 3445.60 & 1.00 \\ +RealMLP (T+E) & 1573${}_{-37,+29}$ & 0.402 & 10.9 & 7.1 & 0 & 10.4\% & 5786.69 & 4.27 \\ +AutoGluon 1.3 (4h) & 1544${}_{-44,+34}$ & 0.440 & 11.9 & 5.6 & 1 & 9.5\% & 2309.21 & 2.55 \\ +TabPFNv2 (D) & 1542${}_{-38,+38}$ & 0.499 & 12.0 & \textcolor{bronze}{\textbf{3.4}} & \textcolor{silver}{\textbf{4}} & 9.5\% & 4.06 & 0.44 \\ +LightGBM (T+E) & 1521${}_{-35,+40}$ & 0.319 & 12.9 & 7.1 & 1 & 11.8\% & 647.56 & 1.72 \\ +TabM (T) & 1512${}_{-40,+30}$ & 0.387 & 13.3 & 7.5 & 0 & 10.4\% & 2828.45 & 0.22 \\ +LightGBM (T) & 1462${}_{-34,+33}$ & 0.262 & 15.3 & 13.3 & 0 & 12.2\% & 647.56 & 0.28 \\ +CatBoost (T+E) & 1456${}_{-32,+38}$ & 0.271 & 15.8 & 11.6 & 0 & 11.7\% & 1465.86 & 0.69 \\ +CatBoost (T) & 1435${}_{-32,+36}$ & 0.247 & 16.5 & 10.5 & 0 & 12.0\% & 1465.86 & 0.09 \\ +TabM (D) & 1429${}_{-32,+35}$ & 0.288 & 17.0 & 11.2 & 0 & 13.7\% & 10.42 & 0.15 \\ +ModernNCA (T) & 1426${}_{-30,+37}$ & 0.255 & 17.1 & 8.7 & 1 & 11.7\% & 5944.88 & 0.52 \\ +CatBoost (D) & 1425${}_{-35,+32}$ & 0.228 & 17.0 & 10.0 & 0 & 13.3\% & 5.72 & 0.11 \\ +XGBoost (T+E) & 1415${}_{-33,+36}$ & 0.222 & 17.5 & 14.0 & 0 & 13.1\% & 766.06 & 1.92 \\ +EBM (T+E) & 1383${}_{-33,+31}$ & 0.186 & 19.1 & 12.5 & 0 & 15.7\% & 1109.06 & 0.23 \\ +XGBoost (T) & 1379${}_{-31,+39}$ & 0.181 & 19.2 & 16.8 & 0 & 13.4\% & 766.06 & 0.28 \\ +RealMLP (T) & 1379${}_{-42,+36}$ & 0.201 & 19.3 & 14.8 & 0 & 13.4\% & 5786.69 & 0.25 \\ +ModernNCA (T+E) & 1375${}_{-38,+35}$ & 0.307 & 19.5 & 8.6 & 0 & 12.7\% & 5944.88 & 8.40 \\ +ModernNCA (D) & 1367${}_{-33,+31}$ & 0.183 & 19.8 & 10.0 & 1 & 15.1\% & 14.80 & 0.34 \\ +TorchMLP (T+E) & 1356${}_{-32,+33}$ & 0.199 & 20.3 & 15.0 & 0 & 13.3\% & 2862.05 & 2.16 \\ +FastaiMLP (T+E) & 1356${}_{-35,+32}$ & 0.230 & 20.2 & 11.2 & 0 & 15.7\% & 1358.63 & 8.07 \\ +TabDPT (D) & 1323${}_{-33,+24}$ & 0.258 & 22.0 & 6.1 & 2 & 14.8\% & 27.49 & 8.86 \\ +EBM (T) & 1320${}_{-41,+33}$ & 0.120 & 22.1 & 17.3 & 0 & 16.5\% & 1109.06 & 0.03 \\ +EBM (D) & 1299${}_{-35,+37}$ & 0.153 & 23.0 & 9.7 & 1 & 17.3\% & 5.28 & 0.08 \\ +FastaiMLP (T) & 1269${}_{-40,+38}$ & 0.113 & 24.5 & 19.1 & 0 & 17.2\% & 1358.63 & 0.90 \\ +ExtraTrees (T+E) & 1260${}_{-47,+27}$ & 0.102 & 25.0 & 17.4 & 0 & 18.2\% & 370.85 & 1.47 \\ +TorchMLP (T) & 1238${}_{-33,+33}$ & 0.086 & 25.8 & 22.0 & 0 & 15.9\% & 2862.05 & 0.15 \\ +RealMLP (D) & 1234${}_{-36,+37}$ & 0.083 & 26.1 & 21.8 & 0 & 15.9\% & 36.24 & 0.28 \\ +XGBoost (D) & 1213${}_{-32,+41}$ & 0.032 & 27.1 & 24.8 & 0 & 16.7\% & 2.40 & 0.22 \\ +ExtraTrees (T) & 1200${}_{-32,+35}$ & 0.072 & 27.5 & 22.6 & 0 & 19.4\% & 370.85 & 0.16 \\ +RandomForest (T+E) & 1157${}_{-30,+36}$ & 0.057 & 29.5 & 22.0 & 0 & 19.5\% & 527.42 & 1.39 \\ +LightGBM (D) & 1154${}_{-29,+39}$ & 0.030 & 29.6 & 27.7 & 0 & 17.7\% & 2.90 & 0.13 \\ +RandomForest (T) & 1096${}_{-31,+36}$ & 0.023 & 31.9 & 29.1 & 0 & 20.8\% & 527.42 & 0.12 \\ +TorchMLP (D) & 1082${}_{-34,+36}$ & 0.030 & 32.3 & 28.8 & 0 & 21.4\% & 10.38 & 0.19 \\ +FastaiMLP (D) & 1051${}_{-33,+31}$ & 0.019 & 33.6 & 31.0 & 0 & 23.0\% & 4.73 & 0.62 \\ +Linear (T+E) & 1030${}_{-41,+39}$ & 0.055 & 34.2 & 22.9 & 0 & 28.3\% & 88.63 & 0.26 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.005 & 35.3 & 33.4 & 0 & 25.8\% & 0.45 & 0.07 \\ +Linear (T) & 997${}_{-41,+37}$ & 0.035 & 35.2 & 28.4 & 0 & 29.0\% & 88.63 & 0.09 \\ +Linear (D) & 982${}_{-39,+42}$ & 0.025 & 35.7 & 25.0 & 0 & 30.5\% & 2.27 & 0.11 \\ +ExtraTrees (D) & 956${}_{-51,+44}$ & 0.015 & 36.7 & 32.9 & 0 & 27.5\% & 0.40 & 0.07 \\ +KNN (T+E) & 704${}_{-46,+50}$ & 0.000 & 41.6 & 41.2 & 0 & 48.0\% & 3.71 & 0.17 \\ +KNN (T) & 609${}_{-48,+53}$ & 0.000 & 42.7 & 42.5 & 0 & 50.1\% & 3.71 & 0.04 \\ +KNN (D) & 408${}_{-96,+87}$ & 0.000 & 44.2 & 44.1 & 0 & 59.2\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/tabpfn-imputed-cls/tabarena_leaderboard.csv b/data/tabpfn-imputed-cls/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..9cfacb0044514fd9fcb6b0fd592ac532d061c310 --- /dev/null +++ b/data/tabpfn-imputed-cls/tabarena_leaderboard.csv @@ -0,0 +1,46 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +TABPFNV2 (tuned + ensemble),14614.639715819583,148.11643430860633,3877.419600731425,65.38306906011915,0.2999799624009034,0.36592302109392316,0.046199591967015556,0.028953147052208875,72438.91792383758,5032.936436246271,5.788461538461538,0.17261500000000002,8173.816975453165,60.12680508957969,3445.602606086448,48.23597351862546,0.21441671773452015,0.4068697696081146,0.021640889295981347,0.016874625134937742,43032.16406688419,2931.913108083719,3.5,8,4,1,13,1736.3,45.7,44.6,0.8911713286713286,0.46269305993659526 +TABM (tuned + ensemble),7080.034926002046,2.290320276704609,3436.5218404284533,2.1079046445931953,0.5166525238887076,0.547775172494987,0.09238776741660952,0.04985850997225548,43123.835967101855,114.22499982092596,9.307692307692308,0.17379499999999998,6405.6722231189415,1.880999751885732,2828.4486752645407,1.5957955528425738,0.47136645123500964,0.5540981099375837,0.04617144277378826,0.034316541138176816,44088.87353980956,109.46959390379024,8.0,2,1,1,22,1612.5,34.1,36.5,0.8111888111888111,0.22027167779430223 +TABICL (default),21.933824618351764,2.7954679915028757,10.278805574502684,2.035731958506663,0.45732829959408833,0.5181742657898207,0.07108757868512468,0.03468928052253071,145.15967621212897,119.47187491598106,9.48076923076923,0.17207,19.54939921696981,2.1857474181387158,8.891901835466445,1.7433667301105085,0.46959481282562443,0.583210155619012,0.03487584587193143,0.02062556592021359,120.41628593220491,98.62751622705991,8.0,4,3,1,18,1607.5,35.1,33.8,0.8072552447552448,0.3008875223205248 +TABPFNV2 (tuned),14614.639715819583,4.849548537201351,3877.419600731425,2.254088269272259,0.4562783530638654,0.48513499648162556,0.08179742986099706,0.05007438478688923,72438.91792383758,165.31166707386078,10.326923076923077,0.1868,8173.816975453165,0.8871031337314181,3445.602606086448,0.9952991538273057,0.34109768962419973,0.46901679172287375,0.05804204088839793,0.02327176321840322,43032.16406688419,48.819771718398094,6.0,1,8,1,16,1588.5,31.1,41.1,0.7880244755244755,0.26951399319966235 +REALMLP (tuned + ensemble),17809.543566583365,5.343982117807764,5995.031437071161,5.920653730744878,0.5983187605703805,0.5877560111308218,0.10395608954668528,0.05250154829922825,112219.73072981052,290.95193637081104,10.865384615384615,0.17424,10344.939689996507,4.694504770967695,5786.691668992368,4.267078452745884,0.5696681015084324,0.5635752491354304,0.051850568481074444,0.04632297855309826,87871.6182808383,267.1363813998378,9.0,0,0,3,23,1572.9,29.0,36.1,0.7757867132867133,0.1417961559305308 +AutoGluon 1.3 (4h),5931.009838958377,3.1656774007357082,3673.8246676849526,3.0250106370782723,0.5599854226187498,0.5522297755522069,0.09461405000435809,0.05278876561410328,39712.28933278582,151.42916243848958,11.884615384615385,0.17691,5020.2655313505065,2.664059625731574,2309.213100138395,2.5543334551748154,0.5600808158121042,0.565709489217623,0.04762778419021996,0.04116173129591805,35357.34871646478,128.98489015193604,8.5,1,2,0,23,1544.1,33.4,44.0,0.7526223776223776,0.17858871441651328 +TABPFNV2 (default),11.884418644151115,1.1739113624279316,5.514520630749067,0.6490305586450037,0.5013642769155608,0.5669313754038009,0.09512311589648129,0.06286359741165812,76.29505964660768,41.758850754001486,12.038461538461538,0.1886,8.093020015292698,0.44081105126274955,4.062007578219401,0.4356807523460313,0.3771347187714357,0.5238664820190138,0.05018180969629027,0.021765272352146064,58.62232186481185,25.39697585954705,5.0,4,1,4,17,1541.6,37.1,37.8,0.7491258741258742,0.2935579164181083 +GBM (tuned + ensemble),1379.7589400635827,3.041326452829899,860.8328230742596,2.952036385308545,0.6806482110513491,0.6837279086393804,0.1176313686476465,0.061906173521871284,10455.248725429481,156.11534656534064,12.903846153846153,0.174085,1321.8940714677174,2.4085231754514904,647.5566852470606,1.7204157994123173,0.7005535661954161,0.7073670918352166,0.051567085599247775,0.05140473915859429,9480.825611975059,103.35126359750211,12.5,1,0,2,23,1521.0,39.2,34.8,0.729458041958042,0.14042554762228465 +TABM (tuned),7080.034926002046,0.2169825582422762,3436.5218404284533,0.22059896519012676,0.6127855914191946,0.6024458446591694,0.10415143904294016,0.06347344157931543,43123.835967101855,11.305450805467302,13.346153846153847,0.17596499999999998,6405.6722231189415,0.19745506313112046,2828.4486752645407,0.21564835018398254,0.5915500043256848,0.637432321817063,0.06661219922938116,0.04216185168235247,44088.87353980956,9.589800419015791,13.0,0,2,1,23,1511.6,29.3,40.0,0.7194055944055944,0.13250415685370437 +GBM (tuned),1379.7589400635827,0.5707490142594036,860.8328230742596,0.6449583363337497,0.737683616453633,0.7058815019854675,0.12190922144153328,0.06697491769463343,10455.248725429481,28.289876597893098,15.326923076923077,0.17371999999999999,1321.8940714677174,0.34941615396075776,647.5566852470606,0.28350492153737616,0.7826548575969626,0.7126095533835781,0.05516713547384128,0.05173099008418312,9480.825611975059,16.480248719877835,15.5,0,0,0,26,1461.6,33.0,33.6,0.6743881118881119,0.07518973620464424 +CAT (tuned + ensemble),4698.536897969144,0.9597997341400538,2797.035390366479,0.7965719365801648,0.728920075896456,0.7126569304674935,0.11678549305379037,0.07069284985664498,27448.91121504087,44.04781428274726,15.76923076923077,0.1771,3111.985966463884,0.7725222905476887,1465.8584785724734,0.6919304562740058,0.7992439551459185,0.720606902542752,0.0655088388746699,0.053959365141496254,21194.263639925673,39.04121549244424,16.0,0,0,1,25,1455.5,38.0,31.7,0.6643356643356644,0.08640388177736971 +CAT (tuned),4698.536897969144,0.11917537287769155,2797.035390366479,0.12051762325590008,0.7533425274831995,0.7320002632947487,0.11966485384248739,0.07092155545579401,27448.91121504087,5.822055889865127,16.53846153846154,0.177215,3111.985966463884,0.10264339711931017,1465.8584785724734,0.09138547661664378,0.8504867849422679,0.7742727557913103,0.06254942442191513,0.056743152931981594,21194.263639925673,5.623705460666335,16.5,0,1,0,25,1435.3,35.7,31.2,0.6468531468531469,0.09500537263224745 +TABM (default),25.031941110761757,0.20188171669968172,14.494913436183246,0.20961960333163143,0.7120650416184634,0.7180270297124032,0.1369795645146118,0.07804754343441613,189.57549613749592,9.978425427673205,16.96153846153846,0.175255,19.491187883747948,0.16552388005786473,10.418743379746541,0.15495242186411928,0.7417274447397321,0.7370120765139465,0.07020888606704828,0.04168408531342971,140.5904555433362,9.536419533323254,13.5,0,0,0,26,1429.3,34.7,31.2,0.6372377622377622,0.08930659071409167 +CAT (default),184.70535022315818,0.14151572040003588,154.6142383995702,0.1555426094856078,0.7721520925382083,0.773058737636122,0.1329171714368633,0.07483897220415464,477.2608484422213,7.423108461919998,17.0,0.176625,11.6375067697631,0.16093741522894967,5.723546572951673,0.1101036800878527,0.8708469956136012,0.8295699236647479,0.07003170550807408,0.03759488482131658,107.22813859237792,5.910285969899903,17.5,0,2,0,24,1425.0,31.1,34.5,0.6363636363636364,0.10014897524065079 +MNCA (tuned),10872.434398398032,0.5841771599573967,5614.838977261846,0.5087025474163123,0.7448490525012423,0.6637614958528275,0.11651895782038843,0.07336435140526076,77920.67944321278,26.83390437537825,17.057692307692307,0.17598,10294.846671289868,0.4166409836875068,5944.878874246984,0.5156200362715774,0.8177816514691232,0.7235503806350614,0.06755654915093501,0.057542536642744974,66956.02864547497,22.936826653686147,16.5,1,0,0,25,1425.9,36.9,29.1,0.6350524475524476,0.11459331361587798 +XGB (tuned + ensemble),1872.6912781718452,2.683822184852046,1147.2290814465296,3.3619425077092306,0.7784275988369416,0.7582815836411924,0.13102530851919714,0.08005807343057435,11800.394028929062,144.45205787298235,17.51923076923077,0.18004,1385.052251373397,1.6176902174949646,766.0569170086173,1.9172343251389494,0.8316341855574652,0.7791345165987353,0.07343601410090844,0.05291664219529325,11520.40078776449,81.51508510452376,15.25,0,0,0,26,1415.4,35.8,32.1,0.6245629370629371,0.07166187524738382 +EBM (tuned + ensemble),2781.1579704396745,0.37791664172441536,1628.507747090886,0.31990997744824035,0.8144475987903695,0.8130615556926872,0.1572702096615729,0.11477484338056507,16692.42562947248,18.328617946180266,19.076923076923077,0.190645,1861.6872274941868,0.27221977710723877,1109.0589152421817,0.22935467594400044,0.9368603191709515,0.8266966373060847,0.0849735392053248,0.05122044741314488,17695.477225433653,12.287120931350431,18.5,0,0,1,25,1383.1,30.7,32.2,0.5891608391608392,0.08004680984485021 +XGB (tuned),1872.6912781718452,0.6264070270407913,1147.2290814465296,0.8619192527074968,0.8187393857188691,0.7809467391114648,0.1335476211929174,0.08261887953032405,11800.394028929062,30.013383573853858,19.192307692307693,0.18088500000000002,1385.052251373397,0.25049915578630233,766.0569170086173,0.27754517145625424,0.9017943832788666,0.7799972199454441,0.07798856953005051,0.052696435285602104,11520.40078776449,11.544680214373663,19.5,0,0,0,26,1379.3,39.0,30.4,0.5865384615384616,0.05963545192718714 +REALMLP (tuned),17809.543566583365,0.2565666730587299,5995.031437071161,0.31613979844114337,0.7992732991174807,0.7337946286494308,0.13385871828365248,0.07632560949832082,112219.73072981052,14.318352639712598,19.346153846153847,0.174605,10344.939689996507,0.2056242651409573,5786.691668992368,0.2497109023068299,0.895201963654964,0.74474927717263,0.07816480714694801,0.061570080954222194,87871.6182808383,11.529830439504863,17.0,0,0,0,26,1379.0,35.5,42.0,0.583041958041958,0.06741843156688952 +MNCA (tuned + ensemble),10872.434398398032,12.131162534310267,5614.838977261846,9.225622394556064,0.6933896363411076,0.6676223600444691,0.12705648224037935,0.09495265900961732,77920.67944321278,542.9013751751211,19.46153846153846,0.193055,10294.846671289868,9.331144248114692,5944.878874246984,8.395978282094799,0.722368050936999,0.6992013611051189,0.10101560653736502,0.048750216328898435,66956.02864547497,426.41545163369784,15.0,0,1,3,22,1375.2,34.1,37.3,0.5804195804195804,0.11686086342849132 +MNCA (default),28.48231661941251,0.466951452768766,14.20837500082612,0.38564885096222307,0.8169283513779425,0.7625155608140191,0.15085268931164045,0.08288629881402086,190.09277232716437,21.03504265137771,19.76923076923077,0.18519,24.325043747160173,0.36861725648244226,14.804303881798361,0.336567721078897,0.9808195818002508,0.7827163737065492,0.07974631950048428,0.053259111831894385,190.7747999331715,15.595042220247258,21.0,1,0,0,25,1366.6,30.2,32.3,0.5734265734265734,0.10017785315060294 +FASTAI (tuned + ensemble),2658.981896154697,9.680411353478066,1616.869681108774,10.454986295420108,0.7698173487890432,0.7686639558056911,0.1568263488914938,0.08996191783618754,20955.495033608,514.94962880429,20.25,0.17986000000000002,2313.697349058257,8.905639794137743,1358.6299051921596,8.066833347447606,0.9823907390988809,0.8177131221302221,0.08132829350115817,0.05523334112431259,17602.335588839425,531.7232414230566,20.0,0,1,0,25,1356.1,31.7,35.0,0.5625,0.08900824287112627 +NN_TORCH (tuned + ensemble),8367.9645569837,3.8136142372066137,3603.347061143873,3.319891821445254,0.8005534385996745,0.764065484436084,0.1332555944462707,0.07705265128451745,56068.072795326945,181.3894555134842,20.28846153846154,0.17949500000000002,6973.094145007928,2.944306871626112,2862.0511040893566,2.157502904371376,0.9508009219506811,0.8634802144576847,0.06782483230561193,0.06047718336789526,51500.257380537965,155.60936197975602,20.0,0,0,0,26,1356.4,32.6,31.9,0.5616258741258742,0.06668189048939435 +TABDPT (default),73.6718590354308,22.101120046978323,33.70000421461804,29.140721170745643,0.7421770023997575,0.7373052354542396,0.1481225969644462,0.09258934520941622,529.9323761499634,1297.090837487081,22.0,0.204185,71.26209372944302,21.38572289016512,27.488664901286818,8.862313494979123,1.0,0.88150849404269,0.06691814537901497,0.04715293243733788,513.7039796057306,1113.0844664803024,25.5,2,0,3,21,1323.2,24.0,32.4,0.5227272727272727,0.16307441111325852 +EBM (tuned),2781.1579704396745,0.04655793772803412,1628.507747090886,0.04424123436436782,0.8796773591220505,0.848789281667686,0.16506052478039396,0.12247708467085656,16692.42562947248,2.268478500812272,22.134615384615383,0.19155,1861.6872274941868,0.0338150527742174,1109.0589152421817,0.02733291425041831,1.0,0.8902021091214286,0.0928869358898275,0.062194782256558334,17695.477225433653,1.252412448907763,24.0,0,0,0,26,1319.5,32.8,40.9,0.5196678321678322,0.0577362774386474 +EBM (default),10.549292183941246,0.06438207361433242,5.791308777159165,0.0798605102452594,0.8468070324259693,0.8496093431986085,0.1725067485868843,0.124266558571261,75.26102802648397,3.652629917649085,23.0,0.19248500000000002,8.052384217580158,0.051854162746005586,5.279257749622374,0.07769986864408396,1.0,0.8953860150359447,0.09554880640873625,0.056563353282588213,77.97362526017017,3.340101142403973,23.0,1,0,2,23,1298.8,36.2,34.1,0.5,0.10350085258304385 +FASTAI (tuned),2658.981896154697,0.7253259826929142,1616.869681108774,0.8254771149356669,0.8865326975991655,0.8336048750524725,0.1719892484242808,0.10544003385780601,20955.495033608,39.61972184578173,24.48076923076923,0.18178,2313.697349058257,0.7597291602028741,1358.6299051921596,0.8969521438633071,1.0,0.8551942745167906,0.09660910797742411,0.07038888057026207,17602.335588839425,37.14448504861147,23.0,0,0,0,26,1269.2,37.8,39.5,0.46634615384615385,0.0522325290233973 +XT (tuned + ensemble),714.8477392964893,1.554570463987497,476.1600651525857,1.7304383155841179,0.8984673764067701,0.8650357556797086,0.18213060995030939,0.12358064556047872,6037.128774148354,86.31947081771706,25.0,0.18718,684.9222148127026,1.3082488920953539,370.85408017752667,1.4664534567412004,1.0,0.927972542444979,0.09673248729372463,0.07594781768304604,5339.627074654447,77.0823275943278,28.25,0,0,0,26,1260.2,26.7,47.0,0.45454545454545453,0.057367362828272636 +NN_TORCH (tuned),8367.9645569837,0.19940724566451504,3603.347061143873,0.20335754670694137,0.9141063957909149,0.850631259028558,0.15913478602343065,0.0993949625945807,56068.072795326945,10.018943215736572,25.78846153846154,0.18092,6973.094145007928,0.144207231203715,2862.0511040893566,0.15177921475257505,1.0,0.9108681738244536,0.10379368194908523,0.09268904724445692,51500.257380537965,8.681540922799902,27.0,0,0,0,26,1238.5,32.5,32.6,0.4366258741258741,0.045366926667074474 +REALMLP (default),111.4785790004282,0.2675422726533352,36.9875135803688,0.319519629673943,0.9170988508971886,0.8569208298595739,0.15925534618050793,0.09850483276040865,704.4596577225865,14.367018773597605,26.057692307692307,0.17819000000000002,62.036723497178826,0.20994164678785537,36.23724475975503,0.27878779609060306,1.0,0.9211958531631512,0.11529538667598471,0.06746878542921386,536.9122103749642,12.116250573587863,28.5,0,0,0,26,1233.8,37.0,35.4,0.430506993006993,0.04593533182737854 +XGB (default),4.970388249658113,0.2682310309165563,3.142391352663509,0.37694382359999684,0.967663807523489,0.9070085519838873,0.16741259421914897,0.12284060876198313,37.42364826794432,14.932177592236862,27.057692307692307,0.18894,4.433991021580166,0.20984046989017063,2.395188706947506,0.2182544724645258,1.0,0.9518900673045412,0.11516738963641299,0.07681752222912633,34.313637563138,10.099890730056913,25.75,0,0,0,26,1212.8,40.2,31.9,0.40777972027972026,0.04033171361980777 +XT (tuned),714.8477392964893,0.1734743657275143,476.1600651525857,0.2154329300724878,0.9275522820640911,0.8944270214472317,0.19352158162401475,0.1332600566819766,6037.128774148354,9.800458404720944,27.5,0.19,684.9222148127026,0.15512712796529132,370.85408017752667,0.16132775528274945,1.0,0.9688071317540197,0.10725621698653792,0.08413965072039432,5339.627074654447,8.581565117405717,31.5,0,0,0,26,1200.5,34.6,31.8,0.3977272727272727,0.04426880847584831 +RF (tuned + ensemble),915.5438805149151,1.5748089445961848,532.5626464695567,1.640386311447312,0.9428934399339288,0.9114679294811889,0.19492401068058715,0.147796519544612,6794.043055663636,81.1610891109226,29.46153846153846,0.18906,789.1687051984999,1.122593025366465,527.4239458868619,1.3899910445458383,1.0,0.9937951111561848,0.11848332500543385,0.09151654391313321,6269.6136687159,75.55960345229795,31.5,0,0,0,26,1156.9,35.3,29.3,0.3531468531468531,0.04551104983248137 +GBM (default),5.376282313848153,0.20799484711426958,3.3489543497095178,0.19728412961063485,0.9701181894525962,0.9226046869734794,0.17658302669842518,0.11946139446934875,42.11806124941037,9.792256635961557,29.576923076923077,0.188175,5.033887876404656,0.22240020169152153,2.8984772023300502,0.13282292956587455,1.0,0.9543370010011993,0.11805347934940252,0.09964492878261602,38.718264562373335,7.411495765404641,29.5,0,0,0,26,1154.1,38.8,28.9,0.3505244755244755,0.036096329534928126 +RF (tuned),915.5438805149151,0.159982283706339,532.5626464695567,0.2003027211432199,0.9772506936041817,0.9400975627550481,0.20796627140051763,0.15738142731874985,6794.043055663636,8.85724716182767,31.923076923076923,0.190815,789.1687051984999,0.14185967445373537,527.4239458868619,0.12279197881507195,1.0,0.9949804676438307,0.13224551901999893,0.10601779560500876,6269.6136687159,8.226247917103098,33.0,0,0,0,26,1096.4,35.3,30.8,0.2972027972027972,0.03435712054173671 +NN_TORCH (default),27.39151145983965,0.20185868648382335,14.560395769408618,0.21573681790796437,0.9698948249425869,0.9391775521500288,0.2143711666740534,0.1478455999492002,188.3149665224302,10.435316060204023,32.32692307692308,0.183195,20.22722778055403,0.14770235617955524,10.376930987013111,0.18792402145583464,1.0,0.9983912483912485,0.14318031310217677,0.1051705699943664,197.09431521312473,8.356657050571908,33.0,0,0,0,26,1082.4,35.8,33.5,0.2880244755244755,0.03477287158574056 +FASTAI (default),10.397479128328143,0.6249765678348704,5.689409993948898,0.6426262937267089,0.9807465319111015,0.9403855906437737,0.2303447532418144,0.17838953792880163,79.38940070919118,32.314469844151965,33.61538461538461,0.19183499999999998,9.91800790362888,0.5629585729704962,4.729717448807326,0.6226443216085578,1.0,0.9998853211009175,0.16088565720086695,0.11423976777713259,63.05950085745769,32.34956133934325,36.5,0,0,0,26,1051.3,30.1,32.4,0.25874125874125875,0.03221855964948352 +LR (tuned + ensemble),175.85719627702338,0.42670365847074065,115.79203038641191,0.374819481777715,0.9447815704031849,0.9377863465231412,0.28317417183852894,0.24822823084079834,1448.7019616321465,20.41052886468638,34.23076923076923,0.216585,158.39292872746785,0.19311302105585734,88.63237206036187,0.25697546561814155,1.0,1.0,0.21769005088471027,0.1779543124949825,1246.7830478036524,13.25530093456624,38.0,0,0,1,25,1030.4,38.5,40.3,0.24475524475524477,0.04368314033358164 +LR (tuned),175.85719627702338,0.1364539579448537,115.79203038641191,0.12123112010680681,0.9645430999513104,0.943877751101422,0.2902155463327537,0.25773756332102926,1448.7019616321465,6.009867470278231,35.21153846153846,0.21681,158.39292872746785,0.07476819356282552,88.63237206036187,0.08838151119168902,1.0,1.0,0.22518752798442315,0.1834788883245299,1246.7830478036524,4.016028876859062,38.0,0,0,0,26,996.9,36.4,40.6,0.22246503496503497,0.03516566041445386 +RF (default),0.9640304686676744,0.07426402477117686,0.5084719578868067,0.0844146006139,0.9951894125278877,0.9701912114787967,0.25755513106395067,0.2479832267773854,6.6754175478812146,4.153857105918795,35.28846153846154,0.213105,0.8910642200046115,0.05825435982810126,0.447836563000179,0.06594795127086661,1.0,1.0,0.17586669886220047,0.11692775501298655,5.654732996519019,3.502947097147378,37.5,0,0,0,26,1000.0,0.0,0.0,0.22071678321678323,0.029920050824686344 +LR (default),4.707631549162742,0.1483236100938585,2.804518891637732,0.1437661673750307,0.9745259999639975,0.9520629989740693,0.305476365274656,0.29332097764816845,36.73103788870433,7.308777223189359,35.67307692307692,0.22103499999999998,4.746849238872528,0.08430976470311483,2.2657084486770183,0.10642460584640503,1.0,1.0,0.22930816077470617,0.22370793213879017,31.50980214431158,4.703818974402271,39.5,0,0,1,25,982.3,41.6,38.5,0.21197552447552448,0.03995938942367327 +XT (default),0.8462119445841536,0.0803421718442542,0.47456795510527094,0.08795126020679146,0.9851460583683391,0.9642266228854909,0.2753142737967973,0.27671727525790224,6.0304600748852675,4.406716139948246,36.65384615384615,0.215405,0.7635703219307794,0.06673479080200195,0.40395335630596185,0.07007418015238884,1.0,1.0,0.18424442460299273,0.1562338348801046,5.148113375558061,3.7424721126192138,39.0,0,0,0,26,955.5,43.9,50.8,0.1896853146853147,0.030378669430214802 +KNN (tuned + ensemble),16.440937072497146,0.2739423143558013,7.036183733958932,0.19964653999651732,1.0,0.9945521006475282,0.47993514547383553,0.5914832997832359,57.836601086323085,11.285407568076662,41.57692307692308,0.31118999999999997,6.799899099932777,0.1030390567249722,3.707274221912526,0.16730320161416595,1.0,1.0,0.4552264030449861,0.6183873456591064,55.06974575171796,8.266636963631449,43.0,0,0,0,26,703.5,49.4,45.5,0.0777972027972028,0.024279443329967162 +KNN (tuned),16.440937072497146,0.06075953357240074,7.036183733958932,0.04304950843110991,1.0,0.9966557577121117,0.5010874288021712,0.6455640735444151,57.836601086323085,2.5027008595459077,42.69230769230769,0.31467999999999996,6.799899099932777,0.036990099483066134,3.707274221912526,0.039402452723266784,1.0,1.0,0.4955223463378593,0.6729856958219811,55.06974575171796,2.091430487051362,44.0,0,0,0,26,609.2,52.7,48.0,0.05244755244755245,0.023502270498812147 +KNN (default),0.2899092884145231,0.0317453768518236,0.13426353967394633,0.031065835648244968,1.0,1.0,0.5923454279012129,0.9491257378249622,1.00065410372272,1.4527699655062962,44.25,0.34582,0.1258228341738383,0.02117468251122369,0.07457399441809831,0.021006283652748647,1.0,1.0,0.6201400300158697,1.0,1.0,1.0057335151210618,45.0,0,0,0,26,407.8,86.4,95.3,0.017045454545454544,0.022663376691509682 diff --git a/data/tabpfn-imputed-cls/tuning-impact-elo-horizontal.pdf b/data/tabpfn-imputed-cls/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..aa0fc43af3f5a7832789cced59f877ce408ba43a Binary files /dev/null and b/data/tabpfn-imputed-cls/tuning-impact-elo-horizontal.pdf differ diff --git a/data/tabpfn-imputed-cls/tuning-impact-elo-horizontal.png.zip b/data/tabpfn-imputed-cls/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..24481787fa054a7bc9a1fd8d8bbf394bb80459fd --- /dev/null +++ b/data/tabpfn-imputed-cls/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c7ba763238c3b186cb800f076efde71d7f2bd932c98b1a297881496602be9e3 +size 153061 diff --git a/data/tabpfn-imputed-reg/figures/critical-diagram.pdf b/data/tabpfn-imputed-reg/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..c2d8f64d091632b223a523520ca37f310a802f56 Binary files /dev/null and b/data/tabpfn-imputed-reg/figures/critical-diagram.pdf differ diff --git a/data/tabpfn-imputed-reg/figures/critical-diagram.png.zip b/data/tabpfn-imputed-reg/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..5dde710223f33796aaf8b252ca24d174bb54c2c5 --- /dev/null +++ b/data/tabpfn-imputed-reg/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:719a41d0fa24badcd2e52ffd016be9f940e6c6ec35e8da65cf5d12e237671d0b +size 321964 diff --git a/data/tabpfn-imputed-reg/leaderboard.tex b/data/tabpfn-imputed-reg/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..6afd113d8bc993d3ee92cb44469340102255c379 --- /dev/null +++ b/data/tabpfn-imputed-reg/leaderboard.tex @@ -0,0 +1,53 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +RealMLP (T+E) & \textcolor{gold}{\textbf{1795${}_{-87,+77}$}} & \textcolor{silver}{\textbf{0.758}} & \textcolor{gold}{\textbf{5.7}} & 3.9 & 0 & \textcolor{silver}{\textbf{1.7\%}} & 7793.93 & 5.22 \\ +TabPFNv2 (T+E) & \textcolor{silver}{\textbf{1781${}_{-93,+99}$}} & \textcolor{silver}{\textbf{0.758}} & \textcolor{silver}{\textbf{5.9}} & \textcolor{gold}{\textbf{1.7}} & \textcolor{gold}{\textbf{3}} & \textcolor{gold}{\textbf{0.8\%}} & 6577.99 & 56.80 \\ +TabDPT (D) & \textcolor{bronze}{\textbf{1766${}_{-86,+85}$}} & \textcolor{gold}{\textbf{0.787}} & 6.4 & \textcolor{silver}{\textbf{2.0}} & \textcolor{gold}{\textbf{3}} & 3.0\% & 35.70 & 46.71 \\ +AutoGluon 1.3 (4h) & 1760${}_{-85,+91}$ & 0.691 & \textcolor{bronze}{\textbf{6.3}} & \textcolor{bronze}{\textbf{3.1}} & \textcolor{bronze}{\textbf{1}} & 3.3\% & 4737.61 & 4.70 \\ +CatBoost (T+E) & 1699${}_{-74,+85}$ & 0.605 & 7.9 & 6.9 & 0 & 4.2\% & 3558.14 & 0.97 \\ +CatBoost (T) & 1663${}_{-83,+73}$ & 0.572 & 9.0 & 7.3 & 0 & 4.3\% & 3558.14 & 0.10 \\ +LightGBM (T+E) & 1622${}_{-82,+67}$ & 0.515 & 10.3 & 8.0 & 0 & 5.5\% & 1003.11 & 4.89 \\ +TabM (T+E) & 1561${}_{-85,+66}$ & 0.487 & 12.1 & 7.1 & 0 & 3.0\% & 4158.29 & 1.95 \\ +TabPFNv2 (T) & 1556${}_{-75,+84}$ & 0.524 & 12.3 & 4.6 & 0 & \textcolor{bronze}{\textbf{2.9\%}} & 6577.99 & 0.50 \\ +XGBoost (T+E) & 1556${}_{-77,+66}$ & 0.453 & 12.6 & 12.0 & 0 & 5.7\% & 933.42 & 2.76 \\ +ModernNCA (T+E) & 1546${}_{-87,+54}$ & 0.371 & 12.9 & 8.2 & 0 & 6.4\% & 9308.22 & 7.89 \\ +XGBoost (T) & 1514${}_{-64,+74}$ & 0.407 & 13.9 & 13.4 & 0 & 5.9\% & 933.42 & 0.39 \\ +LightGBM (T) & 1473${}_{-78,+69}$ & 0.381 & 15.4 & 14.1 & 0 & 6.3\% & 1003.11 & 0.89 \\ +CatBoost (D) & 1467${}_{-78,+81}$ & 0.376 & 15.7 & 12.5 & 0 & 6.8\% & 10.91 & 0.20 \\ +TabPFNv2 (D) & 1427${}_{-79,+91}$ & 0.424 & 17.3 & 6.4 & 0 & 5.5\% & 9.10 & 0.85 \\ +TabM (T) & 1395${}_{-88,+79}$ & 0.361 & 18.6 & 14.8 & 0 & 4.2\% & 4158.29 & 0.21 \\ +RealMLP (T) & 1381${}_{-92,+74}$ & 0.295 & 18.9 & 14.9 & 0 & 4.3\% & 7793.93 & 0.36 \\ +ExtraTrees (T+E) & 1375${}_{-81,+73}$ & 0.241 & 19.1 & 10.9 & 0 & 11.2\% & 623.85 & 1.14 \\ +ModernNCA (T) & 1358${}_{-61,+62}$ & 0.125 & 20.0 & 17.7 & 0 & 8.7\% & 9308.22 & 0.42 \\ +ExtraTrees (T) & 1356${}_{-68,+63}$ & 0.210 & 20.0 & 13.5 & 0 & 11.3\% & 623.85 & 0.29 \\ +TabM (D) & 1336${}_{-73,+68}$ & 0.319 & 20.9 & 15.3 & 0 & 6.0\% & 15.01 & 0.15 \\ +RealMLP (D) & 1328${}_{-76,+59}$ & 0.157 & 21.1 & 17.0 & 0 & 6.6\% & 41.54 & 0.31 \\ +ModernNCA (D) & 1283${}_{-85,+72}$ & 0.061 & 23.1 & 21.8 & 0 & 10.3\% & 27.82 & 0.30 \\ +LightGBM (D) & 1262${}_{-82,+56}$ & 0.016 & 23.9 & 23.6 & 0 & 9.2\% & 4.57 & 0.27 \\ +TorchMLP (T+E) & 1262${}_{-85,+64}$ & 0.136 & 23.9 & 21.6 & 0 & 8.9\% & 5692.02 & 1.35 \\ +RandomForest (T+E) & 1248${}_{-93,+55}$ & 0.122 & 24.7 & 21.3 & 0 & 12.8\% & 633.57 & 1.51 \\ +TorchMLP (T) & 1242${}_{-87,+63}$ & 0.139 & 24.7 & 21.4 & 0 & 9.4\% & 5692.02 & 0.13 \\ +EBM (T+E) & 1215${}_{-71,+90}$ & 0.186 & 25.4 & 9.7 & 0 & 13.1\% & 1904.36 & 0.16 \\ +ExtraTrees (D) & 1192${}_{-71,+66}$ & 0.085 & 26.6 & 23.4 & 0 & 13.7\% & 0.47 & 0.12 \\ +RandomForest (T) & 1186${}_{-70,+71}$ & 0.087 & 26.7 & 24.1 & 0 & 13.3\% & 633.57 & 0.25 \\ +EBM (T) & 1175${}_{-77,+75}$ & 0.151 & 27.0 & 13.0 & 0 & 13.7\% & 1904.36 & 0.02 \\ +XGBoost (D) & 1160${}_{-93,+68}$ & 0.000 & 27.7 & 27.2 & 0 & 10.4\% & 3.79 & 0.31 \\ +EBM (D) & 1102${}_{-84,+86}$ & 0.074 & 29.9 & 27.3 & 0 & 14.7\% & 7.85 & 0.07 \\ +FastaiMLP (T+E) & 1093${}_{-80,+75}$ & 0.000 & 30.0 & 29.5 & 0 & 12.7\% & 2248.07 & 7.53 \\ +FastaiMLP (T) & 1021${}_{-82,+74}$ & 0.000 & 32.1 & 31.6 & 0 & 13.2\% & 2248.07 & 0.89 \\ +TorchMLP (D) & 1008${}_{-73,+72}$ & 0.010 & 32.6 & 31.2 & 0 & 12.8\% & 24.31 & 0.13 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.000 & 32.8 & 31.7 & 0 & 15.3\% & 0.53 & 0.12 \\ +TabICL (D) & 998${}_{-73,+75}$ & 0.000 & 32.8 & 31.7 & 0 & 15.3\% & 0.53 & 0.12 \\ +FastaiMLP (D) & 886${}_{-59,+69}$ & 0.000 & 35.4 & 35.0 & 0 & 18.7\% & 7.09 & 0.84 \\ +Linear (T+E) & 463${}_{-133,+126}$ & 0.000 & 41.1 & 41.1 & 0 & 34.7\% & 149.75 & 0.13 \\ +KNN (T+E) & 419${}_{-130,+87}$ & 0.000 & 41.4 & 41.3 & 0 & 38.4\% & 2.43 & 0.18 \\ +Linear (T) & 416${}_{-140,+114}$ & 0.000 & 41.7 & 41.6 & 0 & 34.8\% & 149.75 & 0.06 \\ +KNN (T) & 342${}_{-156,+115}$ & 0.000 & 42.3 & 42.2 & 0 & 39.1\% & 2.43 & 0.03 \\ +Linear (D) & 310${}_{-151,+116}$ & 0.000 & 42.4 & 42.4 & 0 & 36.2\% & 3.45 & 0.09 \\ +KNN (D) & -34${}_{-190,+194}$ & 0.000 & 44.6 & 44.5 & 0 & 43.3\% & 0.04 & 0.03 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/tabpfn-imputed-reg/tabarena_leaderboard.csv b/data/tabpfn-imputed-reg/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..64547247bebdfaf2dc9bbc5f274bdcdfd255bd3f --- /dev/null +++ b/data/tabpfn-imputed-reg/tabarena_leaderboard.csv @@ -0,0 +1,46 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +REALMLP (tuned + ensemble),17010.514617333338,3.266152421633403,7608.576993446122,4.947775972877548,0.24215826947341315,0.3389244583900656,0.017038290543583443,0.024279931908817002,222286.57801104392,292.1412689552373,5.714285714285714,4.12234,6952.188049999872,2.6860701560974123,7793.932791479678,5.223270868033627,0.19393332805279354,0.3397522712955251,0.008809518245537662,0.024876069447861582,155770.2311222541,294.5757042702358,4.0,0,1,1,5,1795.2,76.8,86.8,0.8928571428571429,0.25357142857142856 +TABPFNV2 (tuned + ensemble),13720.707571309333,144.4128799729877,6347.594191591684,104.41432658354127,0.2415939115786189,0.3302133765170469,0.008040709459012918,0.018722358351318587,189200.55764735988,8406.36796869519,5.857142857142857,4.11819,6554.713698530197,35.598623005549115,6577.9935429270845,56.7950324715632,0.06610997682360657,0.25030247372883757,0.0006200613284135237,0.005184858759349278,150467.84733998418,5370.844516439232,2.0,3,2,0,2,1781.4,98.1,92.6,0.8896103896103896,0.5884711779448623 +AutoGluon 1.3 (4h),6519.395389483089,8.374968978715321,4847.228712217428,6.324376295240136,0.3085910704411772,0.3756961336550062,0.033251519267014036,0.049602203108293184,112121.1836885888,503.3434027862167,6.285714285714286,4.16484,5893.662480862936,2.3565427462259927,4737.606472356081,4.698652813671368,0.23133371369265834,0.30893679782746253,0.015883109174547605,0.03516384582661509,94686.22259252104,291.4795027754577,5.0,1,1,0,5,1760.0,90.7,84.6,0.8798701298701299,0.3248917748917749 +TABDPT (default),60.7062628246489,23.490844258051073,38.96865681756451,39.22338625261095,0.21328360616453232,0.30831524812970973,0.029680920262059787,0.014635575677645899,964.3036575691822,2256.3060022287436,6.428571428571429,4.26747,45.33390234311422,23.403914364178977,35.69555633627602,46.714399928500946,0.18149452492987356,0.2944607543670049,0.0039773225368111564,0.00227534979039058,904.9754515353668,2312.870048476579,4.0,3,0,0,4,1765.5,84.3,85.5,0.8766233766233766,0.49988662131519274 +CAT (tuned + ensemble),11360.685129829437,1.178349671288142,7479.02149818403,1.2406086528694698,0.3954429862751284,0.45630330723246004,0.041752236791189334,0.05160245061199872,183467.75682165983,88.84726445881574,7.857142857142857,4.20863,12951.390986363092,0.6367310682932535,3558.1422644149734,0.9657383741190037,0.36849553490903875,0.48332107057126966,0.01735502485245788,0.056013107516843214,172301.38506843304,80.04598787510639,8.0,0,0,0,7,1698.8,84.7,73.2,0.8441558441558441,0.1439255189255189 +CAT (tuned),11360.685129829437,0.18431518115694562,7479.02149818403,0.19033233887232168,0.42772839942623186,0.4797279640538269,0.04293561079285979,0.05673883273885333,183467.75682165983,14.175855537943391,9.0,4.23064,12951.390986363092,0.0923632542292277,3558.1422644149734,0.10450043094654878,0.5120020111687084,0.49061208598211514,0.018380899773098314,0.05607775104310802,172301.38506843304,9.480941702142749,10.0,0,0,1,6,1663.2,72.8,82.3,0.8181818181818182,0.13734955520669806 +GBM (tuned + ensemble),1550.5350551957176,11.328482860232157,1095.0718838551823,8.219062398413474,0.48543215706848314,0.5340835874571865,0.054708320326180546,0.06759479279110976,25605.476583745745,681.2643137899079,10.285714285714286,4.21165,1305.722465435664,4.427071142196655,1003.1068347227413,4.891030532077303,0.3027896779514977,0.44773308883751844,0.02669651687138408,0.03428335611022812,24184.993298853384,538.7114397419949,7.0,0,0,0,7,1622.5,66.9,81.8,0.788961038961039,0.12504095737178442 +TABM (tuned + ensemble),10282.773118314668,1.393584348285009,6394.137481456323,1.7230444286943332,0.5128718562532983,0.520777821387516,0.029684820879418625,0.08174102125202472,165087.49919995712,107.44763645274216,12.142857142857142,4.1458,7158.767174800237,0.8681228558222452,4158.291053548661,1.9464638022920298,0.35916463746804045,0.4272044906933773,0.0321092955544684,0.02730307361680199,145459.09074336805,107.26839105814781,10.0,0,0,1,6,1561.1,65.8,84.1,0.7467532467532467,0.14015566700988696 +TABPFNV2 (tuned),13720.707571309333,6.545461165715778,6347.594191591684,3.614348507333379,0.4760165239054399,0.5171178705486137,0.02872612273306815,0.0659409144684833,189200.55764735988,352.062795224747,12.285714285714286,4.17059,6554.713698530197,0.8012150287628174,6577.9935429270845,0.49833421776949843,0.3837541852536315,0.5181120270487152,0.015535650264428491,0.020282570852584572,150467.84733998418,67.61540858146891,7.0,0,2,0,5,1556.4,83.2,74.1,0.7435064935064936,0.21710014465556338 +XGB (tuned + ensemble),2079.7157167616347,3.992967984789894,1290.0643219432125,4.827720071303388,0.5469310959609862,0.587011300950283,0.05747125828952447,0.07213297680810152,31656.727052852842,333.48376000852966,12.571428571428571,4.22219,1648.719575548172,3.0598979949951173,933.4246756427857,2.7591173692295947,0.5452119463463313,0.6543437580481863,0.024632962058414143,0.08171864665901157,36911.29969286658,164.97865636299682,13.0,0,0,0,7,1556.0,65.1,76.7,0.737012987012987,0.08305093599211245 +MNCA (tuned + ensemble),11292.984456203474,5.393656354101878,8931.621967973691,7.01935255663346,0.629033079050954,0.5974350514650133,0.06383934286795147,0.060402731632731896,206810.59926615006,439.24782647955817,12.857142857142858,4.48697,9914.869807489713,3.95055410861969,9308.21673391936,7.88533754215507,0.5415267425434339,0.5733368095831641,0.021528134535190535,0.05208175360617587,222151.86792304332,413.4566390611155,9.0,0,0,1,6,1546.4,53.2,86.6,0.7305194805194806,0.12226317959400666 +XGB (tuned),2079.7157167616347,0.7477609078089397,1290.0643219432125,0.9786740038738302,0.5931661199323147,0.6182685551812842,0.05912174825282644,0.07889616636298961,31656.727052852842,70.06455347228189,13.857142857142858,4.23559,1648.719575548172,0.48153924147288,933.4246756427857,0.38757804886481584,0.6620816436761664,0.6377270710295441,0.027719287634248357,0.08095049600797707,36911.29969286658,37.05707269936294,12.0,0,0,0,7,1514.5,73.2,63.5,0.7077922077922078,0.07449494949494949 +GBM (tuned),1550.5350551957176,1.4077377538832407,1095.0718838551823,1.2781273508842104,0.6186778290078211,0.6418712970322261,0.06336854992568382,0.08812536702511353,25605.476583745745,102.58551273372913,15.428571428571429,4.23482,1305.722465435664,0.5831021388371785,1003.1068347227413,0.8935510846842087,0.5552965190223861,0.5497771953690628,0.027972044011269404,0.0771469045019801,24184.993298853384,73.3040824604662,14.0,0,0,0,7,1473.4,68.1,77.4,0.672077922077922,0.07088017024991815 +CAT (default),34.406632555855644,0.13612949961707704,24.995110587630016,0.18681141105942162,0.6237900244125646,0.6675667972832624,0.06846154890412891,0.08126436753648981,594.4248060555409,11.797969177620454,15.714285714285714,4.21395,45.3870238383611,0.10361001491546631,10.909956325719385,0.1965340299278453,0.5847419575275037,0.6867027709758126,0.022724551212545796,0.08888361196746222,412.72250349454265,9.618489444383828,16.0,0,0,0,7,1467.3,80.1,77.3,0.6655844155844156,0.07979304309136241 +TABPFNV2 (default),10.17094054751926,0.6529079978428189,8.130392025686353,0.7583587688298037,0.5759929931993246,0.6106433106521835,0.054501672943579625,0.10235002340399287,180.76463109970453,48.615119220322335,17.285714285714285,4.25916,9.120386091868083,0.4424108028411865,9.10218172841126,0.8496304484655789,0.4230930305474229,0.4525599653494939,0.039920435247708164,0.025924057113489832,198.0827015794515,48.06257528515414,13.0,0,0,2,5,1426.7,91.0,78.7,0.6298701298701299,0.15600085164545094 +TABM (tuned),10282.773118314668,0.16793914030468654,6394.137481456323,0.20684678947882645,0.6392492711824916,0.6283272878354954,0.04171656575678051,0.11343262381340333,165087.49919995712,13.009409484385971,18.571428571428573,4.26932,7158.767174800237,0.10640169779459635,4158.291053548661,0.207815816005071,0.46522673304240253,0.5524525384273968,0.042976216396108935,0.03536573040794217,145459.09074336805,12.524825096296247,15.0,0,0,0,7,1395.1,78.2,87.7,0.6006493506493507,0.06746218980447488 +REALMLP (tuned),17010.514617333338,0.1779377165294829,7608.576993446122,0.3097772518477419,0.7047967586156892,0.6634161257272674,0.04329176014839616,0.09606004530911617,222286.57801104392,17.624625939228846,18.857142857142858,4.2931,6952.188049999872,0.17935961882273357,7793.932791479678,0.35800323118310096,0.7895709633169843,0.7618942589884037,0.043563415018409435,0.04951944914046118,155770.2311222541,19.19488348672616,20.0,0,0,0,7,1381.3,73.7,92.0,0.5941558441558441,0.06689903234020882 +XT (tuned + ensemble),660.5781686419533,1.1831508216403779,587.7119735330767,1.316998538867821,0.7590185126625372,0.7873727562992079,0.11152236742279202,0.09736117781705758,12611.439486121615,82.99538684812504,19.142857142857142,5.04794,628.4651173035304,0.7619378328323364,623.8510097889447,1.1438343022436304,1.0,0.9703763858301026,0.025903530399211117,0.10013551316301275,14081.344732130377,68.20231667792302,25.0,0,0,0,7,1375.1,72.4,80.6,0.5876623376623377,0.09159277504105091 +MNCA (tuned),11292.984456203474,0.27963828964838905,8931.621967973691,0.3656110731370408,0.8745416984197177,0.788391952443423,0.08747111667996144,0.1048378077211334,206810.59926615006,22.83746449449945,20.0,4.74895,9914.869807489713,0.2117270072301229,9308.21673391936,0.41617828752294544,1.0,0.8534860144540843,0.03236956763422549,0.09957054673434969,222151.86792304332,21.680631541507974,22.0,0,0,0,7,1357.8,61.2,60.1,0.5681818181818182,0.056389716185112605 +XT (tuned),660.5781686419533,0.21893258586762446,587.7119735330767,0.2587197553454726,0.7900820708578837,0.8052880022926969,0.11311074973251874,0.10422741821694467,12611.439486121615,16.50847126388171,20.0,5.07281,628.4651173035304,0.13890047868092856,623.8510097889447,0.29098081635084977,1.0,0.9587250854440341,0.02660912637513735,0.12806468799723258,14081.344732130377,14.85253877204164,24.0,0,0,0,7,1355.5,62.7,67.1,0.5681818181818182,0.0738201175909741 +TABM (default),41.61068485350836,0.09812178838820684,33.57485323462188,0.15124505155365406,0.6807517607831785,0.7007527060050666,0.060056891118245796,0.14437719671978216,799.4371008354477,9.016255870890259,20.857142857142858,4.27071,34.914909998575844,0.10241310596466065,15.010870798216047,0.15399858651571716,0.6869868540300672,0.6380488805811865,0.05612090882619314,0.05222355068579704,670.393510312447,7.016702417594013,18.0,0,0,0,7,1335.7,68.0,73.0,0.5487012987012987,0.06542604221175649 +REALMLP (default),86.77730172740088,0.162267147548615,39.30550135866405,0.28225561362513907,0.8426262409401332,0.803669602783858,0.06636636072819802,0.11228334752066568,1142.2795335159524,15.984953083327307,21.142857142857142,4.76491,39.05289849440257,0.1580585479736328,41.53951650766714,0.3087081015110016,0.9775778014564183,0.8988813630790086,0.0569453348396638,0.11891984655886878,875.0164668614119,17.513061397917994,21.0,0,0,0,7,1327.6,58.3,75.5,0.5422077922077922,0.05869467445097697 +MNCA (default),32.2820956619959,0.19605722162458633,25.437351400848286,0.2556706353965379,0.9394939857609429,0.8818572481338124,0.10259098508837912,0.12447145886487125,599.545254682166,15.875935676225895,23.142857142857142,4.94033,29.151979072888693,0.13998790582021078,27.82059796372352,0.29868905742963153,1.0,0.9222672489922232,0.03934956674595991,0.10305347761813116,637.198973323623,15.69192319835031,23.0,0,0,0,7,1283.3,72.0,84.1,0.4967532467532468,0.045909672060882166 +NN_TORCH (tuned + ensemble),12800.476561368458,1.1380399908338272,7081.227309206272,1.497767305174234,0.8644212313341298,0.8516898463073783,0.08862343937037091,0.15385577166637224,182552.53268590602,90.9320692691531,23.857142857142858,4.65351,7304.863066792488,0.6995723327000936,5692.02320138285,1.353764106339938,1.0,0.8805586694349655,0.0931488267963303,0.09843675731093122,158151.6866749578,90.00555655937438,23.0,0,0,0,7,1262.2,63.2,84.2,0.4805194805194805,0.046321552208279895 +GBM (default),6.38493198705098,0.5425946334051707,5.062559496806533,0.586017128679101,0.9842293100404953,0.9101901710398957,0.09214694190164817,0.1303135612595869,114.0054740463554,44.04934412762916,23.857142857142858,4.4838,6.053678401311239,0.27992521921793617,4.570868909804455,0.2745991643955073,1.0,0.9048814737591246,0.06107189731149043,0.10636019574143357,95.93839371138947,30.245100747842656,25.0,0,0,0,7,1262.3,55.1,82.0,0.4805194805194805,0.04240246525960811 +NN_TORCH (tuned),12800.476561368458,0.08545414417509048,7081.227309206272,0.11421111903323287,0.8608314951688506,0.858766925529132,0.09394846170102965,0.1725237564760349,182552.53268590602,7.06193729144408,24.714285714285715,4.7148,7304.863066792488,0.06568752924601237,5692.02320138285,0.131112832826372,1.0,0.9430447630912011,0.09751345914708842,0.10970665592293476,158151.6866749578,6.1490644891314155,24.0,0,0,0,7,1242.5,62.3,86.5,0.461038961038961,0.04663841801689928 +RF (tuned + ensemble),805.4727961623479,1.1537035166271148,616.40361374022,1.4218773833596186,0.877625154030774,0.8717094025381364,0.12760095389966494,0.1317936836999853,13870.809159323733,85.97653577971612,24.714285714285715,5.10626,639.3565126180649,0.9083369493484497,633.5687055876473,1.5119688388094148,1.0,0.9653610013274743,0.0349392877195821,0.13942046646077252,14398.077818059888,97.12788527724665,29.0,0,0,0,7,1247.6,54.7,92.5,0.461038961038961,0.047037271021261165 +EBM (tuned + ensemble),5388.437029837048,0.1423879381210085,3500.656679775311,0.16581318572573583,0.8141909618196094,0.8314890800427855,0.1311561198750961,0.1890252179697543,82984.50653621592,10.569109007396577,25.428571428571427,4.37076,5200.331783827146,0.08374606768290202,1904.3573570607775,0.16013811522000573,1.0,0.9499481345479782,0.05778648960524235,0.1850552693192548,74151.4651845794,10.422047745376007,30.0,0,1,0,6,1215.2,89.3,70.4,0.4448051948051948,0.10318073175216032 +XT (default),0.7346943915836395,0.08473905533079117,0.46008623067859417,0.10458806079610798,0.9145279857275386,0.8990182587123957,0.13682802410988154,0.14082210751652371,11.201251938981406,6.5797630484480845,26.571428571428573,5.13889,0.4775619904200236,0.05924500624338786,0.4735771133343534,0.11693369290932056,1.0,1.0,0.07315765129610696,0.15512506986029978,10.84674207217604,6.1589221066128665,31.0,0,0,0,7,1191.9,65.7,70.7,0.41883116883116883,0.042659370354794876 +RF (tuned),805.4727961623479,0.17611904144287108,616.40361374022,0.22106795182548725,0.9129471887632264,0.9020232019832842,0.133394399377122,0.14125246260950458,13870.809159323733,13.301758986609267,26.714285714285715,5.18858,639.3565126180649,0.12748352686564127,633.5687055876473,0.24849182199342837,1.0,0.9713067987593035,0.04311956405835915,0.14203697823894862,14398.077818059888,13.680869433029466,31.0,0,0,0,7,1186.2,70.8,69.7,0.4155844155844156,0.04156432136058622 +EBM (tuned),5388.437029837048,0.015351363590785436,3500.656679775311,0.01684270418671638,0.8494479068291391,0.8554561099729149,0.1367064766905373,0.2066243973235434,82984.50653621592,1.0832102622101742,27.0,4.42885,5200.331783827146,0.009351968765258789,1904.3573570607775,0.018787849694490433,1.0,0.9835214391483789,0.07014482573090552,0.1901907177021721,74151.4651845794,1.0,31.0,0,0,1,6,1175.0,74.6,76.7,0.4090909090909091,0.07704908158218855 +XGB (default),5.009572153621249,0.2941358320296757,3.869550863988617,0.42965662676352057,1.0,0.9627004190141648,0.10421501398748971,0.1578017995574142,87.11728255407873,27.91087828932339,27.714285714285715,4.75458,4.333076930046081,0.2072049856185913,3.791700492881695,0.31131523040433723,1.0,0.9646965206460226,0.050731524717382626,0.11702111523756958,81.03254703110004,17.946977539037764,26.0,0,0,0,7,1160.1,67.9,92.3,0.39285714285714285,0.03677951950010773 +EBM (default),14.400625167952642,0.03714351956806485,10.51577767224111,0.06811790140940442,0.9258962131926525,0.9052479989785692,0.1468194585212037,0.21744478697144157,241.18775335182642,3.757417170209216,29.857142857142858,4.44221,13.099289311303032,0.03613271713256836,7.847325239497551,0.07057171314954758,1.0,0.9662629570470155,0.07294156515637107,0.20738482927540008,186.18589159248134,4.271072697409173,33.0,0,0,0,7,1102.1,85.6,83.8,0.34415584415584416,0.03664415638099849 +FASTAI (tuned + ensemble),2316.2620857840493,5.69649665128617,1912.4810450161624,8.30768905371141,1.0,0.9848347830775939,0.1274626118848621,0.18774240639715484,42346.92739383132,474.26279273152795,30.0,5.27232,2238.4564303557077,6.60931183497111,2248.0726789016367,7.52967050402227,1.0,0.9851031914128898,0.10220001788754296,0.20422002836922998,45511.629375922166,409.94392325616803,28.0,0,0,0,7,1093.1,74.7,79.9,0.3409090909090909,0.033897342562756096 +FASTAI (tuned),2316.2620857840493,0.595911791589525,1912.4810450161624,0.8787119009761157,1.0,0.9960582381987314,0.1322523422162721,0.21566408860648562,42346.92739383132,51.472930446274454,32.142857142857146,5.18701,2238.4564303557077,0.45566067695617674,2248.0726789016367,0.8899622596800327,1.0,1.0,0.11787324661401322,0.19653728657746058,45511.629375922166,47.3690323348188,33.0,0,0,0,7,1020.7,74.0,81.4,0.2922077922077922,0.03160225280968315 +NN_TORCH (default),45.35887446289971,0.0802082171515813,31.67622236459062,0.1165329153506545,0.9903234435493359,0.967098701481312,0.12780665628037344,0.23160692294055926,751.5426969414585,7.0142840459527545,32.57142857142857,4.81722,35.183143361409506,0.06712730725606282,24.305650569576496,0.13416367617779604,1.0,0.9880227178998348,0.14511201063493562,0.13614151845721162,546.0088939544379,6.398331846186531,33.0,0,0,0,7,1007.6,71.2,72.7,0.2824675324675325,0.03209949096583058 +TABICL (default),1.5119719039826165,0.08608071387760223,0.5884101131506653,0.10503802269179743,1.0,0.9784967396045477,0.15345507712888892,0.1988909284118511,17.48096698761895,6.618407517134457,32.785714285714285,5.26146,0.5022362232208252,0.059631967544555665,0.5271703851240811,0.11813050784694061,1.0,1.0,0.08142906388988314,0.17217694084546725,12.02349439116373,6.19914937070286,35.5,0,0,0,7,997.5,74.3,72.3,0.2775974025974026,0.031553587880633956 +RF (default),1.5119719039826165,0.08608071387760223,0.5884101131506653,0.10503802269179743,1.0,0.9784967396045477,0.15345507712888892,0.1988909284118511,17.48096698761895,6.618407517134457,32.785714285714285,5.26146,0.5022362232208252,0.059631967544555665,0.5271703851240811,0.11813050784694061,1.0,1.0,0.08142906388988314,0.17217694084546725,12.02349439116373,6.19914937070286,35.5,0,0,0,7,1000.0,0.0,0.0,0.2775974025974026,0.031553587880633956 +FASTAI (default),8.831207484669156,0.5305940438830663,6.755940136126163,0.7105278175311491,1.0,0.9963339691444509,0.18685629150301894,0.27228181364938464,154.14927757121433,43.905972028215395,35.42857142857143,6.36878,7.107011103630066,0.42740853627522785,7.092825452724616,0.8431779863303789,1.0,1.0,0.11912950408368406,0.27762931758033993,160.3044815424371,43.14144455330922,36.0,0,0,0,7,886.3,68.6,58.6,0.21753246753246752,0.028540619296921817 +LR (tuned + ensemble),152.34852822886572,0.09327742031642369,140.44301462951617,0.1463023584921992,1.0,1.0,0.34749111122086884,0.6284394248904203,2977.405199523172,9.309183761718689,41.142857142857146,8.15038,150.0536302089691,0.057510574658711754,149.75412196503902,0.1287132100674069,1.0,1.0,0.26203835697566735,0.7414458856888512,3344.1604489764304,7.22988242769623,41.0,0,0,0,7,462.9,125.3,132.3,0.08766233766233766,0.024356949123772592 +KNN (tuned + ensemble),4.245547793025062,0.1591204832470606,2.4746151721359886,0.1895989937949328,1.0,0.991196682740939,0.3842177651885135,0.7048115612733296,62.40052816223211,11.931188445938428,41.42857142857143,8.41865,2.490966773033142,0.10710635185241699,2.4322953132177623,0.17676389939136214,1.0,1.0,0.41352429770943944,0.7907767115559304,56.15178324522979,13.124172379663548,42.0,0,0,0,7,419.0,86.6,129.4,0.08116883116883117,0.024194887702493926 +LR (tuned),152.34852822886572,0.037903934433346706,140.44301462951617,0.06382438080033122,1.0,1.0,0.34766987292821344,0.6285783845355511,2977.405199523172,3.6512091758703176,41.714285714285715,8.15927,150.0536302089691,0.03235630989074707,149.75412196503902,0.06193413547254449,1.0,1.0,0.2620998903501569,0.7430806606996537,3344.1604489764304,4.067640041721783,42.0,0,0,0,7,415.6,113.1,139.1,0.07467532467532467,0.024046771149923245 +KNN (tuned),4.245547793025062,0.02863398733593169,2.4746151721359886,0.030004193594300673,1.0,0.997572722674734,0.3910594237799242,0.7476794241583206,62.40052816223211,1.9760324688350897,42.285714285714285,8.54422,2.490966773033142,0.014579184850056966,2.4322953132177623,0.02910016936139115,1.0,1.0,0.420237807146593,0.813866558903926,56.15178324522979,1.7112793648526077,43.0,0,0,0,7,341.8,115.0,155.3,0.06168831168831169,0.023694163654458606 +LR (default),3.9008307816490295,0.0549199554655287,3.239606778702032,0.07872063839931824,1.0,1.0,0.3619225843912718,0.6950518956016436,71.92078534572094,4.852563458042672,42.42857142857143,8.22758,3.3858322699864707,0.05012191931406657,3.447264865169665,0.08708413534655632,1.0,1.0,0.26184876537499124,0.7556433033178954,75.37476586504175,4.100500531123858,42.0,0,0,0,7,310.2,115.6,150.4,0.05844155844155844,0.023594631651110056 +KNN (default),0.06228995323181152,0.01962243337479849,0.04219198036913814,0.02726788157828385,1.0,0.9997271492012134,0.432705772179096,0.9384821422991884,1.0,1.6381446334470857,44.57142857142857,9.55646,0.04463104406992594,0.014348586400349935,0.043660770227879654,0.02863989301467053,1.0,1.0,0.4573857800165779,1.0,1.0,1.5792045889101338,45.0,0,0,0,7,-34.1,193.4,189.9,0.00974025974025974,0.022448979591836733 diff --git a/data/tabpfn-imputed-reg/tuning-impact-elo-horizontal.pdf b/data/tabpfn-imputed-reg/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..1145b046e5fb944e340ac9b672f9d2726e6f35df Binary files /dev/null and b/data/tabpfn-imputed-reg/tuning-impact-elo-horizontal.pdf differ diff --git a/data/tabpfn-imputed-reg/tuning-impact-elo-horizontal.png.zip b/data/tabpfn-imputed-reg/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..fd6e402fb997d49c8c621af728c7533dc827169a --- /dev/null +++ b/data/tabpfn-imputed-reg/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:170a46b3e8d59d2676db2220300c9c410a9b23398a21d08bdb11d476a44059e7 +size 151423 diff --git a/data/tabpfn-imputed/figures/critical-diagram.pdf b/data/tabpfn-imputed/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b082a10a3ad46e65f0226f5d6337f53575e8a5a1 Binary files /dev/null and b/data/tabpfn-imputed/figures/critical-diagram.pdf differ diff --git a/data/tabpfn-imputed/figures/critical-diagram.png.zip b/data/tabpfn-imputed/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..2730348b3362dad5e4aa2ad742a14121f30a86a1 --- /dev/null +++ b/data/tabpfn-imputed/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2c170c7c92d1867222868e8e681c72e06638718de9296857e0851e221bc0cb6f +size 321507 diff --git a/data/tabpfn-imputed/leaderboard.tex b/data/tabpfn-imputed/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..99c020662172dc8f3d1348c07a88398fb7596cd3 --- /dev/null +++ b/data/tabpfn-imputed/leaderboard.tex @@ -0,0 +1,53 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +TabPFNv2 (T+E) & \textcolor{gold}{\textbf{1714${}_{-31,+44}$}} & \textcolor{gold}{\textbf{0.712}} & \textcolor{gold}{\textbf{5.8}} & \textcolor{gold}{\textbf{2.0}} & \textcolor{gold}{\textbf{11}} & \textcolor{gold}{\textbf{3.8\%}} & 3899.42 & 55.83 \\ +RealMLP (T+E) & \textcolor{silver}{\textbf{1582${}_{-30,+31}$}} & 0.477 & \textcolor{silver}{\textbf{9.8}} & 6.0 & 0 & 8.6\% & 6131.85 & 4.53 \\ +TabM (T+E) & \textcolor{bronze}{\textbf{1578${}_{-36,+26}$}} & 0.484 & \textcolor{bronze}{\textbf{9.9}} & 4.9 & 2 & \textcolor{bronze}{\textbf{7.9\%}} & 3372.56 & 1.66 \\ +AutoGluon 1.3 (4h) & 1557${}_{-35,+33}$ & \textcolor{bronze}{\textbf{0.493}} & 10.7 & 4.8 & 2 & 8.2\% & 2727.51 & 3.24 \\ +TabPFNv2 (T) & 1556${}_{-32,+34}$ & \textcolor{silver}{\textbf{0.540}} & 10.7 & \textcolor{bronze}{\textbf{3.9}} & 1 & \textcolor{silver}{\textbf{7.1\%}} & 3899.42 & 0.98 \\ +LightGBM (T+E) & 1518${}_{-37,+27}$ & 0.361 & 12.3 & 7.3 & 1 & 10.4\% & 771.57 & 2.49 \\ +TabPFNv2 (D) & 1498${}_{-41,+31}$ & 0.483 & 13.2 & \textcolor{silver}{\textbf{3.8}} & \textcolor{bronze}{\textbf{4}} & 8.7\% & 4.22 & 0.55 \\ +CatBoost (T+E) & 1477${}_{-33,+30}$ & 0.342 & 14.1 & 10.1 & 0 & 10.1\% & 2034.85 & 0.80 \\ +TabICL (D) & 1470${}_{-33,+28}$ & 0.428 & 14.4 & 4.1 & \textcolor{bronze}{\textbf{4}} & 8.9\% & 7.27 & 1.64 \\ +TabM (T) & 1469${}_{-26,+26}$ & 0.382 & 14.5 & 8.4 & 0 & 9.1\% & 3372.56 & 0.21 \\ +CatBoost (T) & 1458${}_{-26,+29}$ & 0.316 & 14.9 & 9.6 & 0 & 10.3\% & 2034.85 & 0.10 \\ +LightGBM (T) & 1448${}_{-29,+28}$ & 0.288 & 15.3 & 13.5 & 0 & 10.9\% & 771.57 & 0.32 \\ +XGBoost (T+E) & 1425${}_{-29,+26}$ & 0.271 & 16.5 & 13.5 & 0 & 11.5\% & 828.74 & 2.31 \\ +CatBoost (D) & 1415${}_{-32,+36}$ & 0.259 & 16.7 & 10.4 & 0 & 11.9\% & 8.51 & 0.12 \\ +ModernNCA (T) & 1397${}_{-30,+34}$ & 0.228 & 17.7 & 9.8 & 1 & 11.0\% & 6147.69 & 0.48 \\ +TabM (D) & 1396${}_{-30,+30}$ & 0.295 & 17.8 & 11.9 & 0 & 12.1\% & 12.24 & 0.15 \\ +XGBoost (T) & 1388${}_{-25,+23}$ & 0.229 & 18.1 & 15.9 & 0 & 11.8\% & 828.74 & 0.34 \\ +ModernNCA (T+E) & 1388${}_{-28,+31}$ & 0.320 & 18.1 & 8.5 & 0 & 11.4\% & 6147.69 & 8.15 \\ +TabDPT (D) & 1374${}_{-28,+31}$ & 0.370 & 18.7 & 4.3 & \textcolor{silver}{\textbf{5}} & 12.3\% & 28.84 & 9.01 \\ +RealMLP (T) & 1363${}_{-29,+29}$ & 0.221 & 19.2 & 14.9 & 0 & 11.5\% & 6131.85 & 0.26 \\ +EBM (T+E) & 1341${}_{-32,+40}$ & 0.186 & 20.4 & 11.8 & 0 & 15.2\% & 1331.68 & 0.21 \\ +ModernNCA (D) & 1338${}_{-32,+31}$ & 0.157 & 20.5 & 11.3 & 1 & 14.1\% & 16.16 & 0.31 \\ +TorchMLP (T+E) & 1328${}_{-33,+24}$ & 0.186 & 21.0 & 16.0 & 0 & 12.4\% & 3704.30 & 2.07 \\ +FastaiMLP (T+E) & 1301${}_{-33,+30}$ & 0.181 & 22.3 & 12.9 & 0 & 15.1\% & 1459.62 & 8.06 \\ +EBM (T) & 1283${}_{-32,+34}$ & 0.127 & 23.2 & 16.2 & 0 & 15.9\% & 1331.68 & 0.02 \\ +ExtraTrees (T+E) & 1269${}_{-27,+28}$ & 0.131 & 23.8 & 15.5 & 0 & 16.7\% & 416.39 & 1.39 \\ +EBM (D) & 1258${}_{-34,+25}$ & 0.136 & 24.5 & 11.2 & 1 & 16.7\% & 5.89 & 0.07 \\ +RealMLP (D) & 1246${}_{-32,+22}$ & 0.099 & 25.0 & 20.6 & 0 & 14.0\% & 37.06 & 0.31 \\ +TorchMLP (T) & 1232${}_{-30,+35}$ & 0.097 & 25.6 & 21.9 & 0 & 14.5\% & 3704.30 & 0.14 \\ +ExtraTrees (T) & 1226${}_{-38,+29}$ & 0.102 & 25.9 & 19.8 & 0 & 17.6\% & 416.39 & 0.18 \\ +FastaiMLP (T) & 1221${}_{-37,+31}$ & 0.089 & 26.1 & 20.9 & 0 & 16.4\% & 1459.62 & 0.89 \\ +XGBoost (D) & 1195${}_{-26,+33}$ & 0.025 & 27.2 & 25.3 & 0 & 15.4\% & 3.05 & 0.24 \\ +LightGBM (D) & 1170${}_{-29,+28}$ & 0.027 & 28.4 & 26.7 & 0 & 15.9\% & 3.39 & 0.16 \\ +RandomForest (T+E) & 1167${}_{-29,+30}$ & 0.071 & 28.5 & 21.8 & 0 & 18.1\% & 572.67 & 1.42 \\ +RandomForest (T) & 1109${}_{-29,+37}$ & 0.036 & 30.8 & 27.9 & 0 & 19.2\% & 572.67 & 0.14 \\ +TorchMLP (D) & 1070${}_{-28,+40}$ & 0.026 & 32.4 & 29.2 & 0 & 19.6\% & 11.82 & 0.15 \\ +FastaiMLP (D) & 1023${}_{-32,+31}$ & 0.015 & 34.0 & 31.8 & 0 & 22.1\% & 5.18 & 0.65 \\ +ExtraTrees (D) & 1007${}_{-29,+35}$ & 0.030 & 34.5 & 30.3 & 0 & 24.6\% & 0.42 & 0.08 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.004 & 34.8 & 33.0 & 0 & 23.5\% & 0.47 & 0.07 \\ +Linear (T+E) & 972${}_{-30,+27}$ & 0.044 & 35.7 & 25.3 & 0 & 29.7\% & 97.00 & 0.22 \\ +Linear (T) & 942${}_{-35,+38}$ & 0.028 & 36.6 & 30.5 & 0 & 30.2\% & 97.00 & 0.09 \\ +Linear (D) & 922${}_{-45,+27}$ & 0.020 & 37.1 & 27.4 & 0 & 31.7\% & 2.99 & 0.10 \\ +KNN (T+E) & 691${}_{-49,+42}$ & 0.000 & 41.5 & 41.2 & 0 & 46.0\% & 3.41 & 0.18 \\ +KNN (T) & 607${}_{-51,+41}$ & 0.000 & 42.6 & 42.5 & 0 & 47.8\% & 3.41 & 0.04 \\ +KNN (D) & 371${}_{-104,+63}$ & 0.000 & 44.3 & 44.2 & 0 & 55.8\% & 0.07 & 0.03 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/tabpfn-imputed/tabarena_leaderboard.csv b/data/tabpfn-imputed/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..dedb326e7f24e0ad8d6e15b4e7acee517e8584f7 --- /dev/null +++ b/data/tabpfn-imputed/tabarena_leaderboard.csv @@ -0,0 +1,46 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +TABPFNV2 (tuned + ensemble),14425.017745771953,147.33083187377815,4401.396029095723,73.66242671660264,0.28759504252950974,0.3583482480018585,0.03810528355622711,0.026782979752020025,97206.53847125142,5748.512821917254,5.803030303030303,0.22151,6554.713698530197,48.78112569650014,3899.4164285155534,55.8334682198027,0.16748308582785468,0.3535617963732386,0.014027742147710298,0.009805462772927088,76922.9404189982,3038.197671565286,2.0,11,6,1,15,1714.4,43.7,30.9,0.8908402203856749,0.48937326678683374 +REALMLP (tuned + ensemble),17640.05257734851,4.903230364073808,6337.298676302214,5.714285721500293,0.5227695654892056,0.5349735605494492,0.08551898066723945,0.04651514482247435,135567.2437894661,291.2042190402348,9.772727272727273,0.23449,10127.618620618185,4.333946492936876,6131.852125259443,4.526544125425011,0.48215030896347355,0.5386632171507916,0.04086003335580146,0.02997010750546727,109144.7404232934,273.1739755067715,8.0,0,1,4,28,1582.4,30.5,30.0,0.8006198347107438,0.165506062248297 +TABM (tuned + ensemble),7759.403633462299,2.1001035646156025,4063.8948551919407,2.0262676290994976,0.5158505640872573,0.5420484616540082,0.07908714239356904,0.05662146691038834,68994.91604679843,112.78737728828092,9.909090909090908,0.22996,6619.307261109352,1.565106577343411,3372.559747313985,1.6579582265448953,0.47020083976979304,0.5421819053386484,0.038378144006785075,0.03425172068662737,47614.825535817596,107.26839105814781,8.0,2,1,2,28,1578.1,25.9,35.4,0.7975206611570248,0.20327737247639596 +AutoGluon 1.3 (4h),6055.818895130286,4.270678644549565,3922.7285559191146,3.7248760797186677,0.5066593479144164,0.5147832454528006,0.08159775560552752,0.05211282811287083,55071.75177189554,226.07763766376502,10.696969696969697,0.23252,5245.15874774456,2.643711381488376,2727.5131652494383,3.239980099911348,0.4597995521540763,0.5451307928879923,0.03289499571493004,0.03718969914596063,42990.839716832954,133.93997628187762,8.0,2,3,0,28,1557.0,32.7,34.4,0.7796143250688705,0.20962269694156876 +TABPFNV2 (tuned),14425.017745771953,5.209287579613502,4401.396029095723,2.5426283197700723,0.46046523778783577,0.49191924249583513,0.07053987986416366,0.05344001229510615,97206.53847125142,204.9255427422306,10.742424242424242,0.22735,6554.713698530197,0.8280300378799439,3899.4164285155534,0.9826616035428936,0.34176855083335494,0.4964169422314872,0.03437211181910249,0.021856445988952963,76922.9404189982,51.81666634218519,6.0,1,10,1,21,1556.0,33.5,31.8,0.7785812672176309,0.25839590411455043 +GBM (tuned + ensemble),1415.9841766067627,4.799208115006135,910.5198965732433,4.069284327482318,0.6392387450549837,0.6519851738431574,0.10428405536733554,0.06311285033655824,13668.933422648086,267.51058203721857,12.348484848484848,0.22857,1305.722465435664,2.759434594048394,771.5692555555095,2.4904788987789197,0.6693216689945842,0.6727922701071665,0.04583211069062454,0.04469640874832902,13185.517460723013,113.47473834277966,11.0,1,0,2,30,1518.3,26.8,36.5,0.7420798898071626,0.13716214969036036 +TABPFNV2 (default),11.52095359335042,1.0633954972129076,6.06940244179637,0.672221391108446,0.5171946106727229,0.5762036040928518,0.08650644617919911,0.0712395059554867,98.45527177302218,43.21321073170591,13.151515151515152,0.22839,8.920613225301107,0.4424108028411865,4.216795518748306,0.554313340790968,0.3939035196311629,0.49711705993401867,0.046436046436047484,0.024781120664368425,86.18639524525906,32.62493249187355,5.0,4,1,6,22,1497.5,30.1,40.5,0.7238292011019284,0.2643791451026961 +CAT (tuned + ensemble),6111.719856242539,1.006158811716921,3790.183958691414,0.8907615430657749,0.6581825114313259,0.6582788891752136,0.1008693477859659,0.06664337122899275,60543.81785886913,53.55072795645876,14.090909090909092,0.23306,3546.8591198126474,0.7182260619269477,2034.851316438205,0.804742177327474,0.6112565392764306,0.6936836326181555,0.057913957490063894,0.056013107516843214,22945.70625086803,41.43902270876451,13.0,0,0,1,32,1477.0,29.5,32.8,0.7024793388429752,0.09860544117243167 +TABICL (default),17.601916466818917,2.220749478067212,8.223267143306801,1.62619082060654,0.5724404784680696,0.6158184269020356,0.088559472294408,0.06951993310511383,118.07631364935412,95.53326061925603,14.424242424242424,0.21659,12.685983737309774,1.1608605914645724,7.267564825342236,1.640227848921365,0.5810932894452219,0.7216187048161464,0.04180362345559929,0.026535607179780867,110.9966550885031,89.55280278440337,10.0,4,3,1,25,1469.6,27.7,32.9,0.6949035812672176,0.24375608168176008 +TABM (tuned),7759.403633462299,0.20657940898278745,4063.8948551919407,0.21768183700894184,0.6183990992477728,0.6079358477571778,0.09090767804284569,0.07407084387139468,68994.91604679843,11.66689658584399,14.454545454545455,0.23179,6619.307261109352,0.18380210134718153,3372.559747313985,0.207815816005071,0.5745991505979262,0.6332494224873327,0.04895084966574903,0.04210334551764721,47614.825535817596,11.061181528720304,14.0,0,2,1,30,1468.6,26.0,25.5,0.6942148760330579,0.11870737596447387 +CAT (tuned),6111.719856242539,0.1329929079672303,3790.183958691414,0.13532680535635316,0.6842728639559639,0.6784879573951591,0.10338895380165729,0.0679130991218975,60543.81785886913,7.594073997033244,14.93939393939394,0.22837,3546.8591198126474,0.09593041737874348,2034.851316438205,0.09676511402452573,0.6577077283695499,0.6887106783902378,0.05854822331192988,0.05607775104310802,22945.70625086803,5.6651414641322715,14.0,0,1,1,31,1458.1,28.3,25.8,0.6831955922865014,0.10398747196622182 +GBM (tuned),1415.9841766067627,0.7482920802402174,910.5198965732433,0.7792669151777867,0.7124399645711881,0.6923035797226588,0.10949150324120158,0.07146137664352314,13668.933422648086,44.04955699034316,15.348484848484848,0.23047,1305.722465435664,0.3891807476679484,771.5692555555095,0.32246047162149255,0.745365207761696,0.6925959891885647,0.050895160045251076,0.05893446053789513,13185.517460723013,17.561755180687065,15.0,0,0,0,33,1447.8,27.8,28.3,0.6738980716253443,0.07427558585061143 +XGB (tuned + ensemble),1916.6055530242247,2.961519778778256,1177.527465794311,3.6728650211989002,0.7293222800450723,0.721951523676454,0.11542293422805444,0.07837699232883769,16012.343458246229,184.54969165931055,16.46969696969697,0.22933,1391.6566625965966,1.6547198295593262,828.736683312722,2.3118448001769525,0.7623744964555048,0.7289520999453164,0.060774906477773616,0.059085852676698046,12341.448622348875,104.93870139682326,14.0,0,0,0,33,1424.9,25.1,28.4,0.6484159779614325,0.07407773661747778 +CAT (default),152.82380405130613,0.14037318871880222,127.11927189400713,0.16217538557702282,0.740681350814587,0.7506816593794549,0.11924476665719236,0.07620193485040756,502.113809148077,8.351109219795852,16.727272727272727,0.22377,11.973733305931091,0.15812701649136013,8.50783362735807,0.12332610453035381,0.8332509641822385,0.801171738926584,0.05487383563224202,0.03792057600443833,113.47556087857672,7.212822094475863,17.0,0,2,0,31,1415.3,35.6,31.7,0.6425619834710744,0.09583105023928659 +MNCA (tuned),10961.64198641737,0.5195780056494254,6318.399005594662,0.47834981044798197,0.7723596137566766,0.6901982593720447,0.11035729454817664,0.08004053910862768,105260.96546626008,25.986174703676685,17.681818181818183,0.23735,10053.083413600922,0.4133593638737996,6147.690948218725,0.484748090925306,0.8745571721847745,0.7649747499806594,0.06726118300406625,0.07061019028987191,89084.88026764008,22.392018959457452,17.0,1,0,0,32,1397.1,33.9,29.3,0.6208677685950413,0.10224709597904896 +TABM (default),28.548644328920126,0.1798720349366416,18.542173393427806,0.19723712265145438,0.7054228305321909,0.7143627792290288,0.12066263379417053,0.0921174698888877,318.94007895221296,9.774328855022276,17.78787878787879,0.2315,20.942287389437357,0.15928708182440865,12.243907458197643,0.15399858651571716,0.6869868540300672,0.6845420920128497,0.0643765592382024,0.04493622716672658,186.77830789084513,9.305311172432566,16.0,0,0,0,33,1396.1,29.2,29.7,0.6184573002754821,0.08424101981965694 +MNCA (tuned + ensemble),10961.64198641737,10.701994556690307,6318.399005594662,8.75762576226945,0.679738245400772,0.6527341430730694,0.11364678600986435,0.08762388653573253,105260.96546626008,520.9142587851533,18.060606060606062,0.23071,10053.083413600922,8.386180957158407,6147.690948218725,8.148513113458952,0.7185505017354659,0.6981255179530825,0.07494108771929253,0.05038781311089743,89084.88026764008,418.52804655660253,14.0,0,1,4,28,1388.0,30.3,27.5,0.6122589531680441,0.11800680928178245 +XGB (tuned),1916.6055530242247,0.652148759324944,1177.527465794311,0.8866854120458099,0.7708905111580848,0.7464392455505173,0.11776031481168599,0.08182921310088948,16012.343458246229,38.50908627958101,18.060606060606062,0.23215,1391.6566625965966,0.25174130333794487,828.736683312722,0.3364459349309477,0.8207592710887125,0.7696477548662969,0.06701911684547046,0.05575461121856112,12341.448622348875,17.273530689905304,17.0,0,0,0,33,1388.2,23.0,24.6,0.6122589531680441,0.06278746656277309 +TABDPT (default),70.92158105132556,22.39591003114527,34.81759719100062,31.279468309323132,0.629987494107437,0.646307359355097,0.12299860493666727,0.07605369712691949,622.0717388752522,1500.5607209171305,18.696969696969695,0.22801,66.73408037026724,21.411699827512106,28.844939328856388,9.008305936432574,0.7203280108909103,0.7379303416341314,0.06053316447629842,0.04402125792690271,592.5429859587771,1255.434427440434,15.0,5,0,3,25,1374.2,30.7,27.9,0.5977961432506887,0.23451942539851733 +REALMLP (tuned),17640.05257734851,0.23988780349191993,6337.298676302214,0.3147901673455733,0.779232820829222,0.7188658553023052,0.11464754534587084,0.080511701943035,135567.2437894661,15.019683339609985,19.242424242424242,0.23707,10127.618620618185,0.198199192682902,6131.852125259443,0.25545227547789184,0.885134051632072,0.7483297645202374,0.06830355960720935,0.058306293379144045,109144.7404232934,13.001039963138904,18.0,0,0,0,33,1363.1,28.4,28.4,0.5853994490358126,0.0673082559733512 +EBM (tuned + ensemble),3334.2171648572994,0.32795600762672295,2025.6302479633398,0.2872227792040727,0.8143931606450567,0.8169704244942233,0.15173085728262328,0.13052492283887795,30754.382185448365,16.68266150462009,20.424242424242426,0.2332,1953.8706483443577,0.2473998334672716,1331.6775166450918,0.20701186245225042,0.9460248644293228,0.8642714012860728,0.08428901213465023,0.06235962049926,20856.10396834409,10.735277156769923,19.0,0,1,1,31,1341.2,39.2,31.4,0.5585399449035813,0.0849540054009463 +MNCA (default),29.288330355718077,0.40948904010181875,16.590279085679306,0.35807771432707775,0.8429271223076699,0.7878304641848843,0.14061535811276685,0.09170739337026186,276.94632919034655,19.94068662634551,20.484848484848484,0.22862,25.066760566499497,0.27276016076405846,16.16104653455061,0.3065299705640804,1.0,0.8401281662241025,0.07623534801441778,0.06857639602487717,228.23494043552506,15.69192319835031,22.0,1,0,0,32,1338.5,30.7,31.5,0.5571625344352618,0.08866642079823792 +NN_TORCH (tuned + ensemble),9308.194376095618,3.24606818494572,4341.07923497529,2.933380560418068,0.8141011522100134,0.7826524702875708,0.12378816761198892,0.09334422227460787,82898.10974181342,162.20152509802003,21.045454545454547,0.22969,7022.24924369653,2.462759764989217,3704.2987009192075,2.0735716422398887,0.9757817342652234,0.8711430811076922,0.06940828782484931,0.07002611736954287,61441.834728019065,130.8351426094471,20.0,0,0,0,33,1328.3,23.9,32.5,0.5444214876033058,0.062363030854006435 +FASTAI (tuned + ensemble),2586.283754560923,8.835338537861603,1679.575121937614,9.999499001724324,0.8186439717731856,0.8145183737118522,0.1505976774052386,0.11070323359154424,25493.071594867495,506.3190878191587,22.318181818181817,0.23902,2267.9460870583853,8.057986391915215,1459.621189354467,8.056269308662202,1.0,0.9506646971852195,0.08142732439291223,0.07668125691859373,23829.87732673067,529.4584115089291,25.0,0,1,0,32,1301.0,29.3,32.9,0.515495867768595,0.07731805189662351 +EBM (tuned),3334.2171648572994,0.039938361395890465,2025.6302479633398,0.03842942493274479,0.8732650510599177,0.8502034573687951,0.15904602973103044,0.1403265146274871,30754.382185448365,2.017057965351221,23.166666666666668,0.23722,1953.8706483443577,0.031091478135850694,1331.6775166450918,0.0236834002099177,1.0,0.9252984717182373,0.08863297174906726,0.06750589165199133,20856.10396834409,1.153393324267757,24.5,0,0,1,32,1282.7,33.1,31.3,0.4962121212121212,0.06183293286303491 +XT (tuned + ensemble),703.3360121879513,1.4757844792471992,499.8225911726898,1.6427389690079335,0.8688873144004177,0.8485617861141479,0.16715310395962388,0.11801894028157121,7431.679531233592,85.61436209689451,23.757575757575758,0.22915,666.2221839348475,1.1989427142673068,416.3888649592797,1.3925002488586609,1.0,0.9312796026112667,0.0949585994043769,0.08051595297351705,5771.537607935009,73.42784180878955,28.0,0,0,0,33,1269.2,27.1,26.6,0.48278236914600553,0.0646272987521953 +EBM (default),11.36624160479215,0.05860419851360899,6.793468845812911,0.0773696538255326,0.8635835253158718,0.8614114823034487,0.16705792948204298,0.14403163732281446,110.45760612579903,3.6748575166769917,24.454545454545453,0.2295,8.053060743543837,0.04657702445983887,5.893546446579368,0.07326012604396624,1.0,0.9402094393438465,0.09424073545770384,0.06816679495540655,98.43282840770306,3.379332756535156,24.0,1,0,2,30,1257.5,24.1,33.4,0.4669421487603306,0.08931912914624636 +REALMLP (default),106.23891412433149,0.2452111855099097,37.47920795758294,0.3116151414211664,0.9013016306032677,0.8456251150253311,0.13955162199365434,0.1014275480125844,797.3305404666338,14.710216960509968,25.015151515151516,0.24541,56.65627751350403,0.19186555014716256,37.06431607357004,0.3087081015110016,1.0,0.9178272410570335,0.10584984201426972,0.0783241041826278,651.2119595469192,12.659464398970895,26.0,0,0,0,33,1246.4,22.0,31.6,0.45420110192837465,0.04864185905056609 +NN_TORCH (tuned),9308.194376095618,0.17523537565160682,4341.07923497529,0.18444769841251837,0.9028056592953255,0.8523570064680737,0.1453073838944365,0.11490713099367705,82898.10974181342,9.391699534826044,25.560606060606062,0.23735,7022.24924369653,0.12440276145935059,3704.2987009192075,0.1432880461215973,1.0,0.9204081911632703,0.10352602970716274,0.09298739144390704,61441.834728019065,8.032002608597933,27.0,0,0,0,33,1231.5,34.5,29.2,0.4418044077134986,0.04563663695340095 +XT (tuned),703.3360121879513,0.18311701848450734,499.8225911726898,0.22461498391827245,0.8983919342324714,0.8755187446568758,0.1764647384955156,0.1271016182196971,7431.679531233592,11.223370223330804,25.90909090909091,0.23322,666.2221839348475,0.15195075670878092,416.3888649592797,0.1793043116994795,1.0,0.959118483428235,0.10675110715093028,0.08518673798205481,5771.537607935009,9.73206235571111,30.0,0,0,0,33,1225.6,28.5,37.6,0.43388429752066116,0.050537267985117415 +FASTAI (tuned),2586.283754560923,0.6978744876103771,1679.575121937614,0.8367693422775803,0.9106015193205547,0.8680646793562244,0.16356020771349106,0.1288208939560108,25493.071594867495,42.13403882164382,26.106060606060606,0.24143,2267.9460870583853,0.7035322189331055,1459.621189354467,0.8899622596800327,1.0,0.9716097056867237,0.09698823511745114,0.08330206169020851,23829.87732673067,41.63294608315149,27.0,0,0,0,33,1221.1,30.5,36.1,0.4294077134986226,0.047856409826548846 +XGB (default),4.978699986862414,0.2737259887284301,3.296637309611259,0.3881253273013503,0.9745229998669913,0.9188219783236431,0.15400704689728187,0.13025661893071092,47.96472220742737,17.685235315861274,27.196969696969695,0.24149,4.367259449428982,0.2072049856185913,3.054087114292606,0.2414376437664032,1.0,0.9599615615417688,0.11112425419154659,0.09755031126190183,45.44297154665395,12.112444004915941,26.0,0,0,0,33,1194.7,32.7,25.3,0.4046143250688705,0.039578217897447164 +GBM (default),5.590238305133601,0.278970559357794,3.7124463506088845,0.2797426445645519,0.9731114574560898,0.9199713048057496,0.1586723420445634,0.12176336924303563,57.36690638815629,17.058911558436503,28.363636363636363,0.24755,5.0517880121866865,0.22849366399976942,3.3870700945456824,0.15827762661722877,1.0,0.9513773231005599,0.10895934672449292,0.10403475244674416,43.79388559366092,9.72233278292159,27.0,0,0,0,33,1170.4,27.1,28.2,0.378099173553719,0.03743399468864812 +RF (tuned + ensemble),892.1954686825525,1.4854835507845638,550.3470940724247,1.5940359327620437,0.9290486520150778,0.9030343025538748,0.18064336227251276,0.144401978607873,8295.175259470323,82.18254749521213,28.454545454545453,0.23505,784.4848535855612,1.0802352163526747,572.6733661144972,1.4206488404155224,1.0,0.9909493158278216,0.11473727839757886,0.09748097407587406,7404.509259652326,76.20706350974088,30.0,0,0,0,33,1166.6,29.3,29.0,0.3760330578512397,0.04583479372101042 +RF (tuned),892.1954686825525,0.16340523231711854,550.3470940724247,0.20470746704551906,0.963610556213676,0.9320211831974011,0.19214799551676703,0.1539601317743645,8295.175259470323,9.800022397387403,30.818181818181817,0.23333,784.4848535855612,0.1385154088338216,572.6733661144972,0.14341358177200286,1.0,0.9927680249005744,0.12553920364001514,0.1162834063711407,7404.509259652326,8.630503216322625,32.0,0,0,0,33,1108.7,36.5,28.3,0.32231404958677684,0.035885920715432056 +NN_TORCH (default),31.202770278670574,0.17605404147395382,18.191025653235105,0.1946935658503532,0.9742281682834123,0.9451002201899978,0.19600899780266673,0.16561315331100365,307.7875153991938,9.709642602635572,32.378787878787875,0.23746,21.156466828452217,0.13171541690826416,11.818497713406881,0.14659688817979757,1.0,0.9967824967824969,0.14511201063493562,0.1156261970018704,210.45346039107372,7.909338196312241,33.0,0,0,0,33,1070.3,39.9,27.6,0.2868457300275482,0.0342057908481839 +FASTAI (default),10.065239688764116,0.6049560324511544,5.915643660471347,0.6570296472609841,0.9848306008996557,0.9522534285075538,0.22111992802449412,0.19830608126347074,95.24755640719609,34.77327333774118,34.0,0.25653,7.87360077434116,0.4848888476689657,5.1820077836540115,0.6521266629591491,1.0,1.0,0.1525216037950643,0.1310864418065254,93.12290255921157,34.91731418633814,36.0,0,0,0,33,1023.2,30.5,31.5,0.25,0.03143839048378862 +XT (default),0.822556706068893,0.08127484409897415,0.471496074166279,0.09148027851361618,0.9701664672021088,0.9503945456366526,0.2459384026510879,0.24789102755518555,7.127294712723843,4.867665484175484,34.515151515151516,0.2625,0.7219058142768012,0.06215476195017497,0.4238388518146615,0.07849177235630946,1.0,1.0,0.18315770932006714,0.15512506986029978,6.235138739070136,4.456198846093199,38.0,0,0,0,33,1006.8,34.5,28.2,0.23829201101928374,0.03298366659603482 +RF (default),1.0802604700981167,0.07677059518769132,0.52542853627611,0.08878926590315098,0.9962098401734872,0.9719529901721379,0.2354733014413618,0.23756970894227208,8.967503792674067,4.676640526479693,34.75757575757576,0.25251,0.7427172581354777,0.058771981133355036,0.4717373991313263,0.06943642688143947,1.0,1.0,0.16087354235509543,0.1389292060353615,7.464730143591374,4.060095192168379,36.0,0,0,0,33,1000.0,0.0,0.0,0.2327823691460055,0.03026655868503886 +LR (tuned + ensemble),170.87050911529295,0.355976880680431,121.02102704404008,0.32634615259593897,0.9564945706206911,0.9509831821091416,0.2968171589802374,0.3288790901846576,1772.9723454272128,18.055698085268993,35.696969696969695,0.24491,158.05876021915012,0.1903169314066569,96.99876412252586,0.21682568555752907,1.0,1.0,0.24584819980048034,0.25856044395126276,1402.8748923675635,12.175213508812917,40.0,0,0,1,32,972.0,26.5,30.0,0.21143250688705234,0.039583645228470625 +LR (tuned),170.87050911529295,0.11554940750301887,121.02102704404008,0.10905393298119077,0.972064260567699,0.9557824705647567,0.30240282773179056,0.3364007678210794,1772.9723454272128,5.509546013888674,36.59090909090909,0.25255,158.05876021915012,0.058829413519965276,96.99876412252586,0.0862505760249892,1.0,1.0,0.2610498572359117,0.2575122541077485,1402.8748923675635,4.067640041721783,40.5,0,0,0,33,941.9,37.3,34.9,0.19111570247933884,0.0328071081462201 +LR (default),4.536491992417409,0.12851071365754613,2.8968102616210682,0.12996863092563715,0.9799295757292101,0.9622314537371455,0.3174498056933321,0.3785366269110268,44.19552977352603,6.787762182097637,37.10606060606061,0.26248,3.583410120010376,0.07990590731302898,2.994106463982051,0.104980896680783,1.0,1.0,0.26184876537499124,0.29385559917332205,34.79737790723628,4.682238532824547,40.5,0,0,1,32,921.5,27.0,44.9,0.17940771349862258,0.036488077168887134 +KNN (tuned + ensemble),13.854036316245494,0.24958616836303815,6.0685782814510345,0.19751524231739334,1.0,0.9938403453340091,0.459631458746646,0.6155226279781042,58.80470682969773,11.4223913906534,41.54545454545455,0.31971,5.889678266313341,0.10710635185241699,3.411499284183487,0.17676389939136214,1.0,1.0,0.4456786012073394,0.6488391163428452,55.92066201498072,9.83646280007513,43.0,0,0,0,33,690.8,41.6,48.8,0.07851239669421488,0.024261507287775874 +KNN (tuned),13.854036316245494,0.05394502376466488,6.0685782814510345,0.040282320435423104,1.0,0.9968502654314559,0.4777481550095733,0.6672249054928193,58.80470682969773,2.390983322122401,42.60606060606061,0.34215,5.889678266313341,0.029062509536743164,3.411499284183487,0.03624739765555662,1.0,1.0,0.4641563796701971,0.740394718900499,55.92066201498072,2.000488003155834,44.0,0,0,0,33,606.9,40.8,50.1,0.05440771349862259,0.02354297510758564 +KNN (default),0.24162639913334188,0.029173843387000086,0.11473320891232033,0.030260209027344123,1.0,0.9999421225578331,0.5584824706268245,0.9468680054407072,1.0005153544482035,1.4920918647664636,44.31818181818182,0.40639,0.11169254779815674,0.019644896189371746,0.06555315548394994,0.02595407415488982,1.0,1.0,0.5530598349091809,1.0,1.0,1.2186499153225778,45.0,0,0,0,33,371.3,62.9,103.8,0.015495867768595042,0.022617898518851785 diff --git a/data/tabpfn-imputed/tuning-impact-elo-horizontal.pdf b/data/tabpfn-imputed/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..301c3d795424030dd12d16f2a31b8e92988f89b7 Binary files /dev/null and b/data/tabpfn-imputed/tuning-impact-elo-horizontal.pdf differ diff --git a/data/tabpfn-imputed/tuning-impact-elo-horizontal.png.zip b/data/tabpfn-imputed/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..f887f59f0106d027450d392018487f91be293dec --- /dev/null +++ b/data/tabpfn-imputed/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7db4e4ad114130ee65c0ba99801da0bb51caffc0689a155467c1c033c6597cd2 +size 152291 diff --git a/data/tabpfn-reg/figures/critical-diagram.pdf b/data/tabpfn-reg/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..461623f5a18604f98216ddb0e1ceef1c3e0d8a17 Binary files /dev/null and b/data/tabpfn-reg/figures/critical-diagram.pdf differ diff --git a/data/tabpfn-reg/figures/critical-diagram.png.zip b/data/tabpfn-reg/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..a05d0f1d48213186f97ece9fdf82cfc5228e0f9e --- /dev/null +++ b/data/tabpfn-reg/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59e7d8b23ce20236b9223953c24d434e9837eecd4166f8b052d21e73427045cd +size 308249 diff --git a/data/tabpfn-reg/leaderboard.tex b/data/tabpfn-reg/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..a0a60e687d0a6846a95fb9408249a9f4c91c6174 --- /dev/null +++ b/data/tabpfn-reg/leaderboard.tex @@ -0,0 +1,52 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +RealMLP (T+E) & \textcolor{gold}{\textbf{1793${}_{-100,+113}$}} & \textcolor{silver}{\textbf{0.758}} & \textcolor{gold}{\textbf{5.7}} & 3.9 & 0 & \textcolor{silver}{\textbf{1.7\%}} & 7793.93 & 5.22 \\ +TabPFNv2 (T+E) & \textcolor{silver}{\textbf{1785${}_{-75,+103}$}} & \textcolor{silver}{\textbf{0.758}} & \textcolor{silver}{\textbf{5.9}} & \textcolor{gold}{\textbf{1.7}} & \textcolor{gold}{\textbf{3}} & \textcolor{gold}{\textbf{0.8\%}} & 6577.99 & 56.80 \\ +AutoGluon 1.3 (4h) & \textcolor{bronze}{\textbf{1768${}_{-74,+85}$}} & 0.691 & \textcolor{bronze}{\textbf{6.3}} & \textcolor{bronze}{\textbf{3.1}} & \textcolor{bronze}{\textbf{1}} & 3.3\% & 4737.61 & 4.70 \\ +TabDPT (D) & 1758${}_{-79,+93}$ & \textcolor{gold}{\textbf{0.787}} & 6.4 & \textcolor{silver}{\textbf{2.0}} & \textcolor{gold}{\textbf{3}} & 3.0\% & 35.70 & 46.71 \\ +CatBoost (T+E) & 1703${}_{-74,+95}$ & 0.605 & 7.9 & 6.9 & 0 & 4.2\% & 3558.14 & 0.97 \\ +CatBoost (T) & 1665${}_{-72,+81}$ & 0.572 & 9.0 & 7.3 & 0 & 4.3\% & 3558.14 & 0.10 \\ +LightGBM (T+E) & 1625${}_{-73,+74}$ & 0.515 & 10.3 & 8.0 & 0 & 5.5\% & 1003.11 & 4.89 \\ +TabM (T+E) & 1558${}_{-84,+92}$ & 0.487 & 12.1 & 7.1 & 0 & 3.0\% & 4158.29 & 1.95 \\ +TabPFNv2 (T) & 1558${}_{-82,+85}$ & 0.524 & 12.1 & 4.6 & 0 & \textcolor{bronze}{\textbf{2.9\%}} & 6577.99 & 0.50 \\ +XGBoost (T+E) & 1548${}_{-80,+67}$ & 0.453 & 12.6 & 12.0 & 0 & 5.7\% & 933.42 & 2.76 \\ +ModernNCA (T+E) & 1540${}_{-68,+80}$ & 0.371 & 12.9 & 8.2 & 0 & 6.4\% & 9308.22 & 7.89 \\ +XGBoost (T) & 1517${}_{-78,+78}$ & 0.407 & 13.9 & 13.4 & 0 & 5.9\% & 933.42 & 0.39 \\ +LightGBM (T) & 1476${}_{-76,+75}$ & 0.381 & 15.4 & 14.1 & 0 & 6.3\% & 1003.11 & 0.89 \\ +CatBoost (D) & 1461${}_{-59,+85}$ & 0.376 & 15.6 & 12.5 & 0 & 6.8\% & 10.91 & 0.20 \\ +TabPFNv2 (D) & 1423${}_{-88,+85}$ & 0.424 & 17.1 & 6.4 & 0 & 5.5\% & 9.10 & 0.85 \\ +TabM (T) & 1390${}_{-76,+80}$ & 0.361 & 18.4 & 14.8 & 0 & 4.2\% & 4158.29 & 0.21 \\ +RealMLP (T) & 1386${}_{-68,+68}$ & 0.295 & 18.7 & 14.9 & 0 & 4.3\% & 7793.93 & 0.36 \\ +ExtraTrees (T+E) & 1369${}_{-57,+97}$ & 0.241 & 19.1 & 10.9 & 0 & 11.2\% & 623.85 & 1.14 \\ +ModernNCA (T) & 1352${}_{-67,+76}$ & 0.125 & 20.0 & 17.7 & 0 & 8.7\% & 9308.22 & 0.42 \\ +ExtraTrees (T) & 1350${}_{-56,+80}$ & 0.210 & 20.0 & 13.5 & 0 & 11.3\% & 623.85 & 0.29 \\ +TabM (D) & 1344${}_{-76,+74}$ & 0.319 & 20.4 & 15.2 & 0 & 6.0\% & 15.01 & 0.15 \\ +RealMLP (D) & 1336${}_{-84,+75}$ & 0.157 & 20.9 & 16.9 & 0 & 6.6\% & 41.54 & 0.31 \\ +ModernNCA (D) & 1281${}_{-70,+78}$ & 0.061 & 23.0 & 21.7 & 0 & 10.3\% & 27.82 & 0.30 \\ +LightGBM (D) & 1264${}_{-80,+70}$ & 0.016 & 23.7 & 23.5 & 0 & 9.2\% & 4.57 & 0.27 \\ +TorchMLP (T+E) & 1260${}_{-68,+85}$ & 0.136 & 23.6 & 21.5 & 0 & 8.9\% & 5692.02 & 1.35 \\ +TorchMLP (T) & 1236${}_{-70,+78}$ & 0.139 & 24.4 & 21.3 & 0 & 9.4\% & 5692.02 & 0.13 \\ +RandomForest (T+E) & 1232${}_{-78,+79}$ & 0.122 & 24.7 & 21.3 & 0 & 12.8\% & 633.57 & 1.51 \\ +EBM (T+E) & 1226${}_{-75,+65}$ & 0.186 & 25.0 & 9.7 & 0 & 13.1\% & 1904.36 & 0.16 \\ +ExtraTrees (D) & 1186${}_{-70,+56}$ & 0.085 & 26.6 & 23.4 & 0 & 13.7\% & 0.47 & 0.12 \\ +EBM (T) & 1180${}_{-68,+85}$ & 0.151 & 26.6 & 12.9 & 0 & 13.7\% & 1904.36 & 0.02 \\ +RandomForest (T) & 1175${}_{-71,+82}$ & 0.087 & 26.7 & 24.1 & 0 & 13.3\% & 633.57 & 0.25 \\ +XGBoost (D) & 1162${}_{-79,+81}$ & 0.000 & 27.4 & 26.9 & 0 & 10.4\% & 3.79 & 0.31 \\ +EBM (D) & 1098${}_{-79,+67}$ & 0.074 & 29.4 & 27.1 & 0 & 14.7\% & 7.85 & 0.07 \\ +FastaiMLP (T+E) & 1090${}_{-71,+71}$ & 0.000 & 29.6 & 29.2 & 0 & 12.7\% & 2248.07 & 7.53 \\ +FastaiMLP (T) & 1026${}_{-76,+71}$ & 0.000 & 31.7 & 31.3 & 0 & 13.2\% & 2248.07 & 0.89 \\ +TorchMLP (D) & 1003${}_{-79,+82}$ & 0.010 & 32.3 & 31.0 & 0 & 12.8\% & 24.31 & 0.13 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.000 & 32.3 & 31.2 & 0 & 15.3\% & 0.53 & 0.12 \\ +FastaiMLP (D) & 889${}_{-81,+76}$ & 0.000 & 34.7 & 34.4 & 0 & 18.7\% & 7.09 & 0.84 \\ +Linear (T+E) & 488${}_{-191,+123}$ & 0.000 & 40.1 & 40.1 & 0 & 34.7\% & 149.75 & 0.13 \\ +KNN (T+E) & 441${}_{-97,+119}$ & 0.000 & 40.4 & 40.3 & 0 & 38.4\% & 2.43 & 0.18 \\ +Linear (T) & 426${}_{-160,+137}$ & 0.000 & 40.7 & 40.6 & 0 & 34.8\% & 149.75 & 0.06 \\ +Linear (D) & 350${}_{-139,+108}$ & 0.000 & 41.4 & 41.4 & 0 & 36.2\% & 3.45 & 0.09 \\ +KNN (T) & 334${}_{-138,+156}$ & 0.000 & 41.3 & 41.2 & 0 & 39.1\% & 2.43 & 0.03 \\ +KNN (D) & 8${}_{-219,+161}$ & 0.000 & 43.6 & 43.5 & 0 & 43.3\% & 0.04 & 0.03 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/tabpfn-reg/tabarena_leaderboard.csv b/data/tabpfn-reg/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..24fa248f82d18d730621b3f8a5168b13c3e3448e --- /dev/null +++ b/data/tabpfn-reg/tabarena_leaderboard.csv @@ -0,0 +1,45 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +REALMLP (tuned + ensemble),17010.514617333338,3.266152421633403,7608.576993446122,4.947775972877548,0.24215826947341315,0.34508861688606063,0.017038290543583443,0.024279931908817002,222286.57801104392,292.1412689552373,5.714285714285714,4.12234,6952.188049999872,2.6860701560974123,7793.932791479678,5.223270868033627,0.19393332805279354,0.3430975563919833,0.008809518245537662,0.024876069447861582,155770.2311222541,294.5757042702358,4.0,0,1,1,5,1793.2,112.9,99.7,0.8903654485049833,0.25357142857142856 +TABPFNV2 (tuned + ensemble),13720.707571309333,144.4128799729877,6347.594191591684,104.41432658354127,0.2415939115786189,0.3341509321234076,0.008040709459012918,0.018722358351318587,189200.55764735988,8406.36796869519,5.857142857142857,4.11819,6554.713698530197,35.598623005549115,6577.9935429270845,56.7950324715632,0.06610997682360657,0.25312173882346195,0.0006200613284135237,0.005184858759349278,150467.84733998418,5370.844516439232,2.0,3,2,0,2,1784.7,102.2,74.6,0.8870431893687708,0.5884711779448623 +AutoGluon 1.3 (4h),6519.395389483089,8.374968978715321,4847.228712217428,6.324376295240136,0.3085910704411772,0.3825220655079529,0.033251519267014036,0.049602203108293184,112121.1836885888,503.3434027862167,6.285714285714286,4.16484,5893.662480862936,2.3565427462259927,4737.606472356081,4.698652813671368,0.23133371369265834,0.32214544440356185,0.015883109174547605,0.03516384582661509,94686.22259252104,291.4795027754577,5.0,1,1,0,5,1767.5,84.1,73.3,0.8770764119601329,0.3248917748917749 +TABDPT (default),60.7062628246489,23.490844258051073,38.96865681756451,39.22338625261095,0.21328360616453232,0.31601375327884224,0.029680920262059787,0.014635575677645899,964.3036575691822,2256.3060022287436,6.428571428571429,4.26747,45.33390234311422,23.403914364178977,35.69555633627602,46.714399928500946,0.18149452492987356,0.3024909611245904,0.0039773225368111564,0.00227534979039058,904.9754515353668,2312.870048476579,4.0,3,0,0,4,1758.2,92.5,78.4,0.8737541528239202,0.49988662131519274 +CAT (tuned + ensemble),11360.685129829437,1.178349671288142,7479.02149818403,1.2406086528694698,0.3954429862751284,0.464509402409349,0.041752236791189334,0.05160245061199872,183467.75682165983,88.84726445881574,7.857142857142857,4.20863,12951.390986363092,0.6367310682932535,3558.1422644149734,0.9657383741190037,0.36849553490903875,0.5021915866656061,0.01735502485245788,0.056013107516843214,172301.38506843304,80.04598787510639,8.0,0,0,0,7,1702.9,95.0,73.9,0.840531561461794,0.1439255189255189 +CAT (tuned),11360.685129829437,0.18431518115694562,7479.02149818403,0.19033233887232168,0.42772839942623186,0.4880115582391967,0.04293561079285979,0.05673883273885333,183467.75682165983,14.175855537943391,9.0,4.23064,12951.390986363092,0.0923632542292277,3558.1422644149734,0.10450043094654878,0.5120020111687084,0.4970112300410973,0.018380899773098314,0.05607775104310802,172301.38506843304,9.480941702142749,10.0,0,0,1,6,1664.8,80.2,71.1,0.813953488372093,0.13734955520669806 +GBM (tuned + ensemble),1550.5350551957176,11.328482860232157,1095.0718838551823,8.219062398413474,0.48543215706848314,0.5433795474282531,0.054708320326180546,0.06759479279110976,25605.476583745745,681.2643137899079,10.285714285714286,4.21165,1305.722465435664,4.427071142196655,1003.1068347227413,4.891030532077303,0.3027896779514977,0.46595554262551947,0.02669651687138408,0.03428335611022812,24184.993298853384,538.7114397419949,7.0,0,0,0,7,1624.9,73.8,73.0,0.7840531561461794,0.12504095737178442 +TABM (tuned + ensemble),10282.773118314668,1.393584348285009,6394.137481456323,1.7230444286943332,0.5128718562532983,0.5273647327974045,0.029684820879418625,0.08174102125202472,165087.49919995712,107.44763645274216,12.142857142857142,4.1458,7158.767174800237,0.8681228558222452,4158.291053548661,1.9464638022920298,0.35916463746804045,0.44366218553963405,0.0321092955544684,0.02730307361680199,145459.09074336805,107.26839105814781,10.0,0,0,1,6,1558.2,91.8,83.9,0.7408637873754153,0.14015566700988696 +TABPFNV2 (tuned),13720.707571309333,6.545461165715778,6347.594191591684,3.614348507333379,0.4760165239054399,0.521392040117008,0.02872612273306815,0.0659409144684833,189200.55764735988,352.062795224747,12.142857142857142,4.17059,6554.713698530197,0.8012150287628174,6577.9935429270845,0.49833421776949843,0.3837541852536315,0.5208855804941338,0.015535650264428491,0.020282570852584572,150467.84733998418,67.61540858146891,7.0,0,2,0,5,1558.1,84.4,81.4,0.7408637873754153,0.21727607709750568 +XGB (tuned + ensemble),2079.7157167616347,3.992967984789894,1290.0643219432125,4.827720071303388,0.5469310959609862,0.5975201030395162,0.05747125828952447,0.07213297680810152,31656.727052852842,333.48376000852966,12.571428571428571,4.22219,1648.719575548172,3.0598979949951173,933.4246756427857,2.7591173692295947,0.5452119463463313,0.6511826558926308,0.024632962058414143,0.08171864665901157,36911.29969286658,164.97865636299682,13.0,0,0,0,7,1548.1,66.3,79.8,0.7308970099667774,0.08305093599211245 +MNCA (tuned + ensemble),11292.984456203474,5.393656354101878,8931.621967973691,7.01935255663346,0.629033079050954,0.6072296511290017,0.06383934286795147,0.060402731632731896,206810.59926615006,439.24782647955817,12.857142857142858,4.48697,9914.869807489713,3.95055410861969,9308.21673391936,7.88533754215507,0.5415267425434339,0.5778749232329116,0.021528134535190535,0.05208175360617587,222151.86792304332,413.4566390611155,9.0,0,0,1,6,1539.9,79.7,67.4,0.7242524916943521,0.12226317959400666 +XGB (tuned),2079.7157167616347,0.7477609078089397,1290.0643219432125,0.9786740038738302,0.5931661199323147,0.6285362466037914,0.05912174825282644,0.07889616636298961,31656.727052852842,70.06455347228189,13.857142857142858,4.23559,1648.719575548172,0.48153924147288,933.4246756427857,0.38757804886481584,0.6620816436761664,0.6348765781499277,0.027719287634248357,0.08095049600797707,36911.29969286658,37.05707269936294,12.0,0,0,0,7,1517.0,77.2,77.9,0.7009966777408638,0.07449494949494949 +GBM (tuned),1550.5350551957176,1.4077377538832407,1095.0718838551823,1.2781273508842104,0.6186778290078211,0.6521109849275161,0.06336854992568382,0.08812536702511353,25605.476583745745,102.58551273372913,15.428571428571429,4.23482,1305.722465435664,0.5831021388371785,1003.1068347227413,0.8935510846842087,0.5552965190223861,0.567499678004668,0.027972044011269404,0.0771469045019801,24184.993298853384,73.3040824604662,14.0,0,0,0,7,1476.0,74.7,75.8,0.6644518272425249,0.07088017024991815 +CAT (default),34.406632555855644,0.13612949961707704,24.995110587630016,0.18681141105942162,0.6237900244125646,0.6752749432467652,0.06846154890412891,0.08126436753648981,594.4248060555409,11.797969177620454,15.571428571428571,4.21395,45.3870238383611,0.10361001491546631,10.909956325719385,0.1965340299278453,0.5847419575275037,0.6919434164192867,0.022724551212545796,0.08888361196746222,412.72250349454265,9.618489444383828,16.0,0,0,0,7,1461.4,84.7,58.1,0.6611295681063123,0.08003113832945766 +TABPFNV2 (default),10.17094054751926,0.6529079978428189,8.130392025686353,0.7583587688298037,0.5759929931993246,0.6167589670821245,0.054501672943579625,0.10235002340399287,180.76463109970453,48.615119220322335,17.142857142857142,4.25916,9.120386091868083,0.4424108028411865,9.10218172841126,0.8496304484655789,0.4230930305474229,0.46334677351231907,0.039920435247708164,0.025924057113489832,198.0827015794515,48.06257528515414,13.0,0,0,2,5,1423.1,84.6,87.9,0.6245847176079734,0.15608795965938824 +TABM (tuned),10282.773118314668,0.16793914030468654,6394.137481456323,0.20684678947882645,0.6392492711824916,0.6348200796667592,0.04171656575678051,0.11343262381340333,165087.49919995712,13.009409484385971,18.428571428571427,4.26932,7158.767174800237,0.10640169779459635,4158.291053548661,0.207815816005071,0.46522673304240253,0.5698757567290857,0.042976216396108935,0.03536573040794217,145459.09074336805,12.524825096296247,15.0,0,0,0,7,1390.3,79.7,75.2,0.5946843853820598,0.06761579963550407 +REALMLP (tuned),17010.514617333338,0.1779377165294829,7608.576993446122,0.3097772518477419,0.7047967586156892,0.6714657363696414,0.04329176014839616,0.09606004530911617,222286.57801104392,17.624625939228846,18.714285714285715,4.2931,6952.188049999872,0.17935961882273357,7793.932791479678,0.35800323118310096,0.7895709633169843,0.7739734607794859,0.043563415018409435,0.04951944914046118,155770.2311222541,19.19488348672616,20.0,0,0,0,7,1386.4,67.2,67.5,0.5880398671096345,0.0670343137254902 +XT (tuned + ensemble),660.5781686419533,1.1831508216403779,587.7119735330767,1.316998538867821,0.7590185126625372,0.7891367590549797,0.11152236742279202,0.09736117781705758,12611.439486121615,82.99538684812504,19.142857142857142,5.04794,628.4651173035304,0.7619378328323364,623.8510097889447,1.1438343022436304,1.0,0.9750636625508117,0.025903530399211117,0.10013551316301275,14081.344732130377,68.20231667792302,25.0,0,0,0,7,1368.7,96.2,56.9,0.5780730897009967,0.09159277504105091 +MNCA (tuned),11292.984456203474,0.27963828964838905,8931.621967973691,0.3656110731370408,0.8745416984197177,0.7937499648854651,0.08747111667996144,0.1048378077211334,206810.59926615006,22.83746449449945,20.0,4.74895,9914.869807489713,0.2117270072301229,9308.21673391936,0.41617828752294544,1.0,0.8582720331903025,0.03236956763422549,0.09957054673434969,222151.86792304332,21.680631541507974,22.0,0,0,0,7,1352.1,75.6,66.5,0.5581395348837209,0.056389716185112605 +XT (tuned),660.5781686419533,0.21893258586762446,587.7119735330767,0.2587197553454726,0.7900820708578837,0.8075420301395644,0.11311074973251874,0.10422741821694467,12611.439486121615,16.50847126388171,20.0,5.07281,628.4651173035304,0.13890047868092856,623.8510097889447,0.29098081635084977,1.0,0.966001342535169,0.02660912637513735,0.12806468799723258,14081.344732130377,14.85253877204164,24.0,0,0,0,7,1349.5,79.4,55.8,0.5581395348837209,0.0738201175909741 +TABM (default),41.61068485350836,0.09812178838820684,33.57485323462188,0.15124505155365406,0.6807517607831785,0.7090918253057739,0.060056891118245796,0.14437719671978216,799.4371008354477,9.016255870890259,20.428571428571427,4.27071,34.914909998575844,0.10241310596466065,15.010870798216047,0.15399858651571716,0.6869868540300672,0.6521420435454183,0.05612090882619314,0.05222355068579704,670.393510312447,7.016702417594013,18.0,0,0,0,7,1344.1,74.0,76.0,0.5481727574750831,0.06590036599435097 +REALMLP (default),86.77730172740088,0.162267147548615,39.30550135866405,0.28225561362513907,0.8426262409401332,0.8106865317187478,0.06636636072819802,0.11228334752066568,1142.2795335159524,15.984953083327307,20.857142857142858,4.76491,39.05289849440257,0.1580585479736328,41.53951650766714,0.3087081015110016,0.9775778014564183,0.916083975805878,0.0569453348396638,0.11891984655886878,875.0164668614119,17.513061397917994,21.0,0,0,0,7,1336.2,74.2,84.0,0.5382059800664452,0.05904177832749262 +MNCA (default),32.2820956619959,0.19605722162458633,25.437351400848286,0.2556706353965379,0.9394939857609429,0.8850758833671052,0.10259098508837912,0.12447145886487125,599.545254682166,15.875935676225895,23.0,4.94033,29.151979072888693,0.13998790582021078,27.82059796372352,0.29868905742963153,1.0,0.9264368142777736,0.03934956674595991,0.10305347761813116,637.198973323623,15.69192319835031,23.0,0,0,0,7,1281.3,77.4,69.6,0.4883720930232558,0.04608560450282446 +NN_TORCH (tuned + ensemble),12800.476561368458,1.1380399908338272,7081.227309206272,1.497767305174234,0.8644212313341298,0.8606552018095075,0.08862343937037091,0.15385577166637224,182552.53268590602,90.9320692691531,23.571428571428573,4.65351,7304.863066792488,0.6995723327000936,5692.02320138285,1.353764106339938,1.0,0.8993834289137577,0.0931488267963303,0.09843675731093122,158151.6866749578,90.00555655937438,23.0,0,0,0,7,1259.7,84.5,68.0,0.4750830564784053,0.046582150653996304 +GBM (default),6.38493198705098,0.5425946334051707,5.062559496806533,0.586017128679101,0.9842293100404953,0.9184581492662487,0.09214694190164817,0.1303135612595869,114.0054740463554,44.04934412762916,23.714285714285715,4.4838,6.053678401311239,0.27992521921793617,4.570868909804455,0.2745991643955073,1.0,0.9102434021881158,0.06107189731149043,0.10636019574143357,95.93839371138947,30.245100747842656,25.0,0,0,0,7,1264.3,69.2,79.1,0.4717607973421927,0.04260596546310832 +NN_TORCH (tuned),12800.476561368458,0.08545414417509048,7081.227309206272,0.11421111903323287,0.8608314951688506,0.866065061374633,0.09394846170102965,0.1725237564760349,182552.53268590602,7.06193729144408,24.428571428571427,4.7148,7304.863066792488,0.06568752924601237,5692.02320138285,0.131112832826372,1.0,0.9520043501564557,0.09751345914708842,0.10970665592293476,158151.6866749578,6.1490644891314155,24.0,0,0,0,7,1236.5,77.6,69.9,0.45514950166112955,0.04688360293950356 +RF (tuned + ensemble),805.4727961623479,1.1537035166271148,616.40361374022,1.4218773833596186,0.877625154030774,0.873695196304564,0.12760095389966494,0.1317936836999853,13870.809159323733,85.97653577971612,24.714285714285715,5.10626,639.3565126180649,0.9083369493484497,633.5687055876473,1.5119688388094148,1.0,0.9687194739277113,0.0349392877195821,0.13942046646077252,14398.077818059888,97.12788527724665,29.0,0,0,0,7,1232.4,78.2,77.2,0.4485049833887043,0.047037271021261165 +EBM (tuned + ensemble),5388.437029837048,0.1423879381210085,3500.656679775311,0.16581318572573583,0.8141909618196094,0.8372300943690691,0.1311561198750961,0.1890252179697543,82984.50653621592,10.569109007396577,25.0,4.37076,5200.331783827146,0.08374606768290202,1904.3573570607775,0.16013811522000573,1.0,0.9524388806964655,0.05778648960524235,0.1850552693192548,74151.4651845794,10.422047745376007,30.0,0,1,0,6,1225.9,64.6,74.2,0.4418604651162791,0.10352140856342538 +EBM (tuned),5388.437029837048,0.015351363590785436,3500.656679775311,0.01684270418671638,0.8494479068291391,0.8611525498883237,0.1367064766905373,0.2066243973235434,82984.50653621592,1.0832102622101742,26.571428571428573,4.42885,5200.331783827146,0.009351968765258789,1904.3573570607775,0.018787849694490433,1.0,0.9838364011703729,0.07014482573090552,0.1901907177021721,74151.4651845794,1.0,31.0,0,0,1,6,1180.0,84.9,67.6,0.4053156146179402,0.07737131573900916 +XT (default),0.7346943915836395,0.08473905533079117,0.46008623067859417,0.10458806079610798,0.9145279857275386,0.8998638850370638,0.13682802410988154,0.14082210751652371,11.201251938981406,6.5797630484480845,26.571428571428573,5.13889,0.4775619904200236,0.05924500624338786,0.4735771133343534,0.11693369290932056,1.0,1.0,0.07315765129610696,0.15512506986029978,10.84674207217604,6.1589221066128665,31.0,0,0,0,7,1185.9,55.9,69.5,0.4053156146179402,0.042659370354794876 +RF (tuned),805.4727961623479,0.17611904144287108,616.40361374022,0.22106795182548725,0.9129471887632264,0.9038084069687008,0.133394399377122,0.14125246260950458,13870.809159323733,13.301758986609267,26.714285714285715,5.18858,639.3565126180649,0.12748352686564127,633.5687055876473,0.24849182199342837,1.0,0.9739715212667134,0.04311956405835915,0.14203697823894862,14398.077818059888,13.680869433029466,31.0,0,0,0,7,1175.1,81.1,70.3,0.4019933554817276,0.04156432136058622 +XGB (default),5.009572153621249,0.2941358320296757,3.869550863988617,0.42965662676352057,1.0,0.9663003789632569,0.10421501398748971,0.1578017995574142,87.11728255407873,27.91087828932339,27.428571428571427,4.75458,4.333076930046081,0.2072049856185913,3.791700492881695,0.31131523040433723,1.0,0.9705413120767911,0.050731524717382626,0.11702111523756958,81.03254703110004,17.946977539037764,26.0,0,0,0,7,1161.5,80.2,78.4,0.3853820598006645,0.03720252228442986 +EBM (default),14.400625167952642,0.03714351956806485,10.51577767224111,0.06811790140940442,0.9258962131926525,0.9105676009517234,0.1468194585212037,0.21744478697144157,241.18775335182642,3.757417170209216,29.428571428571427,4.44221,13.099289311303032,0.03613271713256836,7.847325239497551,0.07057171314954758,1.0,0.9667299178416083,0.07294156515637107,0.20738482927540008,186.18589159248134,4.271072697409173,33.0,0,0,0,7,1097.5,66.7,78.5,0.3388704318936877,0.03694940668624879 +FASTAI (tuned + ensemble),2316.2620857840493,5.69649665128617,1912.4810450161624,8.30768905371141,1.0,0.9870488911977965,0.1274626118848621,0.18774240639715484,42346.92739383132,474.26279273152795,29.571428571428573,5.27232,2238.4564303557077,6.60931183497111,2248.0726789016367,7.52967050402227,1.0,0.9900151140054522,0.10220001788754296,0.20422002836922998,45511.629375922166,409.94392325616803,28.0,0,0,0,7,1090.0,70.5,70.7,0.33554817275747506,0.03428696636069908 +FASTAI (tuned),2316.2620857840493,0.595911791589525,1912.4810450161624,0.8787119009761157,1.0,0.9967758846936324,0.1322523422162721,0.21566408860648562,42346.92739383132,51.472930446274454,31.714285714285715,5.18701,2238.4564303557077,0.45566067695617674,2248.0726789016367,0.8899622596800327,1.0,1.0,0.11787324661401322,0.19653728657746058,45511.629375922166,47.3690323348188,32.0,0,0,0,7,1025.6,70.3,75.1,0.2857142857142857,0.03195251824453505 +NN_TORCH (default),45.35887446289971,0.0802082171515813,31.67622236459062,0.1165329153506545,0.9903234435493359,0.9704120580222819,0.12780665628037344,0.23160692294055926,751.5426969414585,7.0142840459527545,32.285714285714285,4.81722,35.183143361409506,0.06712730725606282,24.305650569576496,0.13416367617779604,1.0,0.9898941471678575,0.14511201063493562,0.13614151845721162,546.0088939544379,6.398331846186531,33.0,0,0,0,7,1002.7,81.2,78.2,0.2724252491694352,0.03227817407134295 +RF (default),1.5119719039826165,0.08608071387760223,0.5884101131506653,0.10503802269179743,1.0,0.9770425458267085,0.15345507712888892,0.1988909284118511,17.48096698761895,6.618407517134457,32.285714285714285,5.26146,0.5022362232208252,0.059631967544555665,0.5271703851240811,0.11813050784694061,1.0,1.0,0.08142906388988314,0.17217694084546725,12.02349439116373,6.19914937070286,35.0,0,0,0,7,1000.0,0.0,0.0,0.2724252491694352,0.03208061043089792 +FASTAI (default),8.831207484669156,0.5305940438830663,6.755940136126163,0.7105278175311491,1.0,0.9966350691817539,0.18685629150301894,0.27228181364938464,154.14927757121433,43.905972028215395,34.714285714285715,6.36878,7.107011103630066,0.42740853627522785,7.092825452724616,0.8431779863303789,1.0,1.0,0.11912950408368406,0.27762931758033993,160.3044815424371,43.14144455330922,36.0,0,0,0,7,889.1,76.0,80.6,0.2159468438538206,0.029088030746588906 +LR (tuned + ensemble),152.34852822886572,0.09327742031642369,140.44301462951617,0.1463023584921992,1.0,1.0,0.34749111122086884,0.6284394248904203,2977.405199523172,9.309183761718689,40.142857142857146,8.15038,150.0536302089691,0.057510574658711754,149.75412196503902,0.1287132100674069,1.0,1.0,0.26203835697566735,0.7414458856888512,3344.1604489764304,7.22988242769623,40.0,0,0,0,7,487.6,122.1,190.4,0.08970099667774087,0.02496637999636136 +KNN (tuned + ensemble),4.245547793025062,0.1591204832470606,2.4746151721359886,0.1895989937949328,1.0,0.991315763418419,0.3842177651885135,0.7048115612733296,62.40052816223211,11.931188445938428,40.42857142857143,8.41865,2.490966773033142,0.10710635185241699,2.4322953132177623,0.17676389939136214,1.0,1.0,0.41352429770943944,0.7907767115559304,56.15178324522979,13.124172379663548,41.0,0,0,0,7,441.2,118.9,96.3,0.08305647840531562,0.024796311969150473 +LR (tuned),152.34852822886572,0.037903934433346706,140.44301462951617,0.06382438080033122,1.0,1.0,0.34766987292821344,0.6285783845355511,2977.405199523172,3.6512091758703176,40.714285714285715,8.15927,150.0536302089691,0.03235630989074707,149.75412196503902,0.06193413547254449,1.0,1.0,0.2620998903501569,0.7430806606996537,3344.1604489764304,4.067640041721783,41.0,0,0,0,7,425.9,136.3,159.1,0.07641196013289037,0.024641310485245337 +KNN (tuned),4.245547793025062,0.02863398733593169,2.4746151721359886,0.030004193594300673,1.0,0.9977713176559868,0.3910594237799242,0.7476794241583206,62.40052816223211,1.9760324688350897,41.285714285714285,8.54422,2.490966773033142,0.014579184850056966,2.4322953132177623,0.02910016936139115,1.0,1.0,0.420237807146593,0.813866558903926,56.15178324522979,1.7112793648526077,42.0,0,0,0,7,334.4,155.7,137.5,0.06312292358803986,0.024270393591953304 +LR (default),3.9008307816490295,0.0549199554655287,3.239606778702032,0.07872063839931824,1.0,1.0,0.3619225843912718,0.6950518956016436,71.92078534572094,4.852563458042672,41.42857142857143,8.22758,3.3858322699864707,0.05012191931406657,3.447264865169665,0.08708413534655632,1.0,1.0,0.26184876537499124,0.7556433033178954,75.37476586504175,4.100500531123858,41.0,0,0,0,7,349.8,107.6,138.9,0.059800664451827246,0.02416543967066615 +KNN (default),0.06228995323181152,0.01962243337479849,0.04219198036913814,0.02726788157828385,1.0,0.9997734872659316,0.432705772179096,0.9384821422991884,1.0,1.6381446334470857,43.57142857142857,9.55646,0.04463104406992594,0.014348586400349935,0.043660770227879654,0.02863989301467053,1.0,1.0,0.4573857800165779,1.0,1.0,1.5792045889101338,44.0,0,0,0,7,8.2,160.2,218.5,0.009966777408637873,0.022964840038010772 diff --git a/data/tabpfn-reg/tuning-impact-elo-horizontal.pdf b/data/tabpfn-reg/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..7e2509b1b563ab555e842f1adcd5b70facd9707c Binary files /dev/null and b/data/tabpfn-reg/tuning-impact-elo-horizontal.pdf differ diff --git a/data/tabpfn-reg/tuning-impact-elo-horizontal.png.zip b/data/tabpfn-reg/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..205fe37d17a46a863e2acb6ebb5e834706a4424a --- /dev/null +++ b/data/tabpfn-reg/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1907594925200ee1a568c36817d0b22dc6cad8a4f5fa418c697c597e2e57112d +size 135809 diff --git a/data/tabpfn-tabicl-cls/figures/critical-diagram.pdf b/data/tabpfn-tabicl-cls/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..54539b5f41dc2f5a4e1b4747a53d81add750da31 Binary files /dev/null and b/data/tabpfn-tabicl-cls/figures/critical-diagram.pdf differ diff --git a/data/tabpfn-tabicl-cls/figures/critical-diagram.png.zip b/data/tabpfn-tabicl-cls/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..24e08506593fee7a8727aefce7940752470d6e76 --- /dev/null +++ b/data/tabpfn-tabicl-cls/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09fd50f8530f0264c02045700850c01bf4b212bcd6257fa3321f09e02e249a62 +size 319568 diff --git a/data/tabpfn-tabicl-cls/leaderboard.tex b/data/tabpfn-tabicl-cls/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..b41ff65b5428b5f7a450717fe825914a05f0b501 --- /dev/null +++ b/data/tabpfn-tabicl-cls/leaderboard.tex @@ -0,0 +1,53 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +TabPFNv2 (T+E) & \textcolor{gold}{\textbf{1736${}_{-45,+46}$}} & \textcolor{gold}{\textbf{0.700}} & \textcolor{gold}{\textbf{5.8}} & \textcolor{gold}{\textbf{2.2}} & \textcolor{gold}{\textbf{8}} & \textcolor{gold}{\textbf{4.6\%}} & 3445.60 & 48.24 \\ +TabM (T+E) & \textcolor{silver}{\textbf{1612${}_{-37,+35}$}} & 0.483 & \textcolor{silver}{\textbf{9.3}} & 4.5 & 2 & 9.2\% & 2828.45 & 1.60 \\ +TabICL (D) & \textcolor{bronze}{\textbf{1608${}_{-34,+36}$}} & \textcolor{bronze}{\textbf{0.543}} & \textcolor{bronze}{\textbf{9.5}} & \textcolor{silver}{\textbf{3.3}} & \textcolor{silver}{\textbf{4}} & \textcolor{silver}{\textbf{7.1\%}} & 8.89 & 1.74 \\ +TabPFNv2 (T) & 1588${}_{-42,+32}$ & \textcolor{silver}{\textbf{0.544}} & 10.3 & 3.7 & 1 & \textcolor{bronze}{\textbf{8.2\%}} & 3445.60 & 1.00 \\ +RealMLP (T+E) & 1573${}_{-37,+29}$ & 0.402 & 10.9 & 7.1 & 0 & 10.4\% & 5786.69 & 4.27 \\ +AutoGluon 1.3 (4h) & 1544${}_{-44,+34}$ & 0.440 & 11.9 & 5.6 & 1 & 9.5\% & 2309.21 & 2.55 \\ +TabPFNv2 (D) & 1542${}_{-38,+38}$ & 0.499 & 12.0 & \textcolor{bronze}{\textbf{3.4}} & \textcolor{silver}{\textbf{4}} & 9.5\% & 4.06 & 0.44 \\ +LightGBM (T+E) & 1521${}_{-35,+40}$ & 0.319 & 12.9 & 7.1 & 1 & 11.8\% & 647.56 & 1.72 \\ +TabM (T) & 1512${}_{-40,+30}$ & 0.387 & 13.3 & 7.5 & 0 & 10.4\% & 2828.45 & 0.22 \\ +LightGBM (T) & 1462${}_{-34,+33}$ & 0.262 & 15.3 & 13.3 & 0 & 12.2\% & 647.56 & 0.28 \\ +CatBoost (T+E) & 1456${}_{-32,+38}$ & 0.271 & 15.8 & 11.6 & 0 & 11.7\% & 1465.86 & 0.69 \\ +CatBoost (T) & 1435${}_{-32,+36}$ & 0.247 & 16.5 & 10.5 & 0 & 12.0\% & 1465.86 & 0.09 \\ +TabM (D) & 1429${}_{-32,+35}$ & 0.288 & 17.0 & 11.2 & 0 & 13.7\% & 10.42 & 0.15 \\ +ModernNCA (T) & 1426${}_{-30,+37}$ & 0.255 & 17.1 & 8.7 & 1 & 11.7\% & 5944.88 & 0.52 \\ +CatBoost (D) & 1425${}_{-35,+32}$ & 0.228 & 17.0 & 10.0 & 0 & 13.3\% & 5.72 & 0.11 \\ +XGBoost (T+E) & 1415${}_{-33,+36}$ & 0.222 & 17.5 & 14.0 & 0 & 13.1\% & 766.06 & 1.92 \\ +EBM (T+E) & 1383${}_{-33,+31}$ & 0.186 & 19.1 & 12.5 & 0 & 15.7\% & 1109.06 & 0.23 \\ +XGBoost (T) & 1379${}_{-31,+39}$ & 0.181 & 19.2 & 16.8 & 0 & 13.4\% & 766.06 & 0.28 \\ +RealMLP (T) & 1379${}_{-42,+36}$ & 0.201 & 19.3 & 14.8 & 0 & 13.4\% & 5786.69 & 0.25 \\ +ModernNCA (T+E) & 1375${}_{-38,+35}$ & 0.307 & 19.5 & 8.6 & 0 & 12.7\% & 5944.88 & 8.40 \\ +ModernNCA (D) & 1367${}_{-33,+31}$ & 0.183 & 19.8 & 10.0 & 1 & 15.1\% & 14.80 & 0.34 \\ +TorchMLP (T+E) & 1356${}_{-32,+33}$ & 0.199 & 20.3 & 15.0 & 0 & 13.3\% & 2862.05 & 2.16 \\ +FastaiMLP (T+E) & 1356${}_{-35,+32}$ & 0.230 & 20.2 & 11.2 & 0 & 15.7\% & 1358.63 & 8.07 \\ +TabDPT (D) & 1323${}_{-33,+24}$ & 0.258 & 22.0 & 6.1 & 2 & 14.8\% & 27.49 & 8.86 \\ +EBM (T) & 1320${}_{-41,+33}$ & 0.120 & 22.1 & 17.3 & 0 & 16.5\% & 1109.06 & 0.03 \\ +EBM (D) & 1299${}_{-35,+37}$ & 0.153 & 23.0 & 9.7 & 1 & 17.3\% & 5.28 & 0.08 \\ +FastaiMLP (T) & 1269${}_{-40,+38}$ & 0.113 & 24.5 & 19.1 & 0 & 17.2\% & 1358.63 & 0.90 \\ +ExtraTrees (T+E) & 1260${}_{-47,+27}$ & 0.102 & 25.0 & 17.4 & 0 & 18.2\% & 370.85 & 1.47 \\ +TorchMLP (T) & 1238${}_{-33,+33}$ & 0.086 & 25.8 & 22.0 & 0 & 15.9\% & 2862.05 & 0.15 \\ +RealMLP (D) & 1234${}_{-36,+37}$ & 0.083 & 26.1 & 21.8 & 0 & 15.9\% & 36.24 & 0.28 \\ +XGBoost (D) & 1213${}_{-32,+41}$ & 0.032 & 27.1 & 24.8 & 0 & 16.7\% & 2.40 & 0.22 \\ +ExtraTrees (T) & 1200${}_{-32,+35}$ & 0.072 & 27.5 & 22.6 & 0 & 19.4\% & 370.85 & 0.16 \\ +RandomForest (T+E) & 1157${}_{-30,+36}$ & 0.057 & 29.5 & 22.0 & 0 & 19.5\% & 527.42 & 1.39 \\ +LightGBM (D) & 1154${}_{-29,+39}$ & 0.030 & 29.6 & 27.7 & 0 & 17.7\% & 2.90 & 0.13 \\ +RandomForest (T) & 1096${}_{-31,+36}$ & 0.023 & 31.9 & 29.1 & 0 & 20.8\% & 527.42 & 0.12 \\ +TorchMLP (D) & 1082${}_{-34,+36}$ & 0.030 & 32.3 & 28.8 & 0 & 21.4\% & 10.38 & 0.19 \\ +FastaiMLP (D) & 1051${}_{-33,+31}$ & 0.019 & 33.6 & 31.0 & 0 & 23.0\% & 4.73 & 0.62 \\ +Linear (T+E) & 1030${}_{-41,+39}$ & 0.055 & 34.2 & 22.9 & 0 & 28.3\% & 88.63 & 0.26 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.005 & 35.3 & 33.4 & 0 & 25.8\% & 0.45 & 0.07 \\ +Linear (T) & 997${}_{-41,+37}$ & 0.035 & 35.2 & 28.4 & 0 & 29.0\% & 88.63 & 0.09 \\ +Linear (D) & 982${}_{-39,+42}$ & 0.025 & 35.7 & 25.0 & 0 & 30.5\% & 2.27 & 0.11 \\ +ExtraTrees (D) & 956${}_{-51,+44}$ & 0.015 & 36.7 & 32.9 & 0 & 27.5\% & 0.40 & 0.07 \\ +KNN (T+E) & 704${}_{-46,+50}$ & 0.000 & 41.6 & 41.2 & 0 & 48.0\% & 3.71 & 0.17 \\ +KNN (T) & 609${}_{-48,+53}$ & 0.000 & 42.7 & 42.5 & 0 & 50.1\% & 3.71 & 0.04 \\ +KNN (D) & 408${}_{-96,+87}$ & 0.000 & 44.2 & 44.1 & 0 & 59.2\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/tabpfn-tabicl-cls/tabarena_leaderboard.csv b/data/tabpfn-tabicl-cls/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..9cfacb0044514fd9fcb6b0fd592ac532d061c310 --- /dev/null +++ b/data/tabpfn-tabicl-cls/tabarena_leaderboard.csv @@ -0,0 +1,46 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +TABPFNV2 (tuned + ensemble),14614.639715819583,148.11643430860633,3877.419600731425,65.38306906011915,0.2999799624009034,0.36592302109392316,0.046199591967015556,0.028953147052208875,72438.91792383758,5032.936436246271,5.788461538461538,0.17261500000000002,8173.816975453165,60.12680508957969,3445.602606086448,48.23597351862546,0.21441671773452015,0.4068697696081146,0.021640889295981347,0.016874625134937742,43032.16406688419,2931.913108083719,3.5,8,4,1,13,1736.3,45.7,44.6,0.8911713286713286,0.46269305993659526 +TABM (tuned + ensemble),7080.034926002046,2.290320276704609,3436.5218404284533,2.1079046445931953,0.5166525238887076,0.547775172494987,0.09238776741660952,0.04985850997225548,43123.835967101855,114.22499982092596,9.307692307692308,0.17379499999999998,6405.6722231189415,1.880999751885732,2828.4486752645407,1.5957955528425738,0.47136645123500964,0.5540981099375837,0.04617144277378826,0.034316541138176816,44088.87353980956,109.46959390379024,8.0,2,1,1,22,1612.5,34.1,36.5,0.8111888111888111,0.22027167779430223 +TABICL (default),21.933824618351764,2.7954679915028757,10.278805574502684,2.035731958506663,0.45732829959408833,0.5181742657898207,0.07108757868512468,0.03468928052253071,145.15967621212897,119.47187491598106,9.48076923076923,0.17207,19.54939921696981,2.1857474181387158,8.891901835466445,1.7433667301105085,0.46959481282562443,0.583210155619012,0.03487584587193143,0.02062556592021359,120.41628593220491,98.62751622705991,8.0,4,3,1,18,1607.5,35.1,33.8,0.8072552447552448,0.3008875223205248 +TABPFNV2 (tuned),14614.639715819583,4.849548537201351,3877.419600731425,2.254088269272259,0.4562783530638654,0.48513499648162556,0.08179742986099706,0.05007438478688923,72438.91792383758,165.31166707386078,10.326923076923077,0.1868,8173.816975453165,0.8871031337314181,3445.602606086448,0.9952991538273057,0.34109768962419973,0.46901679172287375,0.05804204088839793,0.02327176321840322,43032.16406688419,48.819771718398094,6.0,1,8,1,16,1588.5,31.1,41.1,0.7880244755244755,0.26951399319966235 +REALMLP (tuned + ensemble),17809.543566583365,5.343982117807764,5995.031437071161,5.920653730744878,0.5983187605703805,0.5877560111308218,0.10395608954668528,0.05250154829922825,112219.73072981052,290.95193637081104,10.865384615384615,0.17424,10344.939689996507,4.694504770967695,5786.691668992368,4.267078452745884,0.5696681015084324,0.5635752491354304,0.051850568481074444,0.04632297855309826,87871.6182808383,267.1363813998378,9.0,0,0,3,23,1572.9,29.0,36.1,0.7757867132867133,0.1417961559305308 +AutoGluon 1.3 (4h),5931.009838958377,3.1656774007357082,3673.8246676849526,3.0250106370782723,0.5599854226187498,0.5522297755522069,0.09461405000435809,0.05278876561410328,39712.28933278582,151.42916243848958,11.884615384615385,0.17691,5020.2655313505065,2.664059625731574,2309.213100138395,2.5543334551748154,0.5600808158121042,0.565709489217623,0.04762778419021996,0.04116173129591805,35357.34871646478,128.98489015193604,8.5,1,2,0,23,1544.1,33.4,44.0,0.7526223776223776,0.17858871441651328 +TABPFNV2 (default),11.884418644151115,1.1739113624279316,5.514520630749067,0.6490305586450037,0.5013642769155608,0.5669313754038009,0.09512311589648129,0.06286359741165812,76.29505964660768,41.758850754001486,12.038461538461538,0.1886,8.093020015292698,0.44081105126274955,4.062007578219401,0.4356807523460313,0.3771347187714357,0.5238664820190138,0.05018180969629027,0.021765272352146064,58.62232186481185,25.39697585954705,5.0,4,1,4,17,1541.6,37.1,37.8,0.7491258741258742,0.2935579164181083 +GBM (tuned + ensemble),1379.7589400635827,3.041326452829899,860.8328230742596,2.952036385308545,0.6806482110513491,0.6837279086393804,0.1176313686476465,0.061906173521871284,10455.248725429481,156.11534656534064,12.903846153846153,0.174085,1321.8940714677174,2.4085231754514904,647.5566852470606,1.7204157994123173,0.7005535661954161,0.7073670918352166,0.051567085599247775,0.05140473915859429,9480.825611975059,103.35126359750211,12.5,1,0,2,23,1521.0,39.2,34.8,0.729458041958042,0.14042554762228465 +TABM (tuned),7080.034926002046,0.2169825582422762,3436.5218404284533,0.22059896519012676,0.6127855914191946,0.6024458446591694,0.10415143904294016,0.06347344157931543,43123.835967101855,11.305450805467302,13.346153846153847,0.17596499999999998,6405.6722231189415,0.19745506313112046,2828.4486752645407,0.21564835018398254,0.5915500043256848,0.637432321817063,0.06661219922938116,0.04216185168235247,44088.87353980956,9.589800419015791,13.0,0,2,1,23,1511.6,29.3,40.0,0.7194055944055944,0.13250415685370437 +GBM (tuned),1379.7589400635827,0.5707490142594036,860.8328230742596,0.6449583363337497,0.737683616453633,0.7058815019854675,0.12190922144153328,0.06697491769463343,10455.248725429481,28.289876597893098,15.326923076923077,0.17371999999999999,1321.8940714677174,0.34941615396075776,647.5566852470606,0.28350492153737616,0.7826548575969626,0.7126095533835781,0.05516713547384128,0.05173099008418312,9480.825611975059,16.480248719877835,15.5,0,0,0,26,1461.6,33.0,33.6,0.6743881118881119,0.07518973620464424 +CAT (tuned + ensemble),4698.536897969144,0.9597997341400538,2797.035390366479,0.7965719365801648,0.728920075896456,0.7126569304674935,0.11678549305379037,0.07069284985664498,27448.91121504087,44.04781428274726,15.76923076923077,0.1771,3111.985966463884,0.7725222905476887,1465.8584785724734,0.6919304562740058,0.7992439551459185,0.720606902542752,0.0655088388746699,0.053959365141496254,21194.263639925673,39.04121549244424,16.0,0,0,1,25,1455.5,38.0,31.7,0.6643356643356644,0.08640388177736971 +CAT (tuned),4698.536897969144,0.11917537287769155,2797.035390366479,0.12051762325590008,0.7533425274831995,0.7320002632947487,0.11966485384248739,0.07092155545579401,27448.91121504087,5.822055889865127,16.53846153846154,0.177215,3111.985966463884,0.10264339711931017,1465.8584785724734,0.09138547661664378,0.8504867849422679,0.7742727557913103,0.06254942442191513,0.056743152931981594,21194.263639925673,5.623705460666335,16.5,0,1,0,25,1435.3,35.7,31.2,0.6468531468531469,0.09500537263224745 +TABM (default),25.031941110761757,0.20188171669968172,14.494913436183246,0.20961960333163143,0.7120650416184634,0.7180270297124032,0.1369795645146118,0.07804754343441613,189.57549613749592,9.978425427673205,16.96153846153846,0.175255,19.491187883747948,0.16552388005786473,10.418743379746541,0.15495242186411928,0.7417274447397321,0.7370120765139465,0.07020888606704828,0.04168408531342971,140.5904555433362,9.536419533323254,13.5,0,0,0,26,1429.3,34.7,31.2,0.6372377622377622,0.08930659071409167 +CAT (default),184.70535022315818,0.14151572040003588,154.6142383995702,0.1555426094856078,0.7721520925382083,0.773058737636122,0.1329171714368633,0.07483897220415464,477.2608484422213,7.423108461919998,17.0,0.176625,11.6375067697631,0.16093741522894967,5.723546572951673,0.1101036800878527,0.8708469956136012,0.8295699236647479,0.07003170550807408,0.03759488482131658,107.22813859237792,5.910285969899903,17.5,0,2,0,24,1425.0,31.1,34.5,0.6363636363636364,0.10014897524065079 +MNCA (tuned),10872.434398398032,0.5841771599573967,5614.838977261846,0.5087025474163123,0.7448490525012423,0.6637614958528275,0.11651895782038843,0.07336435140526076,77920.67944321278,26.83390437537825,17.057692307692307,0.17598,10294.846671289868,0.4166409836875068,5944.878874246984,0.5156200362715774,0.8177816514691232,0.7235503806350614,0.06755654915093501,0.057542536642744974,66956.02864547497,22.936826653686147,16.5,1,0,0,25,1425.9,36.9,29.1,0.6350524475524476,0.11459331361587798 +XGB (tuned + ensemble),1872.6912781718452,2.683822184852046,1147.2290814465296,3.3619425077092306,0.7784275988369416,0.7582815836411924,0.13102530851919714,0.08005807343057435,11800.394028929062,144.45205787298235,17.51923076923077,0.18004,1385.052251373397,1.6176902174949646,766.0569170086173,1.9172343251389494,0.8316341855574652,0.7791345165987353,0.07343601410090844,0.05291664219529325,11520.40078776449,81.51508510452376,15.25,0,0,0,26,1415.4,35.8,32.1,0.6245629370629371,0.07166187524738382 +EBM (tuned + ensemble),2781.1579704396745,0.37791664172441536,1628.507747090886,0.31990997744824035,0.8144475987903695,0.8130615556926872,0.1572702096615729,0.11477484338056507,16692.42562947248,18.328617946180266,19.076923076923077,0.190645,1861.6872274941868,0.27221977710723877,1109.0589152421817,0.22935467594400044,0.9368603191709515,0.8266966373060847,0.0849735392053248,0.05122044741314488,17695.477225433653,12.287120931350431,18.5,0,0,1,25,1383.1,30.7,32.2,0.5891608391608392,0.08004680984485021 +XGB (tuned),1872.6912781718452,0.6264070270407913,1147.2290814465296,0.8619192527074968,0.8187393857188691,0.7809467391114648,0.1335476211929174,0.08261887953032405,11800.394028929062,30.013383573853858,19.192307692307693,0.18088500000000002,1385.052251373397,0.25049915578630233,766.0569170086173,0.27754517145625424,0.9017943832788666,0.7799972199454441,0.07798856953005051,0.052696435285602104,11520.40078776449,11.544680214373663,19.5,0,0,0,26,1379.3,39.0,30.4,0.5865384615384616,0.05963545192718714 +REALMLP (tuned),17809.543566583365,0.2565666730587299,5995.031437071161,0.31613979844114337,0.7992732991174807,0.7337946286494308,0.13385871828365248,0.07632560949832082,112219.73072981052,14.318352639712598,19.346153846153847,0.174605,10344.939689996507,0.2056242651409573,5786.691668992368,0.2497109023068299,0.895201963654964,0.74474927717263,0.07816480714694801,0.061570080954222194,87871.6182808383,11.529830439504863,17.0,0,0,0,26,1379.0,35.5,42.0,0.583041958041958,0.06741843156688952 +MNCA (tuned + ensemble),10872.434398398032,12.131162534310267,5614.838977261846,9.225622394556064,0.6933896363411076,0.6676223600444691,0.12705648224037935,0.09495265900961732,77920.67944321278,542.9013751751211,19.46153846153846,0.193055,10294.846671289868,9.331144248114692,5944.878874246984,8.395978282094799,0.722368050936999,0.6992013611051189,0.10101560653736502,0.048750216328898435,66956.02864547497,426.41545163369784,15.0,0,1,3,22,1375.2,34.1,37.3,0.5804195804195804,0.11686086342849132 +MNCA (default),28.48231661941251,0.466951452768766,14.20837500082612,0.38564885096222307,0.8169283513779425,0.7625155608140191,0.15085268931164045,0.08288629881402086,190.09277232716437,21.03504265137771,19.76923076923077,0.18519,24.325043747160173,0.36861725648244226,14.804303881798361,0.336567721078897,0.9808195818002508,0.7827163737065492,0.07974631950048428,0.053259111831894385,190.7747999331715,15.595042220247258,21.0,1,0,0,25,1366.6,30.2,32.3,0.5734265734265734,0.10017785315060294 +FASTAI (tuned + ensemble),2658.981896154697,9.680411353478066,1616.869681108774,10.454986295420108,0.7698173487890432,0.7686639558056911,0.1568263488914938,0.08996191783618754,20955.495033608,514.94962880429,20.25,0.17986000000000002,2313.697349058257,8.905639794137743,1358.6299051921596,8.066833347447606,0.9823907390988809,0.8177131221302221,0.08132829350115817,0.05523334112431259,17602.335588839425,531.7232414230566,20.0,0,1,0,25,1356.1,31.7,35.0,0.5625,0.08900824287112627 +NN_TORCH (tuned + ensemble),8367.9645569837,3.8136142372066137,3603.347061143873,3.319891821445254,0.8005534385996745,0.764065484436084,0.1332555944462707,0.07705265128451745,56068.072795326945,181.3894555134842,20.28846153846154,0.17949500000000002,6973.094145007928,2.944306871626112,2862.0511040893566,2.157502904371376,0.9508009219506811,0.8634802144576847,0.06782483230561193,0.06047718336789526,51500.257380537965,155.60936197975602,20.0,0,0,0,26,1356.4,32.6,31.9,0.5616258741258742,0.06668189048939435 +TABDPT (default),73.6718590354308,22.101120046978323,33.70000421461804,29.140721170745643,0.7421770023997575,0.7373052354542396,0.1481225969644462,0.09258934520941622,529.9323761499634,1297.090837487081,22.0,0.204185,71.26209372944302,21.38572289016512,27.488664901286818,8.862313494979123,1.0,0.88150849404269,0.06691814537901497,0.04715293243733788,513.7039796057306,1113.0844664803024,25.5,2,0,3,21,1323.2,24.0,32.4,0.5227272727272727,0.16307441111325852 +EBM (tuned),2781.1579704396745,0.04655793772803412,1628.507747090886,0.04424123436436782,0.8796773591220505,0.848789281667686,0.16506052478039396,0.12247708467085656,16692.42562947248,2.268478500812272,22.134615384615383,0.19155,1861.6872274941868,0.0338150527742174,1109.0589152421817,0.02733291425041831,1.0,0.8902021091214286,0.0928869358898275,0.062194782256558334,17695.477225433653,1.252412448907763,24.0,0,0,0,26,1319.5,32.8,40.9,0.5196678321678322,0.0577362774386474 +EBM (default),10.549292183941246,0.06438207361433242,5.791308777159165,0.0798605102452594,0.8468070324259693,0.8496093431986085,0.1725067485868843,0.124266558571261,75.26102802648397,3.652629917649085,23.0,0.19248500000000002,8.052384217580158,0.051854162746005586,5.279257749622374,0.07769986864408396,1.0,0.8953860150359447,0.09554880640873625,0.056563353282588213,77.97362526017017,3.340101142403973,23.0,1,0,2,23,1298.8,36.2,34.1,0.5,0.10350085258304385 +FASTAI (tuned),2658.981896154697,0.7253259826929142,1616.869681108774,0.8254771149356669,0.8865326975991655,0.8336048750524725,0.1719892484242808,0.10544003385780601,20955.495033608,39.61972184578173,24.48076923076923,0.18178,2313.697349058257,0.7597291602028741,1358.6299051921596,0.8969521438633071,1.0,0.8551942745167906,0.09660910797742411,0.07038888057026207,17602.335588839425,37.14448504861147,23.0,0,0,0,26,1269.2,37.8,39.5,0.46634615384615385,0.0522325290233973 +XT (tuned + ensemble),714.8477392964893,1.554570463987497,476.1600651525857,1.7304383155841179,0.8984673764067701,0.8650357556797086,0.18213060995030939,0.12358064556047872,6037.128774148354,86.31947081771706,25.0,0.18718,684.9222148127026,1.3082488920953539,370.85408017752667,1.4664534567412004,1.0,0.927972542444979,0.09673248729372463,0.07594781768304604,5339.627074654447,77.0823275943278,28.25,0,0,0,26,1260.2,26.7,47.0,0.45454545454545453,0.057367362828272636 +NN_TORCH (tuned),8367.9645569837,0.19940724566451504,3603.347061143873,0.20335754670694137,0.9141063957909149,0.850631259028558,0.15913478602343065,0.0993949625945807,56068.072795326945,10.018943215736572,25.78846153846154,0.18092,6973.094145007928,0.144207231203715,2862.0511040893566,0.15177921475257505,1.0,0.9108681738244536,0.10379368194908523,0.09268904724445692,51500.257380537965,8.681540922799902,27.0,0,0,0,26,1238.5,32.5,32.6,0.4366258741258741,0.045366926667074474 +REALMLP (default),111.4785790004282,0.2675422726533352,36.9875135803688,0.319519629673943,0.9170988508971886,0.8569208298595739,0.15925534618050793,0.09850483276040865,704.4596577225865,14.367018773597605,26.057692307692307,0.17819000000000002,62.036723497178826,0.20994164678785537,36.23724475975503,0.27878779609060306,1.0,0.9211958531631512,0.11529538667598471,0.06746878542921386,536.9122103749642,12.116250573587863,28.5,0,0,0,26,1233.8,37.0,35.4,0.430506993006993,0.04593533182737854 +XGB (default),4.970388249658113,0.2682310309165563,3.142391352663509,0.37694382359999684,0.967663807523489,0.9070085519838873,0.16741259421914897,0.12284060876198313,37.42364826794432,14.932177592236862,27.057692307692307,0.18894,4.433991021580166,0.20984046989017063,2.395188706947506,0.2182544724645258,1.0,0.9518900673045412,0.11516738963641299,0.07681752222912633,34.313637563138,10.099890730056913,25.75,0,0,0,26,1212.8,40.2,31.9,0.40777972027972026,0.04033171361980777 +XT (tuned),714.8477392964893,0.1734743657275143,476.1600651525857,0.2154329300724878,0.9275522820640911,0.8944270214472317,0.19352158162401475,0.1332600566819766,6037.128774148354,9.800458404720944,27.5,0.19,684.9222148127026,0.15512712796529132,370.85408017752667,0.16132775528274945,1.0,0.9688071317540197,0.10725621698653792,0.08413965072039432,5339.627074654447,8.581565117405717,31.5,0,0,0,26,1200.5,34.6,31.8,0.3977272727272727,0.04426880847584831 +RF (tuned + ensemble),915.5438805149151,1.5748089445961848,532.5626464695567,1.640386311447312,0.9428934399339288,0.9114679294811889,0.19492401068058715,0.147796519544612,6794.043055663636,81.1610891109226,29.46153846153846,0.18906,789.1687051984999,1.122593025366465,527.4239458868619,1.3899910445458383,1.0,0.9937951111561848,0.11848332500543385,0.09151654391313321,6269.6136687159,75.55960345229795,31.5,0,0,0,26,1156.9,35.3,29.3,0.3531468531468531,0.04551104983248137 +GBM (default),5.376282313848153,0.20799484711426958,3.3489543497095178,0.19728412961063485,0.9701181894525962,0.9226046869734794,0.17658302669842518,0.11946139446934875,42.11806124941037,9.792256635961557,29.576923076923077,0.188175,5.033887876404656,0.22240020169152153,2.8984772023300502,0.13282292956587455,1.0,0.9543370010011993,0.11805347934940252,0.09964492878261602,38.718264562373335,7.411495765404641,29.5,0,0,0,26,1154.1,38.8,28.9,0.3505244755244755,0.036096329534928126 +RF (tuned),915.5438805149151,0.159982283706339,532.5626464695567,0.2003027211432199,0.9772506936041817,0.9400975627550481,0.20796627140051763,0.15738142731874985,6794.043055663636,8.85724716182767,31.923076923076923,0.190815,789.1687051984999,0.14185967445373537,527.4239458868619,0.12279197881507195,1.0,0.9949804676438307,0.13224551901999893,0.10601779560500876,6269.6136687159,8.226247917103098,33.0,0,0,0,26,1096.4,35.3,30.8,0.2972027972027972,0.03435712054173671 +NN_TORCH (default),27.39151145983965,0.20185868648382335,14.560395769408618,0.21573681790796437,0.9698948249425869,0.9391775521500288,0.2143711666740534,0.1478455999492002,188.3149665224302,10.435316060204023,32.32692307692308,0.183195,20.22722778055403,0.14770235617955524,10.376930987013111,0.18792402145583464,1.0,0.9983912483912485,0.14318031310217677,0.1051705699943664,197.09431521312473,8.356657050571908,33.0,0,0,0,26,1082.4,35.8,33.5,0.2880244755244755,0.03477287158574056 +FASTAI (default),10.397479128328143,0.6249765678348704,5.689409993948898,0.6426262937267089,0.9807465319111015,0.9403855906437737,0.2303447532418144,0.17838953792880163,79.38940070919118,32.314469844151965,33.61538461538461,0.19183499999999998,9.91800790362888,0.5629585729704962,4.729717448807326,0.6226443216085578,1.0,0.9998853211009175,0.16088565720086695,0.11423976777713259,63.05950085745769,32.34956133934325,36.5,0,0,0,26,1051.3,30.1,32.4,0.25874125874125875,0.03221855964948352 +LR (tuned + ensemble),175.85719627702338,0.42670365847074065,115.79203038641191,0.374819481777715,0.9447815704031849,0.9377863465231412,0.28317417183852894,0.24822823084079834,1448.7019616321465,20.41052886468638,34.23076923076923,0.216585,158.39292872746785,0.19311302105585734,88.63237206036187,0.25697546561814155,1.0,1.0,0.21769005088471027,0.1779543124949825,1246.7830478036524,13.25530093456624,38.0,0,0,1,25,1030.4,38.5,40.3,0.24475524475524477,0.04368314033358164 +LR (tuned),175.85719627702338,0.1364539579448537,115.79203038641191,0.12123112010680681,0.9645430999513104,0.943877751101422,0.2902155463327537,0.25773756332102926,1448.7019616321465,6.009867470278231,35.21153846153846,0.21681,158.39292872746785,0.07476819356282552,88.63237206036187,0.08838151119168902,1.0,1.0,0.22518752798442315,0.1834788883245299,1246.7830478036524,4.016028876859062,38.0,0,0,0,26,996.9,36.4,40.6,0.22246503496503497,0.03516566041445386 +RF (default),0.9640304686676744,0.07426402477117686,0.5084719578868067,0.0844146006139,0.9951894125278877,0.9701912114787967,0.25755513106395067,0.2479832267773854,6.6754175478812146,4.153857105918795,35.28846153846154,0.213105,0.8910642200046115,0.05825435982810126,0.447836563000179,0.06594795127086661,1.0,1.0,0.17586669886220047,0.11692775501298655,5.654732996519019,3.502947097147378,37.5,0,0,0,26,1000.0,0.0,0.0,0.22071678321678323,0.029920050824686344 +LR (default),4.707631549162742,0.1483236100938585,2.804518891637732,0.1437661673750307,0.9745259999639975,0.9520629989740693,0.305476365274656,0.29332097764816845,36.73103788870433,7.308777223189359,35.67307692307692,0.22103499999999998,4.746849238872528,0.08430976470311483,2.2657084486770183,0.10642460584640503,1.0,1.0,0.22930816077470617,0.22370793213879017,31.50980214431158,4.703818974402271,39.5,0,0,1,25,982.3,41.6,38.5,0.21197552447552448,0.03995938942367327 +XT (default),0.8462119445841536,0.0803421718442542,0.47456795510527094,0.08795126020679146,0.9851460583683391,0.9642266228854909,0.2753142737967973,0.27671727525790224,6.0304600748852675,4.406716139948246,36.65384615384615,0.215405,0.7635703219307794,0.06673479080200195,0.40395335630596185,0.07007418015238884,1.0,1.0,0.18424442460299273,0.1562338348801046,5.148113375558061,3.7424721126192138,39.0,0,0,0,26,955.5,43.9,50.8,0.1896853146853147,0.030378669430214802 +KNN (tuned + ensemble),16.440937072497146,0.2739423143558013,7.036183733958932,0.19964653999651732,1.0,0.9945521006475282,0.47993514547383553,0.5914832997832359,57.836601086323085,11.285407568076662,41.57692307692308,0.31118999999999997,6.799899099932777,0.1030390567249722,3.707274221912526,0.16730320161416595,1.0,1.0,0.4552264030449861,0.6183873456591064,55.06974575171796,8.266636963631449,43.0,0,0,0,26,703.5,49.4,45.5,0.0777972027972028,0.024279443329967162 +KNN (tuned),16.440937072497146,0.06075953357240074,7.036183733958932,0.04304950843110991,1.0,0.9966557577121117,0.5010874288021712,0.6455640735444151,57.836601086323085,2.5027008595459077,42.69230769230769,0.31467999999999996,6.799899099932777,0.036990099483066134,3.707274221912526,0.039402452723266784,1.0,1.0,0.4955223463378593,0.6729856958219811,55.06974575171796,2.091430487051362,44.0,0,0,0,26,609.2,52.7,48.0,0.05244755244755245,0.023502270498812147 +KNN (default),0.2899092884145231,0.0317453768518236,0.13426353967394633,0.031065835648244968,1.0,1.0,0.5923454279012129,0.9491257378249622,1.00065410372272,1.4527699655062962,44.25,0.34582,0.1258228341738383,0.02117468251122369,0.07457399441809831,0.021006283652748647,1.0,1.0,0.6201400300158697,1.0,1.0,1.0057335151210618,45.0,0,0,0,26,407.8,86.4,95.3,0.017045454545454544,0.022663376691509682 diff --git a/data/tabpfn-tabicl-cls/tuning-impact-elo-horizontal.pdf b/data/tabpfn-tabicl-cls/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b9e514d68368e1e6c78f4d305ba10f64b756fcf1 Binary files /dev/null and b/data/tabpfn-tabicl-cls/tuning-impact-elo-horizontal.pdf differ diff --git a/data/tabpfn-tabicl-cls/tuning-impact-elo-horizontal.png.zip b/data/tabpfn-tabicl-cls/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..4f6908fba6c41ba3576608b66bc23a0647bbfbc2 --- /dev/null +++ b/data/tabpfn-tabicl-cls/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e190a64928288c6f71df2881dc9fadd779b153432c7535ca3ddd8d9d6b7fa4dd +size 139645 diff --git a/data/tabpfn-tabicl-imputed-cls/figures/critical-diagram.pdf b/data/tabpfn-tabicl-imputed-cls/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..7b6bfc794b51ee972fdede362d44c2ecd32739f2 Binary files /dev/null and b/data/tabpfn-tabicl-imputed-cls/figures/critical-diagram.pdf differ diff --git a/data/tabpfn-tabicl-imputed-cls/figures/critical-diagram.png.zip b/data/tabpfn-tabicl-imputed-cls/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..24e08506593fee7a8727aefce7940752470d6e76 --- /dev/null +++ b/data/tabpfn-tabicl-imputed-cls/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09fd50f8530f0264c02045700850c01bf4b212bcd6257fa3321f09e02e249a62 +size 319568 diff --git a/data/tabpfn-tabicl-imputed-cls/leaderboard.tex b/data/tabpfn-tabicl-imputed-cls/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..b41ff65b5428b5f7a450717fe825914a05f0b501 --- /dev/null +++ b/data/tabpfn-tabicl-imputed-cls/leaderboard.tex @@ -0,0 +1,53 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +TabPFNv2 (T+E) & \textcolor{gold}{\textbf{1736${}_{-45,+46}$}} & \textcolor{gold}{\textbf{0.700}} & \textcolor{gold}{\textbf{5.8}} & \textcolor{gold}{\textbf{2.2}} & \textcolor{gold}{\textbf{8}} & \textcolor{gold}{\textbf{4.6\%}} & 3445.60 & 48.24 \\ +TabM (T+E) & \textcolor{silver}{\textbf{1612${}_{-37,+35}$}} & 0.483 & \textcolor{silver}{\textbf{9.3}} & 4.5 & 2 & 9.2\% & 2828.45 & 1.60 \\ +TabICL (D) & \textcolor{bronze}{\textbf{1608${}_{-34,+36}$}} & \textcolor{bronze}{\textbf{0.543}} & \textcolor{bronze}{\textbf{9.5}} & \textcolor{silver}{\textbf{3.3}} & \textcolor{silver}{\textbf{4}} & \textcolor{silver}{\textbf{7.1\%}} & 8.89 & 1.74 \\ +TabPFNv2 (T) & 1588${}_{-42,+32}$ & \textcolor{silver}{\textbf{0.544}} & 10.3 & 3.7 & 1 & \textcolor{bronze}{\textbf{8.2\%}} & 3445.60 & 1.00 \\ +RealMLP (T+E) & 1573${}_{-37,+29}$ & 0.402 & 10.9 & 7.1 & 0 & 10.4\% & 5786.69 & 4.27 \\ +AutoGluon 1.3 (4h) & 1544${}_{-44,+34}$ & 0.440 & 11.9 & 5.6 & 1 & 9.5\% & 2309.21 & 2.55 \\ +TabPFNv2 (D) & 1542${}_{-38,+38}$ & 0.499 & 12.0 & \textcolor{bronze}{\textbf{3.4}} & \textcolor{silver}{\textbf{4}} & 9.5\% & 4.06 & 0.44 \\ +LightGBM (T+E) & 1521${}_{-35,+40}$ & 0.319 & 12.9 & 7.1 & 1 & 11.8\% & 647.56 & 1.72 \\ +TabM (T) & 1512${}_{-40,+30}$ & 0.387 & 13.3 & 7.5 & 0 & 10.4\% & 2828.45 & 0.22 \\ +LightGBM (T) & 1462${}_{-34,+33}$ & 0.262 & 15.3 & 13.3 & 0 & 12.2\% & 647.56 & 0.28 \\ +CatBoost (T+E) & 1456${}_{-32,+38}$ & 0.271 & 15.8 & 11.6 & 0 & 11.7\% & 1465.86 & 0.69 \\ +CatBoost (T) & 1435${}_{-32,+36}$ & 0.247 & 16.5 & 10.5 & 0 & 12.0\% & 1465.86 & 0.09 \\ +TabM (D) & 1429${}_{-32,+35}$ & 0.288 & 17.0 & 11.2 & 0 & 13.7\% & 10.42 & 0.15 \\ +ModernNCA (T) & 1426${}_{-30,+37}$ & 0.255 & 17.1 & 8.7 & 1 & 11.7\% & 5944.88 & 0.52 \\ +CatBoost (D) & 1425${}_{-35,+32}$ & 0.228 & 17.0 & 10.0 & 0 & 13.3\% & 5.72 & 0.11 \\ +XGBoost (T+E) & 1415${}_{-33,+36}$ & 0.222 & 17.5 & 14.0 & 0 & 13.1\% & 766.06 & 1.92 \\ +EBM (T+E) & 1383${}_{-33,+31}$ & 0.186 & 19.1 & 12.5 & 0 & 15.7\% & 1109.06 & 0.23 \\ +XGBoost (T) & 1379${}_{-31,+39}$ & 0.181 & 19.2 & 16.8 & 0 & 13.4\% & 766.06 & 0.28 \\ +RealMLP (T) & 1379${}_{-42,+36}$ & 0.201 & 19.3 & 14.8 & 0 & 13.4\% & 5786.69 & 0.25 \\ +ModernNCA (T+E) & 1375${}_{-38,+35}$ & 0.307 & 19.5 & 8.6 & 0 & 12.7\% & 5944.88 & 8.40 \\ +ModernNCA (D) & 1367${}_{-33,+31}$ & 0.183 & 19.8 & 10.0 & 1 & 15.1\% & 14.80 & 0.34 \\ +TorchMLP (T+E) & 1356${}_{-32,+33}$ & 0.199 & 20.3 & 15.0 & 0 & 13.3\% & 2862.05 & 2.16 \\ +FastaiMLP (T+E) & 1356${}_{-35,+32}$ & 0.230 & 20.2 & 11.2 & 0 & 15.7\% & 1358.63 & 8.07 \\ +TabDPT (D) & 1323${}_{-33,+24}$ & 0.258 & 22.0 & 6.1 & 2 & 14.8\% & 27.49 & 8.86 \\ +EBM (T) & 1320${}_{-41,+33}$ & 0.120 & 22.1 & 17.3 & 0 & 16.5\% & 1109.06 & 0.03 \\ +EBM (D) & 1299${}_{-35,+37}$ & 0.153 & 23.0 & 9.7 & 1 & 17.3\% & 5.28 & 0.08 \\ +FastaiMLP (T) & 1269${}_{-40,+38}$ & 0.113 & 24.5 & 19.1 & 0 & 17.2\% & 1358.63 & 0.90 \\ +ExtraTrees (T+E) & 1260${}_{-47,+27}$ & 0.102 & 25.0 & 17.4 & 0 & 18.2\% & 370.85 & 1.47 \\ +TorchMLP (T) & 1238${}_{-33,+33}$ & 0.086 & 25.8 & 22.0 & 0 & 15.9\% & 2862.05 & 0.15 \\ +RealMLP (D) & 1234${}_{-36,+37}$ & 0.083 & 26.1 & 21.8 & 0 & 15.9\% & 36.24 & 0.28 \\ +XGBoost (D) & 1213${}_{-32,+41}$ & 0.032 & 27.1 & 24.8 & 0 & 16.7\% & 2.40 & 0.22 \\ +ExtraTrees (T) & 1200${}_{-32,+35}$ & 0.072 & 27.5 & 22.6 & 0 & 19.4\% & 370.85 & 0.16 \\ +RandomForest (T+E) & 1157${}_{-30,+36}$ & 0.057 & 29.5 & 22.0 & 0 & 19.5\% & 527.42 & 1.39 \\ +LightGBM (D) & 1154${}_{-29,+39}$ & 0.030 & 29.6 & 27.7 & 0 & 17.7\% & 2.90 & 0.13 \\ +RandomForest (T) & 1096${}_{-31,+36}$ & 0.023 & 31.9 & 29.1 & 0 & 20.8\% & 527.42 & 0.12 \\ +TorchMLP (D) & 1082${}_{-34,+36}$ & 0.030 & 32.3 & 28.8 & 0 & 21.4\% & 10.38 & 0.19 \\ +FastaiMLP (D) & 1051${}_{-33,+31}$ & 0.019 & 33.6 & 31.0 & 0 & 23.0\% & 4.73 & 0.62 \\ +Linear (T+E) & 1030${}_{-41,+39}$ & 0.055 & 34.2 & 22.9 & 0 & 28.3\% & 88.63 & 0.26 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.005 & 35.3 & 33.4 & 0 & 25.8\% & 0.45 & 0.07 \\ +Linear (T) & 997${}_{-41,+37}$ & 0.035 & 35.2 & 28.4 & 0 & 29.0\% & 88.63 & 0.09 \\ +Linear (D) & 982${}_{-39,+42}$ & 0.025 & 35.7 & 25.0 & 0 & 30.5\% & 2.27 & 0.11 \\ +ExtraTrees (D) & 956${}_{-51,+44}$ & 0.015 & 36.7 & 32.9 & 0 & 27.5\% & 0.40 & 0.07 \\ +KNN (T+E) & 704${}_{-46,+50}$ & 0.000 & 41.6 & 41.2 & 0 & 48.0\% & 3.71 & 0.17 \\ +KNN (T) & 609${}_{-48,+53}$ & 0.000 & 42.7 & 42.5 & 0 & 50.1\% & 3.71 & 0.04 \\ +KNN (D) & 408${}_{-96,+87}$ & 0.000 & 44.2 & 44.1 & 0 & 59.2\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/tabpfn-tabicl-imputed-cls/tabarena_leaderboard.csv b/data/tabpfn-tabicl-imputed-cls/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..9cfacb0044514fd9fcb6b0fd592ac532d061c310 --- /dev/null +++ b/data/tabpfn-tabicl-imputed-cls/tabarena_leaderboard.csv @@ -0,0 +1,46 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +TABPFNV2 (tuned + ensemble),14614.639715819583,148.11643430860633,3877.419600731425,65.38306906011915,0.2999799624009034,0.36592302109392316,0.046199591967015556,0.028953147052208875,72438.91792383758,5032.936436246271,5.788461538461538,0.17261500000000002,8173.816975453165,60.12680508957969,3445.602606086448,48.23597351862546,0.21441671773452015,0.4068697696081146,0.021640889295981347,0.016874625134937742,43032.16406688419,2931.913108083719,3.5,8,4,1,13,1736.3,45.7,44.6,0.8911713286713286,0.46269305993659526 +TABM (tuned + ensemble),7080.034926002046,2.290320276704609,3436.5218404284533,2.1079046445931953,0.5166525238887076,0.547775172494987,0.09238776741660952,0.04985850997225548,43123.835967101855,114.22499982092596,9.307692307692308,0.17379499999999998,6405.6722231189415,1.880999751885732,2828.4486752645407,1.5957955528425738,0.47136645123500964,0.5540981099375837,0.04617144277378826,0.034316541138176816,44088.87353980956,109.46959390379024,8.0,2,1,1,22,1612.5,34.1,36.5,0.8111888111888111,0.22027167779430223 +TABICL (default),21.933824618351764,2.7954679915028757,10.278805574502684,2.035731958506663,0.45732829959408833,0.5181742657898207,0.07108757868512468,0.03468928052253071,145.15967621212897,119.47187491598106,9.48076923076923,0.17207,19.54939921696981,2.1857474181387158,8.891901835466445,1.7433667301105085,0.46959481282562443,0.583210155619012,0.03487584587193143,0.02062556592021359,120.41628593220491,98.62751622705991,8.0,4,3,1,18,1607.5,35.1,33.8,0.8072552447552448,0.3008875223205248 +TABPFNV2 (tuned),14614.639715819583,4.849548537201351,3877.419600731425,2.254088269272259,0.4562783530638654,0.48513499648162556,0.08179742986099706,0.05007438478688923,72438.91792383758,165.31166707386078,10.326923076923077,0.1868,8173.816975453165,0.8871031337314181,3445.602606086448,0.9952991538273057,0.34109768962419973,0.46901679172287375,0.05804204088839793,0.02327176321840322,43032.16406688419,48.819771718398094,6.0,1,8,1,16,1588.5,31.1,41.1,0.7880244755244755,0.26951399319966235 +REALMLP (tuned + ensemble),17809.543566583365,5.343982117807764,5995.031437071161,5.920653730744878,0.5983187605703805,0.5877560111308218,0.10395608954668528,0.05250154829922825,112219.73072981052,290.95193637081104,10.865384615384615,0.17424,10344.939689996507,4.694504770967695,5786.691668992368,4.267078452745884,0.5696681015084324,0.5635752491354304,0.051850568481074444,0.04632297855309826,87871.6182808383,267.1363813998378,9.0,0,0,3,23,1572.9,29.0,36.1,0.7757867132867133,0.1417961559305308 +AutoGluon 1.3 (4h),5931.009838958377,3.1656774007357082,3673.8246676849526,3.0250106370782723,0.5599854226187498,0.5522297755522069,0.09461405000435809,0.05278876561410328,39712.28933278582,151.42916243848958,11.884615384615385,0.17691,5020.2655313505065,2.664059625731574,2309.213100138395,2.5543334551748154,0.5600808158121042,0.565709489217623,0.04762778419021996,0.04116173129591805,35357.34871646478,128.98489015193604,8.5,1,2,0,23,1544.1,33.4,44.0,0.7526223776223776,0.17858871441651328 +TABPFNV2 (default),11.884418644151115,1.1739113624279316,5.514520630749067,0.6490305586450037,0.5013642769155608,0.5669313754038009,0.09512311589648129,0.06286359741165812,76.29505964660768,41.758850754001486,12.038461538461538,0.1886,8.093020015292698,0.44081105126274955,4.062007578219401,0.4356807523460313,0.3771347187714357,0.5238664820190138,0.05018180969629027,0.021765272352146064,58.62232186481185,25.39697585954705,5.0,4,1,4,17,1541.6,37.1,37.8,0.7491258741258742,0.2935579164181083 +GBM (tuned + ensemble),1379.7589400635827,3.041326452829899,860.8328230742596,2.952036385308545,0.6806482110513491,0.6837279086393804,0.1176313686476465,0.061906173521871284,10455.248725429481,156.11534656534064,12.903846153846153,0.174085,1321.8940714677174,2.4085231754514904,647.5566852470606,1.7204157994123173,0.7005535661954161,0.7073670918352166,0.051567085599247775,0.05140473915859429,9480.825611975059,103.35126359750211,12.5,1,0,2,23,1521.0,39.2,34.8,0.729458041958042,0.14042554762228465 +TABM (tuned),7080.034926002046,0.2169825582422762,3436.5218404284533,0.22059896519012676,0.6127855914191946,0.6024458446591694,0.10415143904294016,0.06347344157931543,43123.835967101855,11.305450805467302,13.346153846153847,0.17596499999999998,6405.6722231189415,0.19745506313112046,2828.4486752645407,0.21564835018398254,0.5915500043256848,0.637432321817063,0.06661219922938116,0.04216185168235247,44088.87353980956,9.589800419015791,13.0,0,2,1,23,1511.6,29.3,40.0,0.7194055944055944,0.13250415685370437 +GBM (tuned),1379.7589400635827,0.5707490142594036,860.8328230742596,0.6449583363337497,0.737683616453633,0.7058815019854675,0.12190922144153328,0.06697491769463343,10455.248725429481,28.289876597893098,15.326923076923077,0.17371999999999999,1321.8940714677174,0.34941615396075776,647.5566852470606,0.28350492153737616,0.7826548575969626,0.7126095533835781,0.05516713547384128,0.05173099008418312,9480.825611975059,16.480248719877835,15.5,0,0,0,26,1461.6,33.0,33.6,0.6743881118881119,0.07518973620464424 +CAT (tuned + ensemble),4698.536897969144,0.9597997341400538,2797.035390366479,0.7965719365801648,0.728920075896456,0.7126569304674935,0.11678549305379037,0.07069284985664498,27448.91121504087,44.04781428274726,15.76923076923077,0.1771,3111.985966463884,0.7725222905476887,1465.8584785724734,0.6919304562740058,0.7992439551459185,0.720606902542752,0.0655088388746699,0.053959365141496254,21194.263639925673,39.04121549244424,16.0,0,0,1,25,1455.5,38.0,31.7,0.6643356643356644,0.08640388177736971 +CAT (tuned),4698.536897969144,0.11917537287769155,2797.035390366479,0.12051762325590008,0.7533425274831995,0.7320002632947487,0.11966485384248739,0.07092155545579401,27448.91121504087,5.822055889865127,16.53846153846154,0.177215,3111.985966463884,0.10264339711931017,1465.8584785724734,0.09138547661664378,0.8504867849422679,0.7742727557913103,0.06254942442191513,0.056743152931981594,21194.263639925673,5.623705460666335,16.5,0,1,0,25,1435.3,35.7,31.2,0.6468531468531469,0.09500537263224745 +TABM (default),25.031941110761757,0.20188171669968172,14.494913436183246,0.20961960333163143,0.7120650416184634,0.7180270297124032,0.1369795645146118,0.07804754343441613,189.57549613749592,9.978425427673205,16.96153846153846,0.175255,19.491187883747948,0.16552388005786473,10.418743379746541,0.15495242186411928,0.7417274447397321,0.7370120765139465,0.07020888606704828,0.04168408531342971,140.5904555433362,9.536419533323254,13.5,0,0,0,26,1429.3,34.7,31.2,0.6372377622377622,0.08930659071409167 +CAT (default),184.70535022315818,0.14151572040003588,154.6142383995702,0.1555426094856078,0.7721520925382083,0.773058737636122,0.1329171714368633,0.07483897220415464,477.2608484422213,7.423108461919998,17.0,0.176625,11.6375067697631,0.16093741522894967,5.723546572951673,0.1101036800878527,0.8708469956136012,0.8295699236647479,0.07003170550807408,0.03759488482131658,107.22813859237792,5.910285969899903,17.5,0,2,0,24,1425.0,31.1,34.5,0.6363636363636364,0.10014897524065079 +MNCA (tuned),10872.434398398032,0.5841771599573967,5614.838977261846,0.5087025474163123,0.7448490525012423,0.6637614958528275,0.11651895782038843,0.07336435140526076,77920.67944321278,26.83390437537825,17.057692307692307,0.17598,10294.846671289868,0.4166409836875068,5944.878874246984,0.5156200362715774,0.8177816514691232,0.7235503806350614,0.06755654915093501,0.057542536642744974,66956.02864547497,22.936826653686147,16.5,1,0,0,25,1425.9,36.9,29.1,0.6350524475524476,0.11459331361587798 +XGB (tuned + ensemble),1872.6912781718452,2.683822184852046,1147.2290814465296,3.3619425077092306,0.7784275988369416,0.7582815836411924,0.13102530851919714,0.08005807343057435,11800.394028929062,144.45205787298235,17.51923076923077,0.18004,1385.052251373397,1.6176902174949646,766.0569170086173,1.9172343251389494,0.8316341855574652,0.7791345165987353,0.07343601410090844,0.05291664219529325,11520.40078776449,81.51508510452376,15.25,0,0,0,26,1415.4,35.8,32.1,0.6245629370629371,0.07166187524738382 +EBM (tuned + ensemble),2781.1579704396745,0.37791664172441536,1628.507747090886,0.31990997744824035,0.8144475987903695,0.8130615556926872,0.1572702096615729,0.11477484338056507,16692.42562947248,18.328617946180266,19.076923076923077,0.190645,1861.6872274941868,0.27221977710723877,1109.0589152421817,0.22935467594400044,0.9368603191709515,0.8266966373060847,0.0849735392053248,0.05122044741314488,17695.477225433653,12.287120931350431,18.5,0,0,1,25,1383.1,30.7,32.2,0.5891608391608392,0.08004680984485021 +XGB (tuned),1872.6912781718452,0.6264070270407913,1147.2290814465296,0.8619192527074968,0.8187393857188691,0.7809467391114648,0.1335476211929174,0.08261887953032405,11800.394028929062,30.013383573853858,19.192307692307693,0.18088500000000002,1385.052251373397,0.25049915578630233,766.0569170086173,0.27754517145625424,0.9017943832788666,0.7799972199454441,0.07798856953005051,0.052696435285602104,11520.40078776449,11.544680214373663,19.5,0,0,0,26,1379.3,39.0,30.4,0.5865384615384616,0.05963545192718714 +REALMLP (tuned),17809.543566583365,0.2565666730587299,5995.031437071161,0.31613979844114337,0.7992732991174807,0.7337946286494308,0.13385871828365248,0.07632560949832082,112219.73072981052,14.318352639712598,19.346153846153847,0.174605,10344.939689996507,0.2056242651409573,5786.691668992368,0.2497109023068299,0.895201963654964,0.74474927717263,0.07816480714694801,0.061570080954222194,87871.6182808383,11.529830439504863,17.0,0,0,0,26,1379.0,35.5,42.0,0.583041958041958,0.06741843156688952 +MNCA (tuned + ensemble),10872.434398398032,12.131162534310267,5614.838977261846,9.225622394556064,0.6933896363411076,0.6676223600444691,0.12705648224037935,0.09495265900961732,77920.67944321278,542.9013751751211,19.46153846153846,0.193055,10294.846671289868,9.331144248114692,5944.878874246984,8.395978282094799,0.722368050936999,0.6992013611051189,0.10101560653736502,0.048750216328898435,66956.02864547497,426.41545163369784,15.0,0,1,3,22,1375.2,34.1,37.3,0.5804195804195804,0.11686086342849132 +MNCA (default),28.48231661941251,0.466951452768766,14.20837500082612,0.38564885096222307,0.8169283513779425,0.7625155608140191,0.15085268931164045,0.08288629881402086,190.09277232716437,21.03504265137771,19.76923076923077,0.18519,24.325043747160173,0.36861725648244226,14.804303881798361,0.336567721078897,0.9808195818002508,0.7827163737065492,0.07974631950048428,0.053259111831894385,190.7747999331715,15.595042220247258,21.0,1,0,0,25,1366.6,30.2,32.3,0.5734265734265734,0.10017785315060294 +FASTAI (tuned + ensemble),2658.981896154697,9.680411353478066,1616.869681108774,10.454986295420108,0.7698173487890432,0.7686639558056911,0.1568263488914938,0.08996191783618754,20955.495033608,514.94962880429,20.25,0.17986000000000002,2313.697349058257,8.905639794137743,1358.6299051921596,8.066833347447606,0.9823907390988809,0.8177131221302221,0.08132829350115817,0.05523334112431259,17602.335588839425,531.7232414230566,20.0,0,1,0,25,1356.1,31.7,35.0,0.5625,0.08900824287112627 +NN_TORCH (tuned + ensemble),8367.9645569837,3.8136142372066137,3603.347061143873,3.319891821445254,0.8005534385996745,0.764065484436084,0.1332555944462707,0.07705265128451745,56068.072795326945,181.3894555134842,20.28846153846154,0.17949500000000002,6973.094145007928,2.944306871626112,2862.0511040893566,2.157502904371376,0.9508009219506811,0.8634802144576847,0.06782483230561193,0.06047718336789526,51500.257380537965,155.60936197975602,20.0,0,0,0,26,1356.4,32.6,31.9,0.5616258741258742,0.06668189048939435 +TABDPT (default),73.6718590354308,22.101120046978323,33.70000421461804,29.140721170745643,0.7421770023997575,0.7373052354542396,0.1481225969644462,0.09258934520941622,529.9323761499634,1297.090837487081,22.0,0.204185,71.26209372944302,21.38572289016512,27.488664901286818,8.862313494979123,1.0,0.88150849404269,0.06691814537901497,0.04715293243733788,513.7039796057306,1113.0844664803024,25.5,2,0,3,21,1323.2,24.0,32.4,0.5227272727272727,0.16307441111325852 +EBM (tuned),2781.1579704396745,0.04655793772803412,1628.507747090886,0.04424123436436782,0.8796773591220505,0.848789281667686,0.16506052478039396,0.12247708467085656,16692.42562947248,2.268478500812272,22.134615384615383,0.19155,1861.6872274941868,0.0338150527742174,1109.0589152421817,0.02733291425041831,1.0,0.8902021091214286,0.0928869358898275,0.062194782256558334,17695.477225433653,1.252412448907763,24.0,0,0,0,26,1319.5,32.8,40.9,0.5196678321678322,0.0577362774386474 +EBM (default),10.549292183941246,0.06438207361433242,5.791308777159165,0.0798605102452594,0.8468070324259693,0.8496093431986085,0.1725067485868843,0.124266558571261,75.26102802648397,3.652629917649085,23.0,0.19248500000000002,8.052384217580158,0.051854162746005586,5.279257749622374,0.07769986864408396,1.0,0.8953860150359447,0.09554880640873625,0.056563353282588213,77.97362526017017,3.340101142403973,23.0,1,0,2,23,1298.8,36.2,34.1,0.5,0.10350085258304385 +FASTAI (tuned),2658.981896154697,0.7253259826929142,1616.869681108774,0.8254771149356669,0.8865326975991655,0.8336048750524725,0.1719892484242808,0.10544003385780601,20955.495033608,39.61972184578173,24.48076923076923,0.18178,2313.697349058257,0.7597291602028741,1358.6299051921596,0.8969521438633071,1.0,0.8551942745167906,0.09660910797742411,0.07038888057026207,17602.335588839425,37.14448504861147,23.0,0,0,0,26,1269.2,37.8,39.5,0.46634615384615385,0.0522325290233973 +XT (tuned + ensemble),714.8477392964893,1.554570463987497,476.1600651525857,1.7304383155841179,0.8984673764067701,0.8650357556797086,0.18213060995030939,0.12358064556047872,6037.128774148354,86.31947081771706,25.0,0.18718,684.9222148127026,1.3082488920953539,370.85408017752667,1.4664534567412004,1.0,0.927972542444979,0.09673248729372463,0.07594781768304604,5339.627074654447,77.0823275943278,28.25,0,0,0,26,1260.2,26.7,47.0,0.45454545454545453,0.057367362828272636 +NN_TORCH (tuned),8367.9645569837,0.19940724566451504,3603.347061143873,0.20335754670694137,0.9141063957909149,0.850631259028558,0.15913478602343065,0.0993949625945807,56068.072795326945,10.018943215736572,25.78846153846154,0.18092,6973.094145007928,0.144207231203715,2862.0511040893566,0.15177921475257505,1.0,0.9108681738244536,0.10379368194908523,0.09268904724445692,51500.257380537965,8.681540922799902,27.0,0,0,0,26,1238.5,32.5,32.6,0.4366258741258741,0.045366926667074474 +REALMLP (default),111.4785790004282,0.2675422726533352,36.9875135803688,0.319519629673943,0.9170988508971886,0.8569208298595739,0.15925534618050793,0.09850483276040865,704.4596577225865,14.367018773597605,26.057692307692307,0.17819000000000002,62.036723497178826,0.20994164678785537,36.23724475975503,0.27878779609060306,1.0,0.9211958531631512,0.11529538667598471,0.06746878542921386,536.9122103749642,12.116250573587863,28.5,0,0,0,26,1233.8,37.0,35.4,0.430506993006993,0.04593533182737854 +XGB (default),4.970388249658113,0.2682310309165563,3.142391352663509,0.37694382359999684,0.967663807523489,0.9070085519838873,0.16741259421914897,0.12284060876198313,37.42364826794432,14.932177592236862,27.057692307692307,0.18894,4.433991021580166,0.20984046989017063,2.395188706947506,0.2182544724645258,1.0,0.9518900673045412,0.11516738963641299,0.07681752222912633,34.313637563138,10.099890730056913,25.75,0,0,0,26,1212.8,40.2,31.9,0.40777972027972026,0.04033171361980777 +XT (tuned),714.8477392964893,0.1734743657275143,476.1600651525857,0.2154329300724878,0.9275522820640911,0.8944270214472317,0.19352158162401475,0.1332600566819766,6037.128774148354,9.800458404720944,27.5,0.19,684.9222148127026,0.15512712796529132,370.85408017752667,0.16132775528274945,1.0,0.9688071317540197,0.10725621698653792,0.08413965072039432,5339.627074654447,8.581565117405717,31.5,0,0,0,26,1200.5,34.6,31.8,0.3977272727272727,0.04426880847584831 +RF (tuned + ensemble),915.5438805149151,1.5748089445961848,532.5626464695567,1.640386311447312,0.9428934399339288,0.9114679294811889,0.19492401068058715,0.147796519544612,6794.043055663636,81.1610891109226,29.46153846153846,0.18906,789.1687051984999,1.122593025366465,527.4239458868619,1.3899910445458383,1.0,0.9937951111561848,0.11848332500543385,0.09151654391313321,6269.6136687159,75.55960345229795,31.5,0,0,0,26,1156.9,35.3,29.3,0.3531468531468531,0.04551104983248137 +GBM (default),5.376282313848153,0.20799484711426958,3.3489543497095178,0.19728412961063485,0.9701181894525962,0.9226046869734794,0.17658302669842518,0.11946139446934875,42.11806124941037,9.792256635961557,29.576923076923077,0.188175,5.033887876404656,0.22240020169152153,2.8984772023300502,0.13282292956587455,1.0,0.9543370010011993,0.11805347934940252,0.09964492878261602,38.718264562373335,7.411495765404641,29.5,0,0,0,26,1154.1,38.8,28.9,0.3505244755244755,0.036096329534928126 +RF (tuned),915.5438805149151,0.159982283706339,532.5626464695567,0.2003027211432199,0.9772506936041817,0.9400975627550481,0.20796627140051763,0.15738142731874985,6794.043055663636,8.85724716182767,31.923076923076923,0.190815,789.1687051984999,0.14185967445373537,527.4239458868619,0.12279197881507195,1.0,0.9949804676438307,0.13224551901999893,0.10601779560500876,6269.6136687159,8.226247917103098,33.0,0,0,0,26,1096.4,35.3,30.8,0.2972027972027972,0.03435712054173671 +NN_TORCH (default),27.39151145983965,0.20185868648382335,14.560395769408618,0.21573681790796437,0.9698948249425869,0.9391775521500288,0.2143711666740534,0.1478455999492002,188.3149665224302,10.435316060204023,32.32692307692308,0.183195,20.22722778055403,0.14770235617955524,10.376930987013111,0.18792402145583464,1.0,0.9983912483912485,0.14318031310217677,0.1051705699943664,197.09431521312473,8.356657050571908,33.0,0,0,0,26,1082.4,35.8,33.5,0.2880244755244755,0.03477287158574056 +FASTAI (default),10.397479128328143,0.6249765678348704,5.689409993948898,0.6426262937267089,0.9807465319111015,0.9403855906437737,0.2303447532418144,0.17838953792880163,79.38940070919118,32.314469844151965,33.61538461538461,0.19183499999999998,9.91800790362888,0.5629585729704962,4.729717448807326,0.6226443216085578,1.0,0.9998853211009175,0.16088565720086695,0.11423976777713259,63.05950085745769,32.34956133934325,36.5,0,0,0,26,1051.3,30.1,32.4,0.25874125874125875,0.03221855964948352 +LR (tuned + ensemble),175.85719627702338,0.42670365847074065,115.79203038641191,0.374819481777715,0.9447815704031849,0.9377863465231412,0.28317417183852894,0.24822823084079834,1448.7019616321465,20.41052886468638,34.23076923076923,0.216585,158.39292872746785,0.19311302105585734,88.63237206036187,0.25697546561814155,1.0,1.0,0.21769005088471027,0.1779543124949825,1246.7830478036524,13.25530093456624,38.0,0,0,1,25,1030.4,38.5,40.3,0.24475524475524477,0.04368314033358164 +LR (tuned),175.85719627702338,0.1364539579448537,115.79203038641191,0.12123112010680681,0.9645430999513104,0.943877751101422,0.2902155463327537,0.25773756332102926,1448.7019616321465,6.009867470278231,35.21153846153846,0.21681,158.39292872746785,0.07476819356282552,88.63237206036187,0.08838151119168902,1.0,1.0,0.22518752798442315,0.1834788883245299,1246.7830478036524,4.016028876859062,38.0,0,0,0,26,996.9,36.4,40.6,0.22246503496503497,0.03516566041445386 +RF (default),0.9640304686676744,0.07426402477117686,0.5084719578868067,0.0844146006139,0.9951894125278877,0.9701912114787967,0.25755513106395067,0.2479832267773854,6.6754175478812146,4.153857105918795,35.28846153846154,0.213105,0.8910642200046115,0.05825435982810126,0.447836563000179,0.06594795127086661,1.0,1.0,0.17586669886220047,0.11692775501298655,5.654732996519019,3.502947097147378,37.5,0,0,0,26,1000.0,0.0,0.0,0.22071678321678323,0.029920050824686344 +LR (default),4.707631549162742,0.1483236100938585,2.804518891637732,0.1437661673750307,0.9745259999639975,0.9520629989740693,0.305476365274656,0.29332097764816845,36.73103788870433,7.308777223189359,35.67307692307692,0.22103499999999998,4.746849238872528,0.08430976470311483,2.2657084486770183,0.10642460584640503,1.0,1.0,0.22930816077470617,0.22370793213879017,31.50980214431158,4.703818974402271,39.5,0,0,1,25,982.3,41.6,38.5,0.21197552447552448,0.03995938942367327 +XT (default),0.8462119445841536,0.0803421718442542,0.47456795510527094,0.08795126020679146,0.9851460583683391,0.9642266228854909,0.2753142737967973,0.27671727525790224,6.0304600748852675,4.406716139948246,36.65384615384615,0.215405,0.7635703219307794,0.06673479080200195,0.40395335630596185,0.07007418015238884,1.0,1.0,0.18424442460299273,0.1562338348801046,5.148113375558061,3.7424721126192138,39.0,0,0,0,26,955.5,43.9,50.8,0.1896853146853147,0.030378669430214802 +KNN (tuned + ensemble),16.440937072497146,0.2739423143558013,7.036183733958932,0.19964653999651732,1.0,0.9945521006475282,0.47993514547383553,0.5914832997832359,57.836601086323085,11.285407568076662,41.57692307692308,0.31118999999999997,6.799899099932777,0.1030390567249722,3.707274221912526,0.16730320161416595,1.0,1.0,0.4552264030449861,0.6183873456591064,55.06974575171796,8.266636963631449,43.0,0,0,0,26,703.5,49.4,45.5,0.0777972027972028,0.024279443329967162 +KNN (tuned),16.440937072497146,0.06075953357240074,7.036183733958932,0.04304950843110991,1.0,0.9966557577121117,0.5010874288021712,0.6455640735444151,57.836601086323085,2.5027008595459077,42.69230769230769,0.31467999999999996,6.799899099932777,0.036990099483066134,3.707274221912526,0.039402452723266784,1.0,1.0,0.4955223463378593,0.6729856958219811,55.06974575171796,2.091430487051362,44.0,0,0,0,26,609.2,52.7,48.0,0.05244755244755245,0.023502270498812147 +KNN (default),0.2899092884145231,0.0317453768518236,0.13426353967394633,0.031065835648244968,1.0,1.0,0.5923454279012129,0.9491257378249622,1.00065410372272,1.4527699655062962,44.25,0.34582,0.1258228341738383,0.02117468251122369,0.07457399441809831,0.021006283652748647,1.0,1.0,0.6201400300158697,1.0,1.0,1.0057335151210618,45.0,0,0,0,26,407.8,86.4,95.3,0.017045454545454544,0.022663376691509682 diff --git a/data/tabpfn-tabicl-imputed-cls/tuning-impact-elo-horizontal.pdf b/data/tabpfn-tabicl-imputed-cls/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..fbaa6eee7337be014b3a03eb5a7fbb69000bda9a Binary files /dev/null and b/data/tabpfn-tabicl-imputed-cls/tuning-impact-elo-horizontal.pdf differ diff --git a/data/tabpfn-tabicl-imputed-cls/tuning-impact-elo-horizontal.png.zip b/data/tabpfn-tabicl-imputed-cls/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..4f6908fba6c41ba3576608b66bc23a0647bbfbc2 --- /dev/null +++ b/data/tabpfn-tabicl-imputed-cls/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e190a64928288c6f71df2881dc9fadd779b153432c7535ca3ddd8d9d6b7fa4dd +size 139645 diff --git a/data/tabpfn-tabicl-imputed/figures/critical-diagram.pdf b/data/tabpfn-tabicl-imputed/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..ba1d1bdd9236c4ba3a782b6e642fa34df19a80d4 Binary files /dev/null and b/data/tabpfn-tabicl-imputed/figures/critical-diagram.pdf differ diff --git a/data/tabpfn-tabicl-imputed/figures/critical-diagram.png.zip b/data/tabpfn-tabicl-imputed/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..24e08506593fee7a8727aefce7940752470d6e76 --- /dev/null +++ b/data/tabpfn-tabicl-imputed/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09fd50f8530f0264c02045700850c01bf4b212bcd6257fa3321f09e02e249a62 +size 319568 diff --git a/data/tabpfn-tabicl-imputed/leaderboard.tex b/data/tabpfn-tabicl-imputed/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..b41ff65b5428b5f7a450717fe825914a05f0b501 --- /dev/null +++ b/data/tabpfn-tabicl-imputed/leaderboard.tex @@ -0,0 +1,53 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +TabPFNv2 (T+E) & \textcolor{gold}{\textbf{1736${}_{-45,+46}$}} & \textcolor{gold}{\textbf{0.700}} & \textcolor{gold}{\textbf{5.8}} & \textcolor{gold}{\textbf{2.2}} & \textcolor{gold}{\textbf{8}} & \textcolor{gold}{\textbf{4.6\%}} & 3445.60 & 48.24 \\ +TabM (T+E) & \textcolor{silver}{\textbf{1612${}_{-37,+35}$}} & 0.483 & \textcolor{silver}{\textbf{9.3}} & 4.5 & 2 & 9.2\% & 2828.45 & 1.60 \\ +TabICL (D) & \textcolor{bronze}{\textbf{1608${}_{-34,+36}$}} & \textcolor{bronze}{\textbf{0.543}} & \textcolor{bronze}{\textbf{9.5}} & \textcolor{silver}{\textbf{3.3}} & \textcolor{silver}{\textbf{4}} & \textcolor{silver}{\textbf{7.1\%}} & 8.89 & 1.74 \\ +TabPFNv2 (T) & 1588${}_{-42,+32}$ & \textcolor{silver}{\textbf{0.544}} & 10.3 & 3.7 & 1 & \textcolor{bronze}{\textbf{8.2\%}} & 3445.60 & 1.00 \\ +RealMLP (T+E) & 1573${}_{-37,+29}$ & 0.402 & 10.9 & 7.1 & 0 & 10.4\% & 5786.69 & 4.27 \\ +AutoGluon 1.3 (4h) & 1544${}_{-44,+34}$ & 0.440 & 11.9 & 5.6 & 1 & 9.5\% & 2309.21 & 2.55 \\ +TabPFNv2 (D) & 1542${}_{-38,+38}$ & 0.499 & 12.0 & \textcolor{bronze}{\textbf{3.4}} & \textcolor{silver}{\textbf{4}} & 9.5\% & 4.06 & 0.44 \\ +LightGBM (T+E) & 1521${}_{-35,+40}$ & 0.319 & 12.9 & 7.1 & 1 & 11.8\% & 647.56 & 1.72 \\ +TabM (T) & 1512${}_{-40,+30}$ & 0.387 & 13.3 & 7.5 & 0 & 10.4\% & 2828.45 & 0.22 \\ +LightGBM (T) & 1462${}_{-34,+33}$ & 0.262 & 15.3 & 13.3 & 0 & 12.2\% & 647.56 & 0.28 \\ +CatBoost (T+E) & 1456${}_{-32,+38}$ & 0.271 & 15.8 & 11.6 & 0 & 11.7\% & 1465.86 & 0.69 \\ +CatBoost (T) & 1435${}_{-32,+36}$ & 0.247 & 16.5 & 10.5 & 0 & 12.0\% & 1465.86 & 0.09 \\ +TabM (D) & 1429${}_{-32,+35}$ & 0.288 & 17.0 & 11.2 & 0 & 13.7\% & 10.42 & 0.15 \\ +ModernNCA (T) & 1426${}_{-30,+37}$ & 0.255 & 17.1 & 8.7 & 1 & 11.7\% & 5944.88 & 0.52 \\ +CatBoost (D) & 1425${}_{-35,+32}$ & 0.228 & 17.0 & 10.0 & 0 & 13.3\% & 5.72 & 0.11 \\ +XGBoost (T+E) & 1415${}_{-33,+36}$ & 0.222 & 17.5 & 14.0 & 0 & 13.1\% & 766.06 & 1.92 \\ +EBM (T+E) & 1383${}_{-33,+31}$ & 0.186 & 19.1 & 12.5 & 0 & 15.7\% & 1109.06 & 0.23 \\ +XGBoost (T) & 1379${}_{-31,+39}$ & 0.181 & 19.2 & 16.8 & 0 & 13.4\% & 766.06 & 0.28 \\ +RealMLP (T) & 1379${}_{-42,+36}$ & 0.201 & 19.3 & 14.8 & 0 & 13.4\% & 5786.69 & 0.25 \\ +ModernNCA (T+E) & 1375${}_{-38,+35}$ & 0.307 & 19.5 & 8.6 & 0 & 12.7\% & 5944.88 & 8.40 \\ +ModernNCA (D) & 1367${}_{-33,+31}$ & 0.183 & 19.8 & 10.0 & 1 & 15.1\% & 14.80 & 0.34 \\ +TorchMLP (T+E) & 1356${}_{-32,+33}$ & 0.199 & 20.3 & 15.0 & 0 & 13.3\% & 2862.05 & 2.16 \\ +FastaiMLP (T+E) & 1356${}_{-35,+32}$ & 0.230 & 20.2 & 11.2 & 0 & 15.7\% & 1358.63 & 8.07 \\ +TabDPT (D) & 1323${}_{-33,+24}$ & 0.258 & 22.0 & 6.1 & 2 & 14.8\% & 27.49 & 8.86 \\ +EBM (T) & 1320${}_{-41,+33}$ & 0.120 & 22.1 & 17.3 & 0 & 16.5\% & 1109.06 & 0.03 \\ +EBM (D) & 1299${}_{-35,+37}$ & 0.153 & 23.0 & 9.7 & 1 & 17.3\% & 5.28 & 0.08 \\ +FastaiMLP (T) & 1269${}_{-40,+38}$ & 0.113 & 24.5 & 19.1 & 0 & 17.2\% & 1358.63 & 0.90 \\ +ExtraTrees (T+E) & 1260${}_{-47,+27}$ & 0.102 & 25.0 & 17.4 & 0 & 18.2\% & 370.85 & 1.47 \\ +TorchMLP (T) & 1238${}_{-33,+33}$ & 0.086 & 25.8 & 22.0 & 0 & 15.9\% & 2862.05 & 0.15 \\ +RealMLP (D) & 1234${}_{-36,+37}$ & 0.083 & 26.1 & 21.8 & 0 & 15.9\% & 36.24 & 0.28 \\ +XGBoost (D) & 1213${}_{-32,+41}$ & 0.032 & 27.1 & 24.8 & 0 & 16.7\% & 2.40 & 0.22 \\ +ExtraTrees (T) & 1200${}_{-32,+35}$ & 0.072 & 27.5 & 22.6 & 0 & 19.4\% & 370.85 & 0.16 \\ +RandomForest (T+E) & 1157${}_{-30,+36}$ & 0.057 & 29.5 & 22.0 & 0 & 19.5\% & 527.42 & 1.39 \\ +LightGBM (D) & 1154${}_{-29,+39}$ & 0.030 & 29.6 & 27.7 & 0 & 17.7\% & 2.90 & 0.13 \\ +RandomForest (T) & 1096${}_{-31,+36}$ & 0.023 & 31.9 & 29.1 & 0 & 20.8\% & 527.42 & 0.12 \\ +TorchMLP (D) & 1082${}_{-34,+36}$ & 0.030 & 32.3 & 28.8 & 0 & 21.4\% & 10.38 & 0.19 \\ +FastaiMLP (D) & 1051${}_{-33,+31}$ & 0.019 & 33.6 & 31.0 & 0 & 23.0\% & 4.73 & 0.62 \\ +Linear (T+E) & 1030${}_{-41,+39}$ & 0.055 & 34.2 & 22.9 & 0 & 28.3\% & 88.63 & 0.26 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.005 & 35.3 & 33.4 & 0 & 25.8\% & 0.45 & 0.07 \\ +Linear (T) & 997${}_{-41,+37}$ & 0.035 & 35.2 & 28.4 & 0 & 29.0\% & 88.63 & 0.09 \\ +Linear (D) & 982${}_{-39,+42}$ & 0.025 & 35.7 & 25.0 & 0 & 30.5\% & 2.27 & 0.11 \\ +ExtraTrees (D) & 956${}_{-51,+44}$ & 0.015 & 36.7 & 32.9 & 0 & 27.5\% & 0.40 & 0.07 \\ +KNN (T+E) & 704${}_{-46,+50}$ & 0.000 & 41.6 & 41.2 & 0 & 48.0\% & 3.71 & 0.17 \\ +KNN (T) & 609${}_{-48,+53}$ & 0.000 & 42.7 & 42.5 & 0 & 50.1\% & 3.71 & 0.04 \\ +KNN (D) & 408${}_{-96,+87}$ & 0.000 & 44.2 & 44.1 & 0 & 59.2\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/tabpfn-tabicl-imputed/tabarena_leaderboard.csv b/data/tabpfn-tabicl-imputed/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..9cfacb0044514fd9fcb6b0fd592ac532d061c310 --- /dev/null +++ b/data/tabpfn-tabicl-imputed/tabarena_leaderboard.csv @@ -0,0 +1,46 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +TABPFNV2 (tuned + ensemble),14614.639715819583,148.11643430860633,3877.419600731425,65.38306906011915,0.2999799624009034,0.36592302109392316,0.046199591967015556,0.028953147052208875,72438.91792383758,5032.936436246271,5.788461538461538,0.17261500000000002,8173.816975453165,60.12680508957969,3445.602606086448,48.23597351862546,0.21441671773452015,0.4068697696081146,0.021640889295981347,0.016874625134937742,43032.16406688419,2931.913108083719,3.5,8,4,1,13,1736.3,45.7,44.6,0.8911713286713286,0.46269305993659526 +TABM (tuned + ensemble),7080.034926002046,2.290320276704609,3436.5218404284533,2.1079046445931953,0.5166525238887076,0.547775172494987,0.09238776741660952,0.04985850997225548,43123.835967101855,114.22499982092596,9.307692307692308,0.17379499999999998,6405.6722231189415,1.880999751885732,2828.4486752645407,1.5957955528425738,0.47136645123500964,0.5540981099375837,0.04617144277378826,0.034316541138176816,44088.87353980956,109.46959390379024,8.0,2,1,1,22,1612.5,34.1,36.5,0.8111888111888111,0.22027167779430223 +TABICL (default),21.933824618351764,2.7954679915028757,10.278805574502684,2.035731958506663,0.45732829959408833,0.5181742657898207,0.07108757868512468,0.03468928052253071,145.15967621212897,119.47187491598106,9.48076923076923,0.17207,19.54939921696981,2.1857474181387158,8.891901835466445,1.7433667301105085,0.46959481282562443,0.583210155619012,0.03487584587193143,0.02062556592021359,120.41628593220491,98.62751622705991,8.0,4,3,1,18,1607.5,35.1,33.8,0.8072552447552448,0.3008875223205248 +TABPFNV2 (tuned),14614.639715819583,4.849548537201351,3877.419600731425,2.254088269272259,0.4562783530638654,0.48513499648162556,0.08179742986099706,0.05007438478688923,72438.91792383758,165.31166707386078,10.326923076923077,0.1868,8173.816975453165,0.8871031337314181,3445.602606086448,0.9952991538273057,0.34109768962419973,0.46901679172287375,0.05804204088839793,0.02327176321840322,43032.16406688419,48.819771718398094,6.0,1,8,1,16,1588.5,31.1,41.1,0.7880244755244755,0.26951399319966235 +REALMLP (tuned + ensemble),17809.543566583365,5.343982117807764,5995.031437071161,5.920653730744878,0.5983187605703805,0.5877560111308218,0.10395608954668528,0.05250154829922825,112219.73072981052,290.95193637081104,10.865384615384615,0.17424,10344.939689996507,4.694504770967695,5786.691668992368,4.267078452745884,0.5696681015084324,0.5635752491354304,0.051850568481074444,0.04632297855309826,87871.6182808383,267.1363813998378,9.0,0,0,3,23,1572.9,29.0,36.1,0.7757867132867133,0.1417961559305308 +AutoGluon 1.3 (4h),5931.009838958377,3.1656774007357082,3673.8246676849526,3.0250106370782723,0.5599854226187498,0.5522297755522069,0.09461405000435809,0.05278876561410328,39712.28933278582,151.42916243848958,11.884615384615385,0.17691,5020.2655313505065,2.664059625731574,2309.213100138395,2.5543334551748154,0.5600808158121042,0.565709489217623,0.04762778419021996,0.04116173129591805,35357.34871646478,128.98489015193604,8.5,1,2,0,23,1544.1,33.4,44.0,0.7526223776223776,0.17858871441651328 +TABPFNV2 (default),11.884418644151115,1.1739113624279316,5.514520630749067,0.6490305586450037,0.5013642769155608,0.5669313754038009,0.09512311589648129,0.06286359741165812,76.29505964660768,41.758850754001486,12.038461538461538,0.1886,8.093020015292698,0.44081105126274955,4.062007578219401,0.4356807523460313,0.3771347187714357,0.5238664820190138,0.05018180969629027,0.021765272352146064,58.62232186481185,25.39697585954705,5.0,4,1,4,17,1541.6,37.1,37.8,0.7491258741258742,0.2935579164181083 +GBM (tuned + ensemble),1379.7589400635827,3.041326452829899,860.8328230742596,2.952036385308545,0.6806482110513491,0.6837279086393804,0.1176313686476465,0.061906173521871284,10455.248725429481,156.11534656534064,12.903846153846153,0.174085,1321.8940714677174,2.4085231754514904,647.5566852470606,1.7204157994123173,0.7005535661954161,0.7073670918352166,0.051567085599247775,0.05140473915859429,9480.825611975059,103.35126359750211,12.5,1,0,2,23,1521.0,39.2,34.8,0.729458041958042,0.14042554762228465 +TABM (tuned),7080.034926002046,0.2169825582422762,3436.5218404284533,0.22059896519012676,0.6127855914191946,0.6024458446591694,0.10415143904294016,0.06347344157931543,43123.835967101855,11.305450805467302,13.346153846153847,0.17596499999999998,6405.6722231189415,0.19745506313112046,2828.4486752645407,0.21564835018398254,0.5915500043256848,0.637432321817063,0.06661219922938116,0.04216185168235247,44088.87353980956,9.589800419015791,13.0,0,2,1,23,1511.6,29.3,40.0,0.7194055944055944,0.13250415685370437 +GBM (tuned),1379.7589400635827,0.5707490142594036,860.8328230742596,0.6449583363337497,0.737683616453633,0.7058815019854675,0.12190922144153328,0.06697491769463343,10455.248725429481,28.289876597893098,15.326923076923077,0.17371999999999999,1321.8940714677174,0.34941615396075776,647.5566852470606,0.28350492153737616,0.7826548575969626,0.7126095533835781,0.05516713547384128,0.05173099008418312,9480.825611975059,16.480248719877835,15.5,0,0,0,26,1461.6,33.0,33.6,0.6743881118881119,0.07518973620464424 +CAT (tuned + ensemble),4698.536897969144,0.9597997341400538,2797.035390366479,0.7965719365801648,0.728920075896456,0.7126569304674935,0.11678549305379037,0.07069284985664498,27448.91121504087,44.04781428274726,15.76923076923077,0.1771,3111.985966463884,0.7725222905476887,1465.8584785724734,0.6919304562740058,0.7992439551459185,0.720606902542752,0.0655088388746699,0.053959365141496254,21194.263639925673,39.04121549244424,16.0,0,0,1,25,1455.5,38.0,31.7,0.6643356643356644,0.08640388177736971 +CAT (tuned),4698.536897969144,0.11917537287769155,2797.035390366479,0.12051762325590008,0.7533425274831995,0.7320002632947487,0.11966485384248739,0.07092155545579401,27448.91121504087,5.822055889865127,16.53846153846154,0.177215,3111.985966463884,0.10264339711931017,1465.8584785724734,0.09138547661664378,0.8504867849422679,0.7742727557913103,0.06254942442191513,0.056743152931981594,21194.263639925673,5.623705460666335,16.5,0,1,0,25,1435.3,35.7,31.2,0.6468531468531469,0.09500537263224745 +TABM (default),25.031941110761757,0.20188171669968172,14.494913436183246,0.20961960333163143,0.7120650416184634,0.7180270297124032,0.1369795645146118,0.07804754343441613,189.57549613749592,9.978425427673205,16.96153846153846,0.175255,19.491187883747948,0.16552388005786473,10.418743379746541,0.15495242186411928,0.7417274447397321,0.7370120765139465,0.07020888606704828,0.04168408531342971,140.5904555433362,9.536419533323254,13.5,0,0,0,26,1429.3,34.7,31.2,0.6372377622377622,0.08930659071409167 +CAT (default),184.70535022315818,0.14151572040003588,154.6142383995702,0.1555426094856078,0.7721520925382083,0.773058737636122,0.1329171714368633,0.07483897220415464,477.2608484422213,7.423108461919998,17.0,0.176625,11.6375067697631,0.16093741522894967,5.723546572951673,0.1101036800878527,0.8708469956136012,0.8295699236647479,0.07003170550807408,0.03759488482131658,107.22813859237792,5.910285969899903,17.5,0,2,0,24,1425.0,31.1,34.5,0.6363636363636364,0.10014897524065079 +MNCA (tuned),10872.434398398032,0.5841771599573967,5614.838977261846,0.5087025474163123,0.7448490525012423,0.6637614958528275,0.11651895782038843,0.07336435140526076,77920.67944321278,26.83390437537825,17.057692307692307,0.17598,10294.846671289868,0.4166409836875068,5944.878874246984,0.5156200362715774,0.8177816514691232,0.7235503806350614,0.06755654915093501,0.057542536642744974,66956.02864547497,22.936826653686147,16.5,1,0,0,25,1425.9,36.9,29.1,0.6350524475524476,0.11459331361587798 +XGB (tuned + ensemble),1872.6912781718452,2.683822184852046,1147.2290814465296,3.3619425077092306,0.7784275988369416,0.7582815836411924,0.13102530851919714,0.08005807343057435,11800.394028929062,144.45205787298235,17.51923076923077,0.18004,1385.052251373397,1.6176902174949646,766.0569170086173,1.9172343251389494,0.8316341855574652,0.7791345165987353,0.07343601410090844,0.05291664219529325,11520.40078776449,81.51508510452376,15.25,0,0,0,26,1415.4,35.8,32.1,0.6245629370629371,0.07166187524738382 +EBM (tuned + ensemble),2781.1579704396745,0.37791664172441536,1628.507747090886,0.31990997744824035,0.8144475987903695,0.8130615556926872,0.1572702096615729,0.11477484338056507,16692.42562947248,18.328617946180266,19.076923076923077,0.190645,1861.6872274941868,0.27221977710723877,1109.0589152421817,0.22935467594400044,0.9368603191709515,0.8266966373060847,0.0849735392053248,0.05122044741314488,17695.477225433653,12.287120931350431,18.5,0,0,1,25,1383.1,30.7,32.2,0.5891608391608392,0.08004680984485021 +XGB (tuned),1872.6912781718452,0.6264070270407913,1147.2290814465296,0.8619192527074968,0.8187393857188691,0.7809467391114648,0.1335476211929174,0.08261887953032405,11800.394028929062,30.013383573853858,19.192307692307693,0.18088500000000002,1385.052251373397,0.25049915578630233,766.0569170086173,0.27754517145625424,0.9017943832788666,0.7799972199454441,0.07798856953005051,0.052696435285602104,11520.40078776449,11.544680214373663,19.5,0,0,0,26,1379.3,39.0,30.4,0.5865384615384616,0.05963545192718714 +REALMLP (tuned),17809.543566583365,0.2565666730587299,5995.031437071161,0.31613979844114337,0.7992732991174807,0.7337946286494308,0.13385871828365248,0.07632560949832082,112219.73072981052,14.318352639712598,19.346153846153847,0.174605,10344.939689996507,0.2056242651409573,5786.691668992368,0.2497109023068299,0.895201963654964,0.74474927717263,0.07816480714694801,0.061570080954222194,87871.6182808383,11.529830439504863,17.0,0,0,0,26,1379.0,35.5,42.0,0.583041958041958,0.06741843156688952 +MNCA (tuned + ensemble),10872.434398398032,12.131162534310267,5614.838977261846,9.225622394556064,0.6933896363411076,0.6676223600444691,0.12705648224037935,0.09495265900961732,77920.67944321278,542.9013751751211,19.46153846153846,0.193055,10294.846671289868,9.331144248114692,5944.878874246984,8.395978282094799,0.722368050936999,0.6992013611051189,0.10101560653736502,0.048750216328898435,66956.02864547497,426.41545163369784,15.0,0,1,3,22,1375.2,34.1,37.3,0.5804195804195804,0.11686086342849132 +MNCA (default),28.48231661941251,0.466951452768766,14.20837500082612,0.38564885096222307,0.8169283513779425,0.7625155608140191,0.15085268931164045,0.08288629881402086,190.09277232716437,21.03504265137771,19.76923076923077,0.18519,24.325043747160173,0.36861725648244226,14.804303881798361,0.336567721078897,0.9808195818002508,0.7827163737065492,0.07974631950048428,0.053259111831894385,190.7747999331715,15.595042220247258,21.0,1,0,0,25,1366.6,30.2,32.3,0.5734265734265734,0.10017785315060294 +FASTAI (tuned + ensemble),2658.981896154697,9.680411353478066,1616.869681108774,10.454986295420108,0.7698173487890432,0.7686639558056911,0.1568263488914938,0.08996191783618754,20955.495033608,514.94962880429,20.25,0.17986000000000002,2313.697349058257,8.905639794137743,1358.6299051921596,8.066833347447606,0.9823907390988809,0.8177131221302221,0.08132829350115817,0.05523334112431259,17602.335588839425,531.7232414230566,20.0,0,1,0,25,1356.1,31.7,35.0,0.5625,0.08900824287112627 +NN_TORCH (tuned + ensemble),8367.9645569837,3.8136142372066137,3603.347061143873,3.319891821445254,0.8005534385996745,0.764065484436084,0.1332555944462707,0.07705265128451745,56068.072795326945,181.3894555134842,20.28846153846154,0.17949500000000002,6973.094145007928,2.944306871626112,2862.0511040893566,2.157502904371376,0.9508009219506811,0.8634802144576847,0.06782483230561193,0.06047718336789526,51500.257380537965,155.60936197975602,20.0,0,0,0,26,1356.4,32.6,31.9,0.5616258741258742,0.06668189048939435 +TABDPT (default),73.6718590354308,22.101120046978323,33.70000421461804,29.140721170745643,0.7421770023997575,0.7373052354542396,0.1481225969644462,0.09258934520941622,529.9323761499634,1297.090837487081,22.0,0.204185,71.26209372944302,21.38572289016512,27.488664901286818,8.862313494979123,1.0,0.88150849404269,0.06691814537901497,0.04715293243733788,513.7039796057306,1113.0844664803024,25.5,2,0,3,21,1323.2,24.0,32.4,0.5227272727272727,0.16307441111325852 +EBM (tuned),2781.1579704396745,0.04655793772803412,1628.507747090886,0.04424123436436782,0.8796773591220505,0.848789281667686,0.16506052478039396,0.12247708467085656,16692.42562947248,2.268478500812272,22.134615384615383,0.19155,1861.6872274941868,0.0338150527742174,1109.0589152421817,0.02733291425041831,1.0,0.8902021091214286,0.0928869358898275,0.062194782256558334,17695.477225433653,1.252412448907763,24.0,0,0,0,26,1319.5,32.8,40.9,0.5196678321678322,0.0577362774386474 +EBM (default),10.549292183941246,0.06438207361433242,5.791308777159165,0.0798605102452594,0.8468070324259693,0.8496093431986085,0.1725067485868843,0.124266558571261,75.26102802648397,3.652629917649085,23.0,0.19248500000000002,8.052384217580158,0.051854162746005586,5.279257749622374,0.07769986864408396,1.0,0.8953860150359447,0.09554880640873625,0.056563353282588213,77.97362526017017,3.340101142403973,23.0,1,0,2,23,1298.8,36.2,34.1,0.5,0.10350085258304385 +FASTAI (tuned),2658.981896154697,0.7253259826929142,1616.869681108774,0.8254771149356669,0.8865326975991655,0.8336048750524725,0.1719892484242808,0.10544003385780601,20955.495033608,39.61972184578173,24.48076923076923,0.18178,2313.697349058257,0.7597291602028741,1358.6299051921596,0.8969521438633071,1.0,0.8551942745167906,0.09660910797742411,0.07038888057026207,17602.335588839425,37.14448504861147,23.0,0,0,0,26,1269.2,37.8,39.5,0.46634615384615385,0.0522325290233973 +XT (tuned + ensemble),714.8477392964893,1.554570463987497,476.1600651525857,1.7304383155841179,0.8984673764067701,0.8650357556797086,0.18213060995030939,0.12358064556047872,6037.128774148354,86.31947081771706,25.0,0.18718,684.9222148127026,1.3082488920953539,370.85408017752667,1.4664534567412004,1.0,0.927972542444979,0.09673248729372463,0.07594781768304604,5339.627074654447,77.0823275943278,28.25,0,0,0,26,1260.2,26.7,47.0,0.45454545454545453,0.057367362828272636 +NN_TORCH (tuned),8367.9645569837,0.19940724566451504,3603.347061143873,0.20335754670694137,0.9141063957909149,0.850631259028558,0.15913478602343065,0.0993949625945807,56068.072795326945,10.018943215736572,25.78846153846154,0.18092,6973.094145007928,0.144207231203715,2862.0511040893566,0.15177921475257505,1.0,0.9108681738244536,0.10379368194908523,0.09268904724445692,51500.257380537965,8.681540922799902,27.0,0,0,0,26,1238.5,32.5,32.6,0.4366258741258741,0.045366926667074474 +REALMLP (default),111.4785790004282,0.2675422726533352,36.9875135803688,0.319519629673943,0.9170988508971886,0.8569208298595739,0.15925534618050793,0.09850483276040865,704.4596577225865,14.367018773597605,26.057692307692307,0.17819000000000002,62.036723497178826,0.20994164678785537,36.23724475975503,0.27878779609060306,1.0,0.9211958531631512,0.11529538667598471,0.06746878542921386,536.9122103749642,12.116250573587863,28.5,0,0,0,26,1233.8,37.0,35.4,0.430506993006993,0.04593533182737854 +XGB (default),4.970388249658113,0.2682310309165563,3.142391352663509,0.37694382359999684,0.967663807523489,0.9070085519838873,0.16741259421914897,0.12284060876198313,37.42364826794432,14.932177592236862,27.057692307692307,0.18894,4.433991021580166,0.20984046989017063,2.395188706947506,0.2182544724645258,1.0,0.9518900673045412,0.11516738963641299,0.07681752222912633,34.313637563138,10.099890730056913,25.75,0,0,0,26,1212.8,40.2,31.9,0.40777972027972026,0.04033171361980777 +XT (tuned),714.8477392964893,0.1734743657275143,476.1600651525857,0.2154329300724878,0.9275522820640911,0.8944270214472317,0.19352158162401475,0.1332600566819766,6037.128774148354,9.800458404720944,27.5,0.19,684.9222148127026,0.15512712796529132,370.85408017752667,0.16132775528274945,1.0,0.9688071317540197,0.10725621698653792,0.08413965072039432,5339.627074654447,8.581565117405717,31.5,0,0,0,26,1200.5,34.6,31.8,0.3977272727272727,0.04426880847584831 +RF (tuned + ensemble),915.5438805149151,1.5748089445961848,532.5626464695567,1.640386311447312,0.9428934399339288,0.9114679294811889,0.19492401068058715,0.147796519544612,6794.043055663636,81.1610891109226,29.46153846153846,0.18906,789.1687051984999,1.122593025366465,527.4239458868619,1.3899910445458383,1.0,0.9937951111561848,0.11848332500543385,0.09151654391313321,6269.6136687159,75.55960345229795,31.5,0,0,0,26,1156.9,35.3,29.3,0.3531468531468531,0.04551104983248137 +GBM (default),5.376282313848153,0.20799484711426958,3.3489543497095178,0.19728412961063485,0.9701181894525962,0.9226046869734794,0.17658302669842518,0.11946139446934875,42.11806124941037,9.792256635961557,29.576923076923077,0.188175,5.033887876404656,0.22240020169152153,2.8984772023300502,0.13282292956587455,1.0,0.9543370010011993,0.11805347934940252,0.09964492878261602,38.718264562373335,7.411495765404641,29.5,0,0,0,26,1154.1,38.8,28.9,0.3505244755244755,0.036096329534928126 +RF (tuned),915.5438805149151,0.159982283706339,532.5626464695567,0.2003027211432199,0.9772506936041817,0.9400975627550481,0.20796627140051763,0.15738142731874985,6794.043055663636,8.85724716182767,31.923076923076923,0.190815,789.1687051984999,0.14185967445373537,527.4239458868619,0.12279197881507195,1.0,0.9949804676438307,0.13224551901999893,0.10601779560500876,6269.6136687159,8.226247917103098,33.0,0,0,0,26,1096.4,35.3,30.8,0.2972027972027972,0.03435712054173671 +NN_TORCH (default),27.39151145983965,0.20185868648382335,14.560395769408618,0.21573681790796437,0.9698948249425869,0.9391775521500288,0.2143711666740534,0.1478455999492002,188.3149665224302,10.435316060204023,32.32692307692308,0.183195,20.22722778055403,0.14770235617955524,10.376930987013111,0.18792402145583464,1.0,0.9983912483912485,0.14318031310217677,0.1051705699943664,197.09431521312473,8.356657050571908,33.0,0,0,0,26,1082.4,35.8,33.5,0.2880244755244755,0.03477287158574056 +FASTAI (default),10.397479128328143,0.6249765678348704,5.689409993948898,0.6426262937267089,0.9807465319111015,0.9403855906437737,0.2303447532418144,0.17838953792880163,79.38940070919118,32.314469844151965,33.61538461538461,0.19183499999999998,9.91800790362888,0.5629585729704962,4.729717448807326,0.6226443216085578,1.0,0.9998853211009175,0.16088565720086695,0.11423976777713259,63.05950085745769,32.34956133934325,36.5,0,0,0,26,1051.3,30.1,32.4,0.25874125874125875,0.03221855964948352 +LR (tuned + ensemble),175.85719627702338,0.42670365847074065,115.79203038641191,0.374819481777715,0.9447815704031849,0.9377863465231412,0.28317417183852894,0.24822823084079834,1448.7019616321465,20.41052886468638,34.23076923076923,0.216585,158.39292872746785,0.19311302105585734,88.63237206036187,0.25697546561814155,1.0,1.0,0.21769005088471027,0.1779543124949825,1246.7830478036524,13.25530093456624,38.0,0,0,1,25,1030.4,38.5,40.3,0.24475524475524477,0.04368314033358164 +LR (tuned),175.85719627702338,0.1364539579448537,115.79203038641191,0.12123112010680681,0.9645430999513104,0.943877751101422,0.2902155463327537,0.25773756332102926,1448.7019616321465,6.009867470278231,35.21153846153846,0.21681,158.39292872746785,0.07476819356282552,88.63237206036187,0.08838151119168902,1.0,1.0,0.22518752798442315,0.1834788883245299,1246.7830478036524,4.016028876859062,38.0,0,0,0,26,996.9,36.4,40.6,0.22246503496503497,0.03516566041445386 +RF (default),0.9640304686676744,0.07426402477117686,0.5084719578868067,0.0844146006139,0.9951894125278877,0.9701912114787967,0.25755513106395067,0.2479832267773854,6.6754175478812146,4.153857105918795,35.28846153846154,0.213105,0.8910642200046115,0.05825435982810126,0.447836563000179,0.06594795127086661,1.0,1.0,0.17586669886220047,0.11692775501298655,5.654732996519019,3.502947097147378,37.5,0,0,0,26,1000.0,0.0,0.0,0.22071678321678323,0.029920050824686344 +LR (default),4.707631549162742,0.1483236100938585,2.804518891637732,0.1437661673750307,0.9745259999639975,0.9520629989740693,0.305476365274656,0.29332097764816845,36.73103788870433,7.308777223189359,35.67307692307692,0.22103499999999998,4.746849238872528,0.08430976470311483,2.2657084486770183,0.10642460584640503,1.0,1.0,0.22930816077470617,0.22370793213879017,31.50980214431158,4.703818974402271,39.5,0,0,1,25,982.3,41.6,38.5,0.21197552447552448,0.03995938942367327 +XT (default),0.8462119445841536,0.0803421718442542,0.47456795510527094,0.08795126020679146,0.9851460583683391,0.9642266228854909,0.2753142737967973,0.27671727525790224,6.0304600748852675,4.406716139948246,36.65384615384615,0.215405,0.7635703219307794,0.06673479080200195,0.40395335630596185,0.07007418015238884,1.0,1.0,0.18424442460299273,0.1562338348801046,5.148113375558061,3.7424721126192138,39.0,0,0,0,26,955.5,43.9,50.8,0.1896853146853147,0.030378669430214802 +KNN (tuned + ensemble),16.440937072497146,0.2739423143558013,7.036183733958932,0.19964653999651732,1.0,0.9945521006475282,0.47993514547383553,0.5914832997832359,57.836601086323085,11.285407568076662,41.57692307692308,0.31118999999999997,6.799899099932777,0.1030390567249722,3.707274221912526,0.16730320161416595,1.0,1.0,0.4552264030449861,0.6183873456591064,55.06974575171796,8.266636963631449,43.0,0,0,0,26,703.5,49.4,45.5,0.0777972027972028,0.024279443329967162 +KNN (tuned),16.440937072497146,0.06075953357240074,7.036183733958932,0.04304950843110991,1.0,0.9966557577121117,0.5010874288021712,0.6455640735444151,57.836601086323085,2.5027008595459077,42.69230769230769,0.31467999999999996,6.799899099932777,0.036990099483066134,3.707274221912526,0.039402452723266784,1.0,1.0,0.4955223463378593,0.6729856958219811,55.06974575171796,2.091430487051362,44.0,0,0,0,26,609.2,52.7,48.0,0.05244755244755245,0.023502270498812147 +KNN (default),0.2899092884145231,0.0317453768518236,0.13426353967394633,0.031065835648244968,1.0,1.0,0.5923454279012129,0.9491257378249622,1.00065410372272,1.4527699655062962,44.25,0.34582,0.1258228341738383,0.02117468251122369,0.07457399441809831,0.021006283652748647,1.0,1.0,0.6201400300158697,1.0,1.0,1.0057335151210618,45.0,0,0,0,26,407.8,86.4,95.3,0.017045454545454544,0.022663376691509682 diff --git a/data/tabpfn-tabicl-imputed/tuning-impact-elo-horizontal.pdf b/data/tabpfn-tabicl-imputed/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..13a8d3fa7c4257472e7dc2359f387fd5e33d9ab2 Binary files /dev/null and b/data/tabpfn-tabicl-imputed/tuning-impact-elo-horizontal.pdf differ diff --git a/data/tabpfn-tabicl-imputed/tuning-impact-elo-horizontal.png.zip b/data/tabpfn-tabicl-imputed/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..4f6908fba6c41ba3576608b66bc23a0647bbfbc2 --- /dev/null +++ b/data/tabpfn-tabicl-imputed/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e190a64928288c6f71df2881dc9fadd779b153432c7535ca3ddd8d9d6b7fa4dd +size 139645 diff --git a/data/tabpfn-tabicl/figures/critical-diagram.pdf b/data/tabpfn-tabicl/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..cf84ff5e66db01f6a8ffae5075a7b2c6a22ead09 Binary files /dev/null and b/data/tabpfn-tabicl/figures/critical-diagram.pdf differ diff --git a/data/tabpfn-tabicl/figures/critical-diagram.png.zip b/data/tabpfn-tabicl/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..24e08506593fee7a8727aefce7940752470d6e76 --- /dev/null +++ b/data/tabpfn-tabicl/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:09fd50f8530f0264c02045700850c01bf4b212bcd6257fa3321f09e02e249a62 +size 319568 diff --git a/data/tabpfn-tabicl/leaderboard.tex b/data/tabpfn-tabicl/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..b41ff65b5428b5f7a450717fe825914a05f0b501 --- /dev/null +++ b/data/tabpfn-tabicl/leaderboard.tex @@ -0,0 +1,53 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +TabPFNv2 (T+E) & \textcolor{gold}{\textbf{1736${}_{-45,+46}$}} & \textcolor{gold}{\textbf{0.700}} & \textcolor{gold}{\textbf{5.8}} & \textcolor{gold}{\textbf{2.2}} & \textcolor{gold}{\textbf{8}} & \textcolor{gold}{\textbf{4.6\%}} & 3445.60 & 48.24 \\ +TabM (T+E) & \textcolor{silver}{\textbf{1612${}_{-37,+35}$}} & 0.483 & \textcolor{silver}{\textbf{9.3}} & 4.5 & 2 & 9.2\% & 2828.45 & 1.60 \\ +TabICL (D) & \textcolor{bronze}{\textbf{1608${}_{-34,+36}$}} & \textcolor{bronze}{\textbf{0.543}} & \textcolor{bronze}{\textbf{9.5}} & \textcolor{silver}{\textbf{3.3}} & \textcolor{silver}{\textbf{4}} & \textcolor{silver}{\textbf{7.1\%}} & 8.89 & 1.74 \\ +TabPFNv2 (T) & 1588${}_{-42,+32}$ & \textcolor{silver}{\textbf{0.544}} & 10.3 & 3.7 & 1 & \textcolor{bronze}{\textbf{8.2\%}} & 3445.60 & 1.00 \\ +RealMLP (T+E) & 1573${}_{-37,+29}$ & 0.402 & 10.9 & 7.1 & 0 & 10.4\% & 5786.69 & 4.27 \\ +AutoGluon 1.3 (4h) & 1544${}_{-44,+34}$ & 0.440 & 11.9 & 5.6 & 1 & 9.5\% & 2309.21 & 2.55 \\ +TabPFNv2 (D) & 1542${}_{-38,+38}$ & 0.499 & 12.0 & \textcolor{bronze}{\textbf{3.4}} & \textcolor{silver}{\textbf{4}} & 9.5\% & 4.06 & 0.44 \\ +LightGBM (T+E) & 1521${}_{-35,+40}$ & 0.319 & 12.9 & 7.1 & 1 & 11.8\% & 647.56 & 1.72 \\ +TabM (T) & 1512${}_{-40,+30}$ & 0.387 & 13.3 & 7.5 & 0 & 10.4\% & 2828.45 & 0.22 \\ +LightGBM (T) & 1462${}_{-34,+33}$ & 0.262 & 15.3 & 13.3 & 0 & 12.2\% & 647.56 & 0.28 \\ +CatBoost (T+E) & 1456${}_{-32,+38}$ & 0.271 & 15.8 & 11.6 & 0 & 11.7\% & 1465.86 & 0.69 \\ +CatBoost (T) & 1435${}_{-32,+36}$ & 0.247 & 16.5 & 10.5 & 0 & 12.0\% & 1465.86 & 0.09 \\ +TabM (D) & 1429${}_{-32,+35}$ & 0.288 & 17.0 & 11.2 & 0 & 13.7\% & 10.42 & 0.15 \\ +ModernNCA (T) & 1426${}_{-30,+37}$ & 0.255 & 17.1 & 8.7 & 1 & 11.7\% & 5944.88 & 0.52 \\ +CatBoost (D) & 1425${}_{-35,+32}$ & 0.228 & 17.0 & 10.0 & 0 & 13.3\% & 5.72 & 0.11 \\ +XGBoost (T+E) & 1415${}_{-33,+36}$ & 0.222 & 17.5 & 14.0 & 0 & 13.1\% & 766.06 & 1.92 \\ +EBM (T+E) & 1383${}_{-33,+31}$ & 0.186 & 19.1 & 12.5 & 0 & 15.7\% & 1109.06 & 0.23 \\ +XGBoost (T) & 1379${}_{-31,+39}$ & 0.181 & 19.2 & 16.8 & 0 & 13.4\% & 766.06 & 0.28 \\ +RealMLP (T) & 1379${}_{-42,+36}$ & 0.201 & 19.3 & 14.8 & 0 & 13.4\% & 5786.69 & 0.25 \\ +ModernNCA (T+E) & 1375${}_{-38,+35}$ & 0.307 & 19.5 & 8.6 & 0 & 12.7\% & 5944.88 & 8.40 \\ +ModernNCA (D) & 1367${}_{-33,+31}$ & 0.183 & 19.8 & 10.0 & 1 & 15.1\% & 14.80 & 0.34 \\ +TorchMLP (T+E) & 1356${}_{-32,+33}$ & 0.199 & 20.3 & 15.0 & 0 & 13.3\% & 2862.05 & 2.16 \\ +FastaiMLP (T+E) & 1356${}_{-35,+32}$ & 0.230 & 20.2 & 11.2 & 0 & 15.7\% & 1358.63 & 8.07 \\ +TabDPT (D) & 1323${}_{-33,+24}$ & 0.258 & 22.0 & 6.1 & 2 & 14.8\% & 27.49 & 8.86 \\ +EBM (T) & 1320${}_{-41,+33}$ & 0.120 & 22.1 & 17.3 & 0 & 16.5\% & 1109.06 & 0.03 \\ +EBM (D) & 1299${}_{-35,+37}$ & 0.153 & 23.0 & 9.7 & 1 & 17.3\% & 5.28 & 0.08 \\ +FastaiMLP (T) & 1269${}_{-40,+38}$ & 0.113 & 24.5 & 19.1 & 0 & 17.2\% & 1358.63 & 0.90 \\ +ExtraTrees (T+E) & 1260${}_{-47,+27}$ & 0.102 & 25.0 & 17.4 & 0 & 18.2\% & 370.85 & 1.47 \\ +TorchMLP (T) & 1238${}_{-33,+33}$ & 0.086 & 25.8 & 22.0 & 0 & 15.9\% & 2862.05 & 0.15 \\ +RealMLP (D) & 1234${}_{-36,+37}$ & 0.083 & 26.1 & 21.8 & 0 & 15.9\% & 36.24 & 0.28 \\ +XGBoost (D) & 1213${}_{-32,+41}$ & 0.032 & 27.1 & 24.8 & 0 & 16.7\% & 2.40 & 0.22 \\ +ExtraTrees (T) & 1200${}_{-32,+35}$ & 0.072 & 27.5 & 22.6 & 0 & 19.4\% & 370.85 & 0.16 \\ +RandomForest (T+E) & 1157${}_{-30,+36}$ & 0.057 & 29.5 & 22.0 & 0 & 19.5\% & 527.42 & 1.39 \\ +LightGBM (D) & 1154${}_{-29,+39}$ & 0.030 & 29.6 & 27.7 & 0 & 17.7\% & 2.90 & 0.13 \\ +RandomForest (T) & 1096${}_{-31,+36}$ & 0.023 & 31.9 & 29.1 & 0 & 20.8\% & 527.42 & 0.12 \\ +TorchMLP (D) & 1082${}_{-34,+36}$ & 0.030 & 32.3 & 28.8 & 0 & 21.4\% & 10.38 & 0.19 \\ +FastaiMLP (D) & 1051${}_{-33,+31}$ & 0.019 & 33.6 & 31.0 & 0 & 23.0\% & 4.73 & 0.62 \\ +Linear (T+E) & 1030${}_{-41,+39}$ & 0.055 & 34.2 & 22.9 & 0 & 28.3\% & 88.63 & 0.26 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.005 & 35.3 & 33.4 & 0 & 25.8\% & 0.45 & 0.07 \\ +Linear (T) & 997${}_{-41,+37}$ & 0.035 & 35.2 & 28.4 & 0 & 29.0\% & 88.63 & 0.09 \\ +Linear (D) & 982${}_{-39,+42}$ & 0.025 & 35.7 & 25.0 & 0 & 30.5\% & 2.27 & 0.11 \\ +ExtraTrees (D) & 956${}_{-51,+44}$ & 0.015 & 36.7 & 32.9 & 0 & 27.5\% & 0.40 & 0.07 \\ +KNN (T+E) & 704${}_{-46,+50}$ & 0.000 & 41.6 & 41.2 & 0 & 48.0\% & 3.71 & 0.17 \\ +KNN (T) & 609${}_{-48,+53}$ & 0.000 & 42.7 & 42.5 & 0 & 50.1\% & 3.71 & 0.04 \\ +KNN (D) & 408${}_{-96,+87}$ & 0.000 & 44.2 & 44.1 & 0 & 59.2\% & 0.07 & 0.02 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/tabpfn-tabicl/tabarena_leaderboard.csv b/data/tabpfn-tabicl/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..9cfacb0044514fd9fcb6b0fd592ac532d061c310 --- /dev/null +++ b/data/tabpfn-tabicl/tabarena_leaderboard.csv @@ -0,0 +1,46 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +TABPFNV2 (tuned + ensemble),14614.639715819583,148.11643430860633,3877.419600731425,65.38306906011915,0.2999799624009034,0.36592302109392316,0.046199591967015556,0.028953147052208875,72438.91792383758,5032.936436246271,5.788461538461538,0.17261500000000002,8173.816975453165,60.12680508957969,3445.602606086448,48.23597351862546,0.21441671773452015,0.4068697696081146,0.021640889295981347,0.016874625134937742,43032.16406688419,2931.913108083719,3.5,8,4,1,13,1736.3,45.7,44.6,0.8911713286713286,0.46269305993659526 +TABM (tuned + ensemble),7080.034926002046,2.290320276704609,3436.5218404284533,2.1079046445931953,0.5166525238887076,0.547775172494987,0.09238776741660952,0.04985850997225548,43123.835967101855,114.22499982092596,9.307692307692308,0.17379499999999998,6405.6722231189415,1.880999751885732,2828.4486752645407,1.5957955528425738,0.47136645123500964,0.5540981099375837,0.04617144277378826,0.034316541138176816,44088.87353980956,109.46959390379024,8.0,2,1,1,22,1612.5,34.1,36.5,0.8111888111888111,0.22027167779430223 +TABICL (default),21.933824618351764,2.7954679915028757,10.278805574502684,2.035731958506663,0.45732829959408833,0.5181742657898207,0.07108757868512468,0.03468928052253071,145.15967621212897,119.47187491598106,9.48076923076923,0.17207,19.54939921696981,2.1857474181387158,8.891901835466445,1.7433667301105085,0.46959481282562443,0.583210155619012,0.03487584587193143,0.02062556592021359,120.41628593220491,98.62751622705991,8.0,4,3,1,18,1607.5,35.1,33.8,0.8072552447552448,0.3008875223205248 +TABPFNV2 (tuned),14614.639715819583,4.849548537201351,3877.419600731425,2.254088269272259,0.4562783530638654,0.48513499648162556,0.08179742986099706,0.05007438478688923,72438.91792383758,165.31166707386078,10.326923076923077,0.1868,8173.816975453165,0.8871031337314181,3445.602606086448,0.9952991538273057,0.34109768962419973,0.46901679172287375,0.05804204088839793,0.02327176321840322,43032.16406688419,48.819771718398094,6.0,1,8,1,16,1588.5,31.1,41.1,0.7880244755244755,0.26951399319966235 +REALMLP (tuned + ensemble),17809.543566583365,5.343982117807764,5995.031437071161,5.920653730744878,0.5983187605703805,0.5877560111308218,0.10395608954668528,0.05250154829922825,112219.73072981052,290.95193637081104,10.865384615384615,0.17424,10344.939689996507,4.694504770967695,5786.691668992368,4.267078452745884,0.5696681015084324,0.5635752491354304,0.051850568481074444,0.04632297855309826,87871.6182808383,267.1363813998378,9.0,0,0,3,23,1572.9,29.0,36.1,0.7757867132867133,0.1417961559305308 +AutoGluon 1.3 (4h),5931.009838958377,3.1656774007357082,3673.8246676849526,3.0250106370782723,0.5599854226187498,0.5522297755522069,0.09461405000435809,0.05278876561410328,39712.28933278582,151.42916243848958,11.884615384615385,0.17691,5020.2655313505065,2.664059625731574,2309.213100138395,2.5543334551748154,0.5600808158121042,0.565709489217623,0.04762778419021996,0.04116173129591805,35357.34871646478,128.98489015193604,8.5,1,2,0,23,1544.1,33.4,44.0,0.7526223776223776,0.17858871441651328 +TABPFNV2 (default),11.884418644151115,1.1739113624279316,5.514520630749067,0.6490305586450037,0.5013642769155608,0.5669313754038009,0.09512311589648129,0.06286359741165812,76.29505964660768,41.758850754001486,12.038461538461538,0.1886,8.093020015292698,0.44081105126274955,4.062007578219401,0.4356807523460313,0.3771347187714357,0.5238664820190138,0.05018180969629027,0.021765272352146064,58.62232186481185,25.39697585954705,5.0,4,1,4,17,1541.6,37.1,37.8,0.7491258741258742,0.2935579164181083 +GBM (tuned + ensemble),1379.7589400635827,3.041326452829899,860.8328230742596,2.952036385308545,0.6806482110513491,0.6837279086393804,0.1176313686476465,0.061906173521871284,10455.248725429481,156.11534656534064,12.903846153846153,0.174085,1321.8940714677174,2.4085231754514904,647.5566852470606,1.7204157994123173,0.7005535661954161,0.7073670918352166,0.051567085599247775,0.05140473915859429,9480.825611975059,103.35126359750211,12.5,1,0,2,23,1521.0,39.2,34.8,0.729458041958042,0.14042554762228465 +TABM (tuned),7080.034926002046,0.2169825582422762,3436.5218404284533,0.22059896519012676,0.6127855914191946,0.6024458446591694,0.10415143904294016,0.06347344157931543,43123.835967101855,11.305450805467302,13.346153846153847,0.17596499999999998,6405.6722231189415,0.19745506313112046,2828.4486752645407,0.21564835018398254,0.5915500043256848,0.637432321817063,0.06661219922938116,0.04216185168235247,44088.87353980956,9.589800419015791,13.0,0,2,1,23,1511.6,29.3,40.0,0.7194055944055944,0.13250415685370437 +GBM (tuned),1379.7589400635827,0.5707490142594036,860.8328230742596,0.6449583363337497,0.737683616453633,0.7058815019854675,0.12190922144153328,0.06697491769463343,10455.248725429481,28.289876597893098,15.326923076923077,0.17371999999999999,1321.8940714677174,0.34941615396075776,647.5566852470606,0.28350492153737616,0.7826548575969626,0.7126095533835781,0.05516713547384128,0.05173099008418312,9480.825611975059,16.480248719877835,15.5,0,0,0,26,1461.6,33.0,33.6,0.6743881118881119,0.07518973620464424 +CAT (tuned + ensemble),4698.536897969144,0.9597997341400538,2797.035390366479,0.7965719365801648,0.728920075896456,0.7126569304674935,0.11678549305379037,0.07069284985664498,27448.91121504087,44.04781428274726,15.76923076923077,0.1771,3111.985966463884,0.7725222905476887,1465.8584785724734,0.6919304562740058,0.7992439551459185,0.720606902542752,0.0655088388746699,0.053959365141496254,21194.263639925673,39.04121549244424,16.0,0,0,1,25,1455.5,38.0,31.7,0.6643356643356644,0.08640388177736971 +CAT (tuned),4698.536897969144,0.11917537287769155,2797.035390366479,0.12051762325590008,0.7533425274831995,0.7320002632947487,0.11966485384248739,0.07092155545579401,27448.91121504087,5.822055889865127,16.53846153846154,0.177215,3111.985966463884,0.10264339711931017,1465.8584785724734,0.09138547661664378,0.8504867849422679,0.7742727557913103,0.06254942442191513,0.056743152931981594,21194.263639925673,5.623705460666335,16.5,0,1,0,25,1435.3,35.7,31.2,0.6468531468531469,0.09500537263224745 +TABM (default),25.031941110761757,0.20188171669968172,14.494913436183246,0.20961960333163143,0.7120650416184634,0.7180270297124032,0.1369795645146118,0.07804754343441613,189.57549613749592,9.978425427673205,16.96153846153846,0.175255,19.491187883747948,0.16552388005786473,10.418743379746541,0.15495242186411928,0.7417274447397321,0.7370120765139465,0.07020888606704828,0.04168408531342971,140.5904555433362,9.536419533323254,13.5,0,0,0,26,1429.3,34.7,31.2,0.6372377622377622,0.08930659071409167 +CAT (default),184.70535022315818,0.14151572040003588,154.6142383995702,0.1555426094856078,0.7721520925382083,0.773058737636122,0.1329171714368633,0.07483897220415464,477.2608484422213,7.423108461919998,17.0,0.176625,11.6375067697631,0.16093741522894967,5.723546572951673,0.1101036800878527,0.8708469956136012,0.8295699236647479,0.07003170550807408,0.03759488482131658,107.22813859237792,5.910285969899903,17.5,0,2,0,24,1425.0,31.1,34.5,0.6363636363636364,0.10014897524065079 +MNCA (tuned),10872.434398398032,0.5841771599573967,5614.838977261846,0.5087025474163123,0.7448490525012423,0.6637614958528275,0.11651895782038843,0.07336435140526076,77920.67944321278,26.83390437537825,17.057692307692307,0.17598,10294.846671289868,0.4166409836875068,5944.878874246984,0.5156200362715774,0.8177816514691232,0.7235503806350614,0.06755654915093501,0.057542536642744974,66956.02864547497,22.936826653686147,16.5,1,0,0,25,1425.9,36.9,29.1,0.6350524475524476,0.11459331361587798 +XGB (tuned + ensemble),1872.6912781718452,2.683822184852046,1147.2290814465296,3.3619425077092306,0.7784275988369416,0.7582815836411924,0.13102530851919714,0.08005807343057435,11800.394028929062,144.45205787298235,17.51923076923077,0.18004,1385.052251373397,1.6176902174949646,766.0569170086173,1.9172343251389494,0.8316341855574652,0.7791345165987353,0.07343601410090844,0.05291664219529325,11520.40078776449,81.51508510452376,15.25,0,0,0,26,1415.4,35.8,32.1,0.6245629370629371,0.07166187524738382 +EBM (tuned + ensemble),2781.1579704396745,0.37791664172441536,1628.507747090886,0.31990997744824035,0.8144475987903695,0.8130615556926872,0.1572702096615729,0.11477484338056507,16692.42562947248,18.328617946180266,19.076923076923077,0.190645,1861.6872274941868,0.27221977710723877,1109.0589152421817,0.22935467594400044,0.9368603191709515,0.8266966373060847,0.0849735392053248,0.05122044741314488,17695.477225433653,12.287120931350431,18.5,0,0,1,25,1383.1,30.7,32.2,0.5891608391608392,0.08004680984485021 +XGB (tuned),1872.6912781718452,0.6264070270407913,1147.2290814465296,0.8619192527074968,0.8187393857188691,0.7809467391114648,0.1335476211929174,0.08261887953032405,11800.394028929062,30.013383573853858,19.192307692307693,0.18088500000000002,1385.052251373397,0.25049915578630233,766.0569170086173,0.27754517145625424,0.9017943832788666,0.7799972199454441,0.07798856953005051,0.052696435285602104,11520.40078776449,11.544680214373663,19.5,0,0,0,26,1379.3,39.0,30.4,0.5865384615384616,0.05963545192718714 +REALMLP (tuned),17809.543566583365,0.2565666730587299,5995.031437071161,0.31613979844114337,0.7992732991174807,0.7337946286494308,0.13385871828365248,0.07632560949832082,112219.73072981052,14.318352639712598,19.346153846153847,0.174605,10344.939689996507,0.2056242651409573,5786.691668992368,0.2497109023068299,0.895201963654964,0.74474927717263,0.07816480714694801,0.061570080954222194,87871.6182808383,11.529830439504863,17.0,0,0,0,26,1379.0,35.5,42.0,0.583041958041958,0.06741843156688952 +MNCA (tuned + ensemble),10872.434398398032,12.131162534310267,5614.838977261846,9.225622394556064,0.6933896363411076,0.6676223600444691,0.12705648224037935,0.09495265900961732,77920.67944321278,542.9013751751211,19.46153846153846,0.193055,10294.846671289868,9.331144248114692,5944.878874246984,8.395978282094799,0.722368050936999,0.6992013611051189,0.10101560653736502,0.048750216328898435,66956.02864547497,426.41545163369784,15.0,0,1,3,22,1375.2,34.1,37.3,0.5804195804195804,0.11686086342849132 +MNCA (default),28.48231661941251,0.466951452768766,14.20837500082612,0.38564885096222307,0.8169283513779425,0.7625155608140191,0.15085268931164045,0.08288629881402086,190.09277232716437,21.03504265137771,19.76923076923077,0.18519,24.325043747160173,0.36861725648244226,14.804303881798361,0.336567721078897,0.9808195818002508,0.7827163737065492,0.07974631950048428,0.053259111831894385,190.7747999331715,15.595042220247258,21.0,1,0,0,25,1366.6,30.2,32.3,0.5734265734265734,0.10017785315060294 +FASTAI (tuned + ensemble),2658.981896154697,9.680411353478066,1616.869681108774,10.454986295420108,0.7698173487890432,0.7686639558056911,0.1568263488914938,0.08996191783618754,20955.495033608,514.94962880429,20.25,0.17986000000000002,2313.697349058257,8.905639794137743,1358.6299051921596,8.066833347447606,0.9823907390988809,0.8177131221302221,0.08132829350115817,0.05523334112431259,17602.335588839425,531.7232414230566,20.0,0,1,0,25,1356.1,31.7,35.0,0.5625,0.08900824287112627 +NN_TORCH (tuned + ensemble),8367.9645569837,3.8136142372066137,3603.347061143873,3.319891821445254,0.8005534385996745,0.764065484436084,0.1332555944462707,0.07705265128451745,56068.072795326945,181.3894555134842,20.28846153846154,0.17949500000000002,6973.094145007928,2.944306871626112,2862.0511040893566,2.157502904371376,0.9508009219506811,0.8634802144576847,0.06782483230561193,0.06047718336789526,51500.257380537965,155.60936197975602,20.0,0,0,0,26,1356.4,32.6,31.9,0.5616258741258742,0.06668189048939435 +TABDPT (default),73.6718590354308,22.101120046978323,33.70000421461804,29.140721170745643,0.7421770023997575,0.7373052354542396,0.1481225969644462,0.09258934520941622,529.9323761499634,1297.090837487081,22.0,0.204185,71.26209372944302,21.38572289016512,27.488664901286818,8.862313494979123,1.0,0.88150849404269,0.06691814537901497,0.04715293243733788,513.7039796057306,1113.0844664803024,25.5,2,0,3,21,1323.2,24.0,32.4,0.5227272727272727,0.16307441111325852 +EBM (tuned),2781.1579704396745,0.04655793772803412,1628.507747090886,0.04424123436436782,0.8796773591220505,0.848789281667686,0.16506052478039396,0.12247708467085656,16692.42562947248,2.268478500812272,22.134615384615383,0.19155,1861.6872274941868,0.0338150527742174,1109.0589152421817,0.02733291425041831,1.0,0.8902021091214286,0.0928869358898275,0.062194782256558334,17695.477225433653,1.252412448907763,24.0,0,0,0,26,1319.5,32.8,40.9,0.5196678321678322,0.0577362774386474 +EBM (default),10.549292183941246,0.06438207361433242,5.791308777159165,0.0798605102452594,0.8468070324259693,0.8496093431986085,0.1725067485868843,0.124266558571261,75.26102802648397,3.652629917649085,23.0,0.19248500000000002,8.052384217580158,0.051854162746005586,5.279257749622374,0.07769986864408396,1.0,0.8953860150359447,0.09554880640873625,0.056563353282588213,77.97362526017017,3.340101142403973,23.0,1,0,2,23,1298.8,36.2,34.1,0.5,0.10350085258304385 +FASTAI (tuned),2658.981896154697,0.7253259826929142,1616.869681108774,0.8254771149356669,0.8865326975991655,0.8336048750524725,0.1719892484242808,0.10544003385780601,20955.495033608,39.61972184578173,24.48076923076923,0.18178,2313.697349058257,0.7597291602028741,1358.6299051921596,0.8969521438633071,1.0,0.8551942745167906,0.09660910797742411,0.07038888057026207,17602.335588839425,37.14448504861147,23.0,0,0,0,26,1269.2,37.8,39.5,0.46634615384615385,0.0522325290233973 +XT (tuned + ensemble),714.8477392964893,1.554570463987497,476.1600651525857,1.7304383155841179,0.8984673764067701,0.8650357556797086,0.18213060995030939,0.12358064556047872,6037.128774148354,86.31947081771706,25.0,0.18718,684.9222148127026,1.3082488920953539,370.85408017752667,1.4664534567412004,1.0,0.927972542444979,0.09673248729372463,0.07594781768304604,5339.627074654447,77.0823275943278,28.25,0,0,0,26,1260.2,26.7,47.0,0.45454545454545453,0.057367362828272636 +NN_TORCH (tuned),8367.9645569837,0.19940724566451504,3603.347061143873,0.20335754670694137,0.9141063957909149,0.850631259028558,0.15913478602343065,0.0993949625945807,56068.072795326945,10.018943215736572,25.78846153846154,0.18092,6973.094145007928,0.144207231203715,2862.0511040893566,0.15177921475257505,1.0,0.9108681738244536,0.10379368194908523,0.09268904724445692,51500.257380537965,8.681540922799902,27.0,0,0,0,26,1238.5,32.5,32.6,0.4366258741258741,0.045366926667074474 +REALMLP (default),111.4785790004282,0.2675422726533352,36.9875135803688,0.319519629673943,0.9170988508971886,0.8569208298595739,0.15925534618050793,0.09850483276040865,704.4596577225865,14.367018773597605,26.057692307692307,0.17819000000000002,62.036723497178826,0.20994164678785537,36.23724475975503,0.27878779609060306,1.0,0.9211958531631512,0.11529538667598471,0.06746878542921386,536.9122103749642,12.116250573587863,28.5,0,0,0,26,1233.8,37.0,35.4,0.430506993006993,0.04593533182737854 +XGB (default),4.970388249658113,0.2682310309165563,3.142391352663509,0.37694382359999684,0.967663807523489,0.9070085519838873,0.16741259421914897,0.12284060876198313,37.42364826794432,14.932177592236862,27.057692307692307,0.18894,4.433991021580166,0.20984046989017063,2.395188706947506,0.2182544724645258,1.0,0.9518900673045412,0.11516738963641299,0.07681752222912633,34.313637563138,10.099890730056913,25.75,0,0,0,26,1212.8,40.2,31.9,0.40777972027972026,0.04033171361980777 +XT (tuned),714.8477392964893,0.1734743657275143,476.1600651525857,0.2154329300724878,0.9275522820640911,0.8944270214472317,0.19352158162401475,0.1332600566819766,6037.128774148354,9.800458404720944,27.5,0.19,684.9222148127026,0.15512712796529132,370.85408017752667,0.16132775528274945,1.0,0.9688071317540197,0.10725621698653792,0.08413965072039432,5339.627074654447,8.581565117405717,31.5,0,0,0,26,1200.5,34.6,31.8,0.3977272727272727,0.04426880847584831 +RF (tuned + ensemble),915.5438805149151,1.5748089445961848,532.5626464695567,1.640386311447312,0.9428934399339288,0.9114679294811889,0.19492401068058715,0.147796519544612,6794.043055663636,81.1610891109226,29.46153846153846,0.18906,789.1687051984999,1.122593025366465,527.4239458868619,1.3899910445458383,1.0,0.9937951111561848,0.11848332500543385,0.09151654391313321,6269.6136687159,75.55960345229795,31.5,0,0,0,26,1156.9,35.3,29.3,0.3531468531468531,0.04551104983248137 +GBM (default),5.376282313848153,0.20799484711426958,3.3489543497095178,0.19728412961063485,0.9701181894525962,0.9226046869734794,0.17658302669842518,0.11946139446934875,42.11806124941037,9.792256635961557,29.576923076923077,0.188175,5.033887876404656,0.22240020169152153,2.8984772023300502,0.13282292956587455,1.0,0.9543370010011993,0.11805347934940252,0.09964492878261602,38.718264562373335,7.411495765404641,29.5,0,0,0,26,1154.1,38.8,28.9,0.3505244755244755,0.036096329534928126 +RF (tuned),915.5438805149151,0.159982283706339,532.5626464695567,0.2003027211432199,0.9772506936041817,0.9400975627550481,0.20796627140051763,0.15738142731874985,6794.043055663636,8.85724716182767,31.923076923076923,0.190815,789.1687051984999,0.14185967445373537,527.4239458868619,0.12279197881507195,1.0,0.9949804676438307,0.13224551901999893,0.10601779560500876,6269.6136687159,8.226247917103098,33.0,0,0,0,26,1096.4,35.3,30.8,0.2972027972027972,0.03435712054173671 +NN_TORCH (default),27.39151145983965,0.20185868648382335,14.560395769408618,0.21573681790796437,0.9698948249425869,0.9391775521500288,0.2143711666740534,0.1478455999492002,188.3149665224302,10.435316060204023,32.32692307692308,0.183195,20.22722778055403,0.14770235617955524,10.376930987013111,0.18792402145583464,1.0,0.9983912483912485,0.14318031310217677,0.1051705699943664,197.09431521312473,8.356657050571908,33.0,0,0,0,26,1082.4,35.8,33.5,0.2880244755244755,0.03477287158574056 +FASTAI (default),10.397479128328143,0.6249765678348704,5.689409993948898,0.6426262937267089,0.9807465319111015,0.9403855906437737,0.2303447532418144,0.17838953792880163,79.38940070919118,32.314469844151965,33.61538461538461,0.19183499999999998,9.91800790362888,0.5629585729704962,4.729717448807326,0.6226443216085578,1.0,0.9998853211009175,0.16088565720086695,0.11423976777713259,63.05950085745769,32.34956133934325,36.5,0,0,0,26,1051.3,30.1,32.4,0.25874125874125875,0.03221855964948352 +LR (tuned + ensemble),175.85719627702338,0.42670365847074065,115.79203038641191,0.374819481777715,0.9447815704031849,0.9377863465231412,0.28317417183852894,0.24822823084079834,1448.7019616321465,20.41052886468638,34.23076923076923,0.216585,158.39292872746785,0.19311302105585734,88.63237206036187,0.25697546561814155,1.0,1.0,0.21769005088471027,0.1779543124949825,1246.7830478036524,13.25530093456624,38.0,0,0,1,25,1030.4,38.5,40.3,0.24475524475524477,0.04368314033358164 +LR (tuned),175.85719627702338,0.1364539579448537,115.79203038641191,0.12123112010680681,0.9645430999513104,0.943877751101422,0.2902155463327537,0.25773756332102926,1448.7019616321465,6.009867470278231,35.21153846153846,0.21681,158.39292872746785,0.07476819356282552,88.63237206036187,0.08838151119168902,1.0,1.0,0.22518752798442315,0.1834788883245299,1246.7830478036524,4.016028876859062,38.0,0,0,0,26,996.9,36.4,40.6,0.22246503496503497,0.03516566041445386 +RF (default),0.9640304686676744,0.07426402477117686,0.5084719578868067,0.0844146006139,0.9951894125278877,0.9701912114787967,0.25755513106395067,0.2479832267773854,6.6754175478812146,4.153857105918795,35.28846153846154,0.213105,0.8910642200046115,0.05825435982810126,0.447836563000179,0.06594795127086661,1.0,1.0,0.17586669886220047,0.11692775501298655,5.654732996519019,3.502947097147378,37.5,0,0,0,26,1000.0,0.0,0.0,0.22071678321678323,0.029920050824686344 +LR (default),4.707631549162742,0.1483236100938585,2.804518891637732,0.1437661673750307,0.9745259999639975,0.9520629989740693,0.305476365274656,0.29332097764816845,36.73103788870433,7.308777223189359,35.67307692307692,0.22103499999999998,4.746849238872528,0.08430976470311483,2.2657084486770183,0.10642460584640503,1.0,1.0,0.22930816077470617,0.22370793213879017,31.50980214431158,4.703818974402271,39.5,0,0,1,25,982.3,41.6,38.5,0.21197552447552448,0.03995938942367327 +XT (default),0.8462119445841536,0.0803421718442542,0.47456795510527094,0.08795126020679146,0.9851460583683391,0.9642266228854909,0.2753142737967973,0.27671727525790224,6.0304600748852675,4.406716139948246,36.65384615384615,0.215405,0.7635703219307794,0.06673479080200195,0.40395335630596185,0.07007418015238884,1.0,1.0,0.18424442460299273,0.1562338348801046,5.148113375558061,3.7424721126192138,39.0,0,0,0,26,955.5,43.9,50.8,0.1896853146853147,0.030378669430214802 +KNN (tuned + ensemble),16.440937072497146,0.2739423143558013,7.036183733958932,0.19964653999651732,1.0,0.9945521006475282,0.47993514547383553,0.5914832997832359,57.836601086323085,11.285407568076662,41.57692307692308,0.31118999999999997,6.799899099932777,0.1030390567249722,3.707274221912526,0.16730320161416595,1.0,1.0,0.4552264030449861,0.6183873456591064,55.06974575171796,8.266636963631449,43.0,0,0,0,26,703.5,49.4,45.5,0.0777972027972028,0.024279443329967162 +KNN (tuned),16.440937072497146,0.06075953357240074,7.036183733958932,0.04304950843110991,1.0,0.9966557577121117,0.5010874288021712,0.6455640735444151,57.836601086323085,2.5027008595459077,42.69230769230769,0.31467999999999996,6.799899099932777,0.036990099483066134,3.707274221912526,0.039402452723266784,1.0,1.0,0.4955223463378593,0.6729856958219811,55.06974575171796,2.091430487051362,44.0,0,0,0,26,609.2,52.7,48.0,0.05244755244755245,0.023502270498812147 +KNN (default),0.2899092884145231,0.0317453768518236,0.13426353967394633,0.031065835648244968,1.0,1.0,0.5923454279012129,0.9491257378249622,1.00065410372272,1.4527699655062962,44.25,0.34582,0.1258228341738383,0.02117468251122369,0.07457399441809831,0.021006283652748647,1.0,1.0,0.6201400300158697,1.0,1.0,1.0057335151210618,45.0,0,0,0,26,407.8,86.4,95.3,0.017045454545454544,0.022663376691509682 diff --git a/data/tabpfn-tabicl/tuning-impact-elo-horizontal.pdf b/data/tabpfn-tabicl/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..a739d2a84e525341cf68a8c79feb8cc88cf557e0 Binary files /dev/null and b/data/tabpfn-tabicl/tuning-impact-elo-horizontal.pdf differ diff --git a/data/tabpfn-tabicl/tuning-impact-elo-horizontal.png.zip b/data/tabpfn-tabicl/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..4f6908fba6c41ba3576608b66bc23a0647bbfbc2 --- /dev/null +++ b/data/tabpfn-tabicl/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e190a64928288c6f71df2881dc9fadd779b153432c7535ca3ddd8d9d6b7fa4dd +size 139645 diff --git a/data/tabpfn/figures/critical-diagram.pdf b/data/tabpfn/figures/critical-diagram.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e6cbb3851345655347fab9ec9267a7e19e4627ce Binary files /dev/null and b/data/tabpfn/figures/critical-diagram.pdf differ diff --git a/data/tabpfn/figures/critical-diagram.png.zip b/data/tabpfn/figures/critical-diagram.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..2e996afbc8c6002ff3fefffd6b0d48a63e2f0a23 --- /dev/null +++ b/data/tabpfn/figures/critical-diagram.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7adff6e83d6bdc6996b4755fa44a9b4b221ada33465fbef54eea45ac9c3b93e8 +size 316465 diff --git a/data/tabpfn/leaderboard.tex b/data/tabpfn/leaderboard.tex new file mode 100644 index 0000000000000000000000000000000000000000..c807b083ac0087135393b2136efc4b5a8cdbf43a --- /dev/null +++ b/data/tabpfn/leaderboard.tex @@ -0,0 +1,52 @@ +\begin{tabular}{llcccccrr} +\toprule +\textbf{Model} & \textbf{Elo ($\uparrow$)} & \textbf{Norm.} & \textbf{Avg.} & \textbf{Harm.} & \textbf{\#wins ($\uparrow$)} & \textbf{Improva-} & \textbf{Train time} & \textbf{Predict time} \\ + & & \textbf{score ($\uparrow$)} & \textbf{rank ($\downarrow$)} & \textbf{mean} & & \textbf{bility ($\downarrow$)} & \textbf{per 1K [s]} & \textbf{per 1K [s]} \\ + & & & & \textbf{rank ($\downarrow$)} & & & & \\ +\midrule +TabPFNv2 (T+E) & \textcolor{gold}{\textbf{1722${}_{-34,+44}$}} & \textcolor{gold}{\textbf{0.712}} & \textcolor{gold}{\textbf{5.5}} & \textcolor{gold}{\textbf{1.9}} & \textcolor{gold}{\textbf{12}} & \textcolor{gold}{\textbf{3.4\%}} & 3899.42 & 55.83 \\ +RealMLP (T+E) & \textcolor{silver}{\textbf{1590${}_{-34,+33}$}} & 0.477 & \textcolor{silver}{\textbf{9.3}} & 5.7 & 0 & 8.1\% & 6131.85 & 4.53 \\ +TabM (T+E) & \textcolor{bronze}{\textbf{1586${}_{-32,+31}$}} & 0.484 & \textcolor{bronze}{\textbf{9.5}} & 4.6 & 2 & \textcolor{bronze}{\textbf{7.5\%}} & 3372.56 & 1.66 \\ +AutoGluon 1.3 (4h) & 1565${}_{-39,+30}$ & \textcolor{bronze}{\textbf{0.493}} & 10.2 & 4.3 & 3 & 7.7\% & 2727.51 & 3.24 \\ +TabPFNv2 (T) & 1564${}_{-30,+28}$ & \textcolor{silver}{\textbf{0.540}} & 10.4 & \textcolor{bronze}{\textbf{3.7}} & 1 & \textcolor{silver}{\textbf{6.6\%}} & 3899.42 & 0.98 \\ +LightGBM (T+E) & 1522${}_{-35,+33}$ & 0.361 & 11.8 & 7.1 & 1 & 10.0\% & 771.57 & 2.49 \\ +TabPFNv2 (D) & 1499${}_{-28,+33}$ & 0.483 & 12.7 & \textcolor{silver}{\textbf{3.5}} & \textcolor{silver}{\textbf{5}} & 8.3\% & 4.22 & 0.55 \\ +CatBoost (T+E) & 1483${}_{-32,+29}$ & 0.342 & 13.5 & 9.9 & 0 & 9.7\% & 2034.85 & 0.80 \\ +TabM (T) & 1470${}_{-26,+32}$ & 0.382 & 13.8 & 8.0 & 0 & 8.7\% & 3372.56 & 0.21 \\ +CatBoost (T) & 1462${}_{-34,+36}$ & 0.316 & 14.4 & 9.2 & 0 & 9.9\% & 2034.85 & 0.10 \\ +LightGBM (T) & 1452${}_{-34,+31}$ & 0.288 & 14.8 & 12.9 & 0 & 10.5\% & 771.57 & 0.32 \\ +XGBoost (T+E) & 1426${}_{-30,+29}$ & 0.271 & 15.8 & 12.9 & 0 & 11.1\% & 828.74 & 2.31 \\ +CatBoost (D) & 1422${}_{-29,+31}$ & 0.259 & 16.2 & 8.8 & 1 & 11.5\% & 8.51 & 0.12 \\ +ModernNCA (T) & 1401${}_{-36,+29}$ & 0.228 & 17.1 & 9.3 & 1 & 10.6\% & 6147.69 & 0.48 \\ +TabM (D) & 1396${}_{-35,+33}$ & 0.295 & 17.2 & 11.6 & 0 & 11.6\% & 12.24 & 0.15 \\ +XGBoost (T) & 1393${}_{-31,+27}$ & 0.229 & 17.4 & 15.4 & 0 & 11.4\% & 828.74 & 0.34 \\ +ModernNCA (T+E) & 1391${}_{-41,+26}$ & 0.320 & 17.5 & 7.8 & 0 & 11.0\% & 6147.69 & 8.15 \\ +TabDPT (D) & 1380${}_{-41,+30}$ & 0.370 & 18.1 & 4.0 & \textcolor{silver}{\textbf{5}} & 11.9\% & 28.84 & 9.01 \\ +RealMLP (T) & 1365${}_{-28,+35}$ & 0.221 & 18.6 & 14.4 & 0 & 11.1\% & 6131.85 & 0.26 \\ +ModernNCA (D) & 1344${}_{-39,+28}$ & 0.157 & 19.8 & 10.8 & 1 & 13.7\% & 16.16 & 0.31 \\ +EBM (T+E) & 1341${}_{-30,+31}$ & 0.186 & 19.8 & 11.4 & 0 & 14.8\% & 1331.68 & 0.21 \\ +TorchMLP (T+E) & 1331${}_{-32,+29}$ & 0.186 & 20.3 & 15.0 & 0 & 12.0\% & 3704.30 & 2.07 \\ +FastaiMLP (T+E) & 1303${}_{-33,+35}$ & 0.181 & 21.6 & 12.5 & 0 & 14.6\% & 1459.62 & 8.06 \\ +EBM (T) & 1284${}_{-29,+33}$ & 0.127 & 22.5 & 15.7 & 0 & 15.5\% & 1331.68 & 0.02 \\ +ExtraTrees (T+E) & 1273${}_{-34,+26}$ & 0.131 & 23.0 & 14.5 & 0 & 16.3\% & 416.39 & 1.39 \\ +EBM (D) & 1256${}_{-37,+34}$ & 0.136 & 23.7 & 11.0 & 1 & 16.3\% & 5.89 & 0.07 \\ +RealMLP (D) & 1249${}_{-31,+23}$ & 0.099 & 24.2 & 20.0 & 0 & 13.6\% & 37.06 & 0.31 \\ +TorchMLP (T) & 1234${}_{-25,+33}$ & 0.097 & 24.8 & 21.0 & 0 & 14.1\% & 3704.30 & 0.14 \\ +ExtraTrees (T) & 1228${}_{-35,+27}$ & 0.102 & 25.2 & 19.1 & 0 & 17.3\% & 416.39 & 0.18 \\ +FastaiMLP (T) & 1222${}_{-32,+27}$ & 0.089 & 25.3 & 20.3 & 0 & 15.9\% & 1459.62 & 0.89 \\ +XGBoost (D) & 1199${}_{-29,+30}$ & 0.025 & 26.4 & 24.4 & 0 & 15.0\% & 3.05 & 0.24 \\ +LightGBM (D) & 1170${}_{-34,+32}$ & 0.027 & 27.6 & 26.0 & 0 & 15.5\% & 3.39 & 0.16 \\ +RandomForest (T+E) & 1168${}_{-34,+33}$ & 0.071 & 27.7 & 21.0 & 0 & 17.7\% & 572.67 & 1.42 \\ +RandomForest (T) & 1112${}_{-33,+28}$ & 0.036 & 30.0 & 27.0 & 0 & 18.8\% & 572.67 & 0.14 \\ +TorchMLP (D) & 1068${}_{-33,+27}$ & 0.026 & 31.6 & 28.5 & 0 & 19.2\% & 11.82 & 0.15 \\ +FastaiMLP (D) & 1026${}_{-30,+25}$ & 0.015 & 33.1 & 31.0 & 0 & 21.7\% & 5.18 & 0.65 \\ +ExtraTrees (D) & 1007${}_{-32,+31}$ & 0.030 & 33.7 & 29.5 & 0 & 24.3\% & 0.42 & 0.08 \\ +RandomForest (D) & 1000${}_{-0,+0}$ & 0.004 & 33.9 & 32.1 & 0 & 23.2\% & 0.47 & 0.07 \\ +Linear (T+E) & 972${}_{-37,+28}$ & 0.044 & 34.7 & 24.5 & 0 & 29.3\% & 97.00 & 0.22 \\ +Linear (T) & 943${}_{-39,+36}$ & 0.028 & 35.6 & 28.9 & 0 & 29.9\% & 97.00 & 0.09 \\ +Linear (D) & 922${}_{-35,+36}$ & 0.020 & 36.1 & 26.8 & 0 & 31.4\% & 2.99 & 0.10 \\ +KNN (T+E) & 697${}_{-55,+41}$ & 0.000 & 40.5 & 40.2 & 0 & 45.7\% & 3.41 & 0.18 \\ +KNN (T) & 603${}_{-51,+57}$ & 0.000 & 41.6 & 41.5 & 0 & 47.5\% & 3.41 & 0.04 \\ +KNN (D) & 373${}_{-68,+68}$ & 0.000 & 43.3 & 43.2 & 0 & 55.6\% & 0.07 & 0.03 \\ +\bottomrule +\end{tabular} \ No newline at end of file diff --git a/data/tabpfn/tabarena_leaderboard.csv b/data/tabpfn/tabarena_leaderboard.csv new file mode 100644 index 0000000000000000000000000000000000000000..6bfbe8ecf04e21a712e55035ac60cc20bd3e7df1 --- /dev/null +++ b/data/tabpfn/tabarena_leaderboard.csv @@ -0,0 +1,45 @@ +method,time_train_s,time_infer_s,time_train_s_per_1K,time_infer_s_per_1K,normalized-error,normalized-error-task,champ_delta,loss_rescaled,time_train_s_rescaled,time_infer_s_rescaled,rank,median_metric_error,median_time_train_s,median_time_infer_s,median_time_train_s_per_1K,median_time_infer_s_per_1K,median_normalized-error,median_normalized-error-task,median_champ_delta,median_loss_rescaled,median_time_train_s_rescaled,median_time_infer_s_rescaled,median_rank,rank=1_count,rank=2_count,rank=3_count,rank>3_count,elo,elo+,elo-,winrate,mrr +TABPFNV2 (tuned + ensemble),14425.017745771953,147.33083187377815,4401.396029095723,73.66242671660264,0.28759504252950974,0.347282565892165,0.033791532255816104,0.021691804403474596,97206.53847125142,5748.512821917254,5.53030303030303,0.22151,6554.713698530197,48.78112569650014,3899.4164285155534,55.8334682198027,0.16748308582785468,0.3569968688242239,0.004663083714453231,0.0038383724774552082,76922.9404189982,3038.197671565286,2.0,12,6,2,13,1722.1,43.1,34.0,0.8946441155743481,0.5154452222510298 +REALMLP (tuned + ensemble),17640.05257734851,4.903230364073808,6337.298676302214,5.714285721500293,0.5227695654892056,0.5205835812389092,0.08113802091931344,0.04132085084573313,135567.2437894661,291.2042190402348,9.303030303030303,0.23449,10127.618620618185,4.333946492936876,6131.852125259443,4.526544125425011,0.48215030896347355,0.5296754767960605,0.03823555615847496,0.029161376498297843,109144.7404232934,273.1739755067715,8.0,0,2,3,28,1590.4,32.7,33.3,0.806906272022551,0.17535539684098442 +TABM (tuned + ensemble),7759.403633462299,2.1001035646156025,4063.8948551919407,2.0262676290994976,0.5158505640872573,0.527488621478328,0.07468532292643498,0.05142130142823952,68994.91604679843,112.78737728828092,9.454545454545455,0.22996,6619.307261109352,1.565106577343411,3372.559747313985,1.6579582265448953,0.47020083976979304,0.539573473894149,0.0321092955544684,0.027567784172117953,47614.825535817596,107.26839105814781,8.0,2,2,3,26,1586.2,30.2,31.3,0.8033826638477801,0.21855177068820775 +AutoGluon 1.3 (4h),6055.818895130286,4.270678644549565,3922.7285559191146,3.7248760797186677,0.5066593479144164,0.5016434556313996,0.07722981074251968,0.046889638099060306,55071.75177189554,226.07763766376502,10.242424242424242,0.23252,5245.15874774456,2.643711381488376,2727.5131652494383,3.239980099911348,0.4597995521540763,0.5024076802392037,0.031736497159312704,0.025571480573160055,42990.839716832954,133.93997628187762,7.0,3,2,1,27,1565.1,29.4,38.8,0.7850599013389711,0.2313227448922636 +TABPFNV2 (tuned),14425.017745771953,5.209287579613502,4401.396029095723,2.5426283197700723,0.46046523778783577,0.4854802409226216,0.06647673195285816,0.048875587042492535,97206.53847125142,204.9255427422306,10.378787878787879,0.22735,6554.713698530197,0.8280300378799439,3899.4164285155534,0.9826616035428936,0.34176855083335494,0.4778694665116624,0.03437211181910249,0.021856445988952963,76922.9404189982,51.81666634218519,6.0,1,11,1,20,1563.8,27.4,29.4,0.7818886539816773,0.26853674831779045 +GBM (tuned + ensemble),1415.9841766067627,4.799208115006135,910.5198965732433,4.069284327482318,0.6392387450549837,0.6388440602683771,0.09997972856333269,0.058027135210861426,13668.933422648086,267.51058203721857,11.833333333333334,0.22857,1305.722465435664,2.759434594048394,771.5692555555095,2.4904788987789197,0.6693216689945842,0.6259564939415712,0.04177838790149224,0.03428335611022812,13185.517460723013,113.47473834277966,11.0,1,0,2,30,1521.9,32.8,34.1,0.748062015503876,0.1410994418891387 +TABPFNV2 (default),11.52095359335042,1.0633954972129076,6.06940244179637,0.672221391108446,0.5171946106727229,0.5695628859334582,0.08250610447990993,0.06704584581218105,98.45527177302218,43.21321073170591,12.696969696969697,0.22839,8.920613225301107,0.4424108028411865,4.216795518748306,0.554313340790968,0.3939035196311629,0.49361879391754265,0.043516491196731844,0.023742158338286197,86.18639524525906,32.62493249187355,5.0,5,0,6,22,1499.0,32.4,28.0,0.7279774489076815,0.28415420757209386 +CAT (tuned + ensemble),6111.719856242539,1.006158811716921,3790.183958691414,0.8907615430657749,0.6581825114313259,0.647096257124149,0.09658963556630741,0.06159125993060797,60543.81785886913,53.55072795645876,13.545454545454545,0.23306,3546.8591198126474,0.7182260619269477,2034.851316438205,0.804742177327474,0.6112565392764306,0.66418987011517,0.04902703626297478,0.05150305067512489,22945.70625086803,41.43902270876451,13.0,0,0,1,32,1482.7,28.7,31.3,0.7082452431289641,0.10102745863625451 +TABM (tuned),7759.403633462299,0.20657940898278745,4063.8948551919407,0.21768183700894184,0.6183990992477728,0.5938819439915204,0.08657114794325833,0.06897005982586073,68994.91604679843,11.66689658584399,13.848484848484848,0.23179,6619.307261109352,0.18380210134718153,3372.559747313985,0.207815816005071,0.5745991505979262,0.614113056104093,0.041022605221466724,0.03589289842491426,47614.825535817596,11.061181528720304,13.0,0,2,1,30,1469.7,31.6,25.6,0.7011980267794221,0.12524242098340074 +CAT (tuned),6111.719856242539,0.1329929079672303,3790.183958691414,0.13532680535635316,0.6842728639559639,0.6649823761197318,0.0991083031449869,0.0628368504927829,60543.81785886913,7.594073997033244,14.363636363636363,0.22837,3546.8591198126474,0.09593041737874348,2034.851316438205,0.09676511402452573,0.6577077283695499,0.6795431711405081,0.05435194871036664,0.05056705166806485,22945.70625086803,5.6651414641322715,14.0,0,1,2,30,1461.5,35.1,33.1,0.6892177589852009,0.10903444019261638 +GBM (tuned),1415.9841766067627,0.7482920802402174,910.5198965732433,0.7792669151777867,0.7124399645711881,0.6801617053464362,0.1051956601413181,0.06639044052074433,13668.933422648086,44.04955699034316,14.772727272727273,0.23047,1305.722465435664,0.3891807476679484,771.5692555555095,0.32246047162149255,0.745365207761696,0.6744954489894297,0.049314064239551536,0.044527519630471114,13185.517460723013,17.561755180687065,14.0,0,0,0,33,1452.4,30.6,33.7,0.6797040169133193,0.07733342713075984 +XGB (tuned + ensemble),1916.6055530242247,2.961519778778256,1177.527465794311,3.6728650211989002,0.7293222800450723,0.7117235745876165,0.1112115495629525,0.07345874204421503,16012.343458246229,184.54969165931055,15.833333333333334,0.22933,1391.6566625965966,1.6547198295593262,828.736683312722,2.3118448001769525,0.7623744964555048,0.7252848836678053,0.060774906477773616,0.04674743171388845,12341.448622348875,104.93870139682326,13.0,0,0,0,33,1426.3,28.8,29.4,0.6550387596899225,0.07741546618345518 +CAT (default),152.82380405130613,0.14037318871880222,127.11927189400713,0.16217538557702282,0.740681350814587,0.7387655309189419,0.11498662102113376,0.07119593995902317,502.113809148077,8.351109219795852,16.151515151515152,0.22377,11.973733305931091,0.15812701649136013,8.50783362735807,0.12332610453035381,0.8332509641822385,0.7885095857243417,0.054760886835131806,0.03726919363819482,113.47556087857672,7.212822094475863,16.0,1,1,0,31,1421.9,30.3,28.8,0.6476391825229034,0.11339449782408784 +MNCA (tuned),10961.64198641737,0.5195780056494254,6318.399005594662,0.47834981044798197,0.7723596137566766,0.6827095912510105,0.10638339148215237,0.07558312836462096,105260.96546626008,25.986174703676685,17.106060606060606,0.23735,10053.083413600922,0.4133593638737996,6147.690948218725,0.484748090925306,0.8745571721847745,0.7651471621449312,0.06190318194365374,0.07061019028987191,89084.88026764008,22.392018959457452,17.0,1,0,0,32,1400.6,28.1,36.0,0.6254404510218464,0.10727266474604052 +TABM (default),28.548644328920126,0.1798720349366416,18.542173393427806,0.19723712265145438,0.7054228305321909,0.702938922815599,0.11640401971760755,0.08714522428137161,318.94007895221296,9.774328855022276,17.21212121212121,0.2315,20.942287389437357,0.15928708182440865,12.243907458197643,0.15399858651571716,0.6869868540300672,0.6757466833739122,0.05612090882619314,0.04238635809664446,186.77830789084513,9.305311172432566,16.0,0,0,0,33,1395.9,32.7,35.0,0.6229739252995067,0.08645560870892333 +XGB (tuned),1916.6055530242247,0.652148759324944,1177.527465794311,0.8866854120458099,0.7708905111580848,0.7367751709506252,0.11356989585982016,0.07695243441425559,16012.343458246229,38.50908627958101,17.424242424242426,0.23215,1391.6566625965966,0.25174130333794487,828.736683312722,0.3364459349309477,0.8207592710887125,0.7444907489109444,0.0623400659031782,0.049638259352643085,12341.448622348875,17.273530689905304,16.0,0,0,0,33,1393.0,26.5,30.7,0.6180408738548273,0.06482697329445536 +MNCA (tuned + ensemble),10961.64198641737,10.701994556690307,6318.399005594662,8.75762576226945,0.679738245400772,0.6438179993915535,0.10954922439720775,0.08285277496727728,105260.96546626008,520.9142587851533,17.484848484848484,0.23071,10053.083413600922,8.386180957158407,6147.690948218725,8.148513113458952,0.7185505017354659,0.690743503663037,0.07301516761779958,0.04711261954689944,89084.88026764008,418.52804655660253,14.0,0,2,4,27,1390.6,25.9,40.2,0.616631430584919,0.12891592187749792 +TABDPT (default),70.92158105132556,22.39591003114527,34.81759719100062,31.279468309323132,0.629987494107437,0.637861925196628,0.11877074556928915,0.07113986617091549,622.0717388752522,1500.5607209171305,18.060606060606062,0.22801,66.73408037026724,21.411699827512106,28.844939328856388,9.008305936432574,0.7203280108909103,0.7362192124428774,0.03615639555172845,0.040097705463089155,592.5429859587771,1255.434427440434,15.0,5,2,1,25,1380.5,29.3,41.0,0.6032417195207893,0.24707047655611855 +REALMLP (tuned),17640.05257734851,0.23988780349191993,6337.298676302214,0.3147901673455733,0.779232820829222,0.7118786087154055,0.1105344285502768,0.07574190098591821,135567.2437894661,15.019683339609985,18.606060606060606,0.23707,10127.618620618185,0.198199192682902,6131.852125259443,0.25545227547789184,0.885134051632072,0.7463352568098932,0.06677697998943288,0.058306293379144045,109144.7404232934,13.001039963138904,17.0,0,0,0,33,1364.9,34.5,27.4,0.5905567300916138,0.06931230490221836 +EBM (tuned + ensemble),3334.2171648572994,0.32795600762672295,2025.6302479633398,0.2872227792040727,0.8143931606450567,0.8129401388636393,0.14763133394621986,0.12592729619848494,30754.382185448365,16.68266150462009,19.757575757575758,0.2332,1953.8706483443577,0.2473998334672716,1331.6775166450918,0.20701186245225042,0.9460248644293228,0.8588776232550008,0.07509569687142414,0.05173584511058909,20856.10396834409,10.735277156769923,18.0,0,1,1,31,1341.3,30.8,29.8,0.5637773079633545,0.08746427069688567 +MNCA (default),29.288330355718077,0.40948904010181875,16.590279085679306,0.35807771432707775,0.8429271223076699,0.7810588257798868,0.13667497945865245,0.08737956786746706,276.94632919034655,19.94068662634551,19.78787878787879,0.22862,25.066760566499497,0.27276016076405846,16.16104653455061,0.3065299705640804,1.0,0.8394923182155353,0.07623534801441778,0.06857639602487717,228.23494043552506,15.69192319835031,21.0,1,0,0,32,1344.1,28.0,38.4,0.5630725863284003,0.09269970173335573 +NN_TORCH (tuned + ensemble),9308.194376095618,3.24606818494572,4341.07923497529,2.933380560418068,0.8141011522100134,0.7733317087974643,0.1196017538528463,0.0884329440850336,82898.10974181342,162.20152509802003,20.318181818181817,0.22969,7022.24924369653,2.462759764989217,3704.2987009192075,2.0735716422398887,0.9757817342652234,0.8634822252355945,0.06580712747027118,0.06652168038286722,61441.834728019065,130.8351426094471,20.0,0,0,0,33,1331.0,28.2,31.9,0.5507399577167019,0.06672886949918407 +FASTAI (tuned + ensemble),2586.283754560923,8.835338537861603,1679.575121937614,9.999499001724324,0.8186439717731856,0.8048407168297124,0.1463190607452892,0.10573840807155177,25493.071594867495,506.3190878191587,21.62121212121212,0.23902,2267.9460870583853,8.057986391915215,1459.621189354467,8.056269308662202,1.0,0.9463735018753228,0.07797271869376787,0.07243352000599475,23829.87732673067,529.4584115089291,24.0,0,1,0,32,1302.6,35.0,32.5,0.5204369274136716,0.07980501732811608 +EBM (tuned),3334.2171648572994,0.039938361395890465,2025.6302479633398,0.03842942493274479,0.8732650510599177,0.847185415176962,0.1550015674415676,0.13582053194552346,30754.382185448365,2.017057965351221,22.5,0.23722,1953.8706483443577,0.031091478135850694,1331.6775166450918,0.0236834002099177,1.0,0.9131276971175163,0.07651349937739893,0.06554958177928984,20856.10396834409,1.153393324267757,24.0,0,0,1,32,1284.4,33.0,28.8,0.5,0.06371221193095303 +XT (tuned + ensemble),703.3360121879513,1.4757844792471992,499.8225911726898,1.6427389690079335,0.8688873144004177,0.8421873615711798,0.16320803022886876,0.11384810299272856,7431.679531233592,85.61436209689451,23.0,0.22915,666.2221839348475,1.1989427142673068,416.3888649592797,1.3925002488586609,1.0,0.9289528549866781,0.0949585994043769,0.07137968239257503,5771.537607935009,73.42784180878955,27.0,0,0,0,33,1273.4,25.8,33.1,0.4883720930232558,0.06880941675825748 +EBM (default),11.36624160479215,0.05860419851360899,6.793468845812911,0.0773696538255326,0.8635835253158718,0.858009067517529,0.16307929482040492,0.13972874727777093,110.45760612579903,3.6748575166769917,23.727272727272727,0.2295,8.053060743543837,0.04657702445983887,5.893546446579368,0.07326012604396624,1.0,0.9448073826572545,0.09111496900003413,0.06379070050707646,98.43282840770306,3.379332756535156,24.0,1,0,2,30,1255.7,33.8,36.7,0.4714587737843552,0.09099116968661469 +REALMLP (default),106.23891412433149,0.2452111855099097,37.47920795758294,0.3116151414211664,0.9013016306032677,0.8397875849035541,0.13551395212270728,0.09683394965247975,797.3305404666338,14.710216960509968,24.227272727272727,0.24541,56.65627751350403,0.19186555014716256,37.06431607357004,0.3087081015110016,1.0,0.916083975805878,0.09023388577326896,0.0783241041826278,651.2119595469192,12.659464398970895,25.0,0,0,0,33,1249.4,23.0,30.8,0.459830866807611,0.05003115235983144 +NN_TORCH (tuned),9308.194376095618,0.17523537565160682,4341.07923497529,0.18444769841251837,0.9028056592953255,0.8478053911765454,0.1413705809293394,0.11048239558944242,82898.10974181342,9.391699534826044,24.772727272727273,0.23735,7022.24924369653,0.12440276145935059,3704.2987009192075,0.1432880461215973,1.0,0.9215584053535485,0.1030878645350336,0.09298739144390704,61441.834728019065,8.032002608597933,26.0,0,0,0,33,1233.9,33.0,24.5,0.44714587737843553,0.047597798111452545 +XT (tuned),703.3360121879513,0.18311701848450734,499.8225911726898,0.22461498391827245,0.8983919342324714,0.8705897542854648,0.17257069046852133,0.12302015635642276,7431.679531233592,11.223370223330804,25.151515151515152,0.23322,666.2221839348475,0.15195075670878092,416.3888649592797,0.1793043116994795,1.0,0.966001342535169,0.10675110715093028,0.07526259739143538,5771.537607935009,9.73206235571111,30.0,0,0,0,33,1228.3,26.8,34.4,0.4383368569415081,0.05228445108001237 +FASTAI (tuned),2586.283754560923,0.6978744876103771,1679.575121937614,0.8367693422775803,0.9106015193205547,0.8611123846058543,0.15945393539538272,0.12417976041162289,25493.071594867495,42.13403882164382,25.348484848484848,0.24143,2267.9460870583853,0.7035322189331055,1459.621189354467,0.8899622596800327,1.0,0.9671522112430685,0.08497148807239407,0.08265696789358082,23829.87732673067,41.63294608315149,26.0,0,0,0,33,1221.5,26.9,32.0,0.43375616631430586,0.04932119791327812 +XGB (default),4.978699986862414,0.2737259887284301,3.296637309611259,0.3881253273013503,0.9745229998669913,0.9130838025048484,0.15003750558624923,0.12576814947216147,47.96472220742737,17.685235315861274,26.40909090909091,0.24149,4.367259449428982,0.2072049856185913,3.054087114292606,0.2414376437664032,1.0,0.9442154777336845,0.10860616500290621,0.09722890335245435,45.44297154665395,12.112444004915941,26.0,0,0,0,33,1199.4,29.2,28.7,0.4090909090909091,0.04099400342654791 +GBM (default),5.590238305133601,0.278970559357794,3.7124463506088845,0.2797426445645519,0.9731114574560898,0.9163189067388104,0.15469068532011432,0.11725408588040206,57.36690638815629,17.058911558436503,27.575757575757574,0.24755,5.0517880121866865,0.22849366399976942,3.3870700945456824,0.15827762661722877,1.0,0.9471408521144343,0.09946698481581973,0.10403475244674416,43.79388559366092,9.72233278292159,26.0,0,0,0,33,1170.4,31.7,33.7,0.38195912614517263,0.03850744666327581 +RF (tuned + ensemble),892.1954686825525,1.4854835507845638,550.3470940724247,1.5940359327620437,0.9290486520150778,0.897565901742995,0.1769110460679657,0.14073555515460248,8295.175259470323,82.18254749521213,27.696969696969695,0.23505,784.4848535855612,1.0802352163526747,572.6733661144972,1.4206488404155224,1.0,0.990363425091157,0.1099859175027843,0.09653757927332181,7404.509259652326,76.20706350974088,29.0,0,0,0,33,1168.0,32.4,33.3,0.37914023960535587,0.04767696145604081 +RF (tuned),892.1954686825525,0.16340523231711854,550.3470940724247,0.20470746704551906,0.963610556213676,0.9282250154629602,0.1884425918834092,0.150404283327206,8295.175259470323,9.800022397387403,30.03030303030303,0.23333,784.4848535855612,0.1385154088338216,572.6733661144972,0.14341358177200286,1.0,0.9919951905241593,0.12338041740886208,0.11346596094054348,7404.509259652326,8.630503216322625,31.0,0,0,0,33,1112.3,28.0,33.0,0.324876673713883,0.037030726222237305 +NN_TORCH (default),31.202770278670574,0.17605404147395382,18.191025653235105,0.1946935658503532,0.9742281682834123,0.942605129122887,0.19206282164757377,0.16121349071556013,307.7875153991938,9.709642602635572,31.560606060606062,0.23746,21.156466828452217,0.13171541690826416,11.818497713406881,0.14659688817979757,1.0,0.9898941471678575,0.13261963205650174,0.1156261970018704,210.45346039107372,7.909338196312241,32.0,0,0,0,33,1067.8,26.3,33.0,0.2892882311486963,0.03503832636464612 +FASTAI (default),10.065239688764116,0.6049560324511544,5.915643660471347,0.6570296472609841,0.9848306008996557,0.9501254457184752,0.21715422546208968,0.19394402872724004,95.24755640719609,34.77327333774118,33.121212121212125,0.25653,7.87360077434116,0.4848888476689657,5.1820077836540115,0.6521266629591491,1.0,1.0,0.15050538424095006,0.1310864418065254,93.12290255921157,34.91731418633814,36.0,0,0,0,33,1026.4,24.4,29.8,0.25299506694855534,0.032307754682196176 +XT (default),0.822556706068893,0.08127484409897415,0.471496074166279,0.09148027851361618,0.9701664672021088,0.947954236878204,0.2425079699137419,0.2454783416017478,7.127294712723843,4.867665484175484,33.72727272727273,0.2625,0.7219058142768012,0.06215476195017497,0.4238388518146615,0.07849177235630946,1.0,1.0,0.1794327271141959,0.15384123117868195,6.235138739070136,4.456198846093199,37.0,0,0,0,33,1007.1,30.6,31.3,0.23890063424947147,0.033934460671698106 +RF (default),1.0802604700981167,0.07677059518769132,0.52542853627611,0.08878926590315098,0.9962098401734872,0.9695037757004908,0.23203683579868908,0.23508878524732915,8.967503792674067,4.676640526479693,33.86363636363637,0.25251,0.7427172581354777,0.058771981133355036,0.4717373991313263,0.06943642688143947,1.0,1.0,0.16087354235509543,0.1389292060353615,7.464730143591374,4.060095192168379,35.0,0,0,0,33,1000.0,0.0,0.0,0.2357293868921776,0.031175635120658556 +LR (tuned + ensemble),170.87050911529295,0.355976880680431,121.02102704404008,0.32634615259593897,0.9564945706206911,0.9495867900946459,0.2932871848480949,0.32561555425887745,1772.9723454272128,18.055698085268993,34.72727272727273,0.24491,158.05876021915012,0.1903169314066569,96.99876412252586,0.21682568555752907,1.0,1.0,0.21751004569734023,0.24054270012702494,1402.8748923675635,12.175213508812917,39.0,0,0,1,32,972.3,27.9,36.9,0.2156448202959831,0.04087606611291446 +LR (tuned),170.87050911529295,0.11554940750301887,121.02102704404008,0.10905393298119077,0.972064260567699,0.9545798343796298,0.29890337785985527,0.3331774019669149,1772.9723454272128,5.509546013888674,35.621212121212125,0.25255,158.05876021915012,0.058829413519965276,96.99876412252586,0.0862505760249892,1.0,1.0,0.2329968805680923,0.2575122541077485,1402.8748923675635,4.067640041721783,39.5,0,0,0,33,943.4,35.8,38.3,0.19485553206483439,0.03454590417164617 +LR (default),4.536491992417409,0.12851071365754613,2.8968102616210682,0.12996863092563715,0.9799295757292101,0.9610495592933687,0.3140075680375095,0.37538468089996596,44.19552977352603,6.787762182097637,36.13636363636363,0.26248,3.583410120010376,0.07990590731302898,2.994106463982051,0.104980896680783,1.0,1.0,0.26184876537499124,0.29385559917332205,34.79737790723628,4.682238532824547,39.5,0,0,1,32,921.5,35.2,34.5,0.1828752642706131,0.03726832355599832 +KNN (tuned + ensemble),13.854036316245494,0.24958616836303815,6.0685782814510345,0.19751524231739334,1.0,0.9936006831500522,0.4566757533990582,0.6138711800871106,58.80470682969773,11.4223913906534,40.54545454545455,0.31971,5.889678266313341,0.10710635185241699,3.411499284183487,0.17676389939136214,1.0,1.0,0.41352429770943944,0.6488391163428452,55.92066201498072,9.83646280007513,42.0,0,0,0,33,697.1,40.7,54.9,0.080338266384778,0.024871015659946344 +KNN (tuned),13.854036316245494,0.05394502376466488,6.0685782814510345,0.040282320435423104,1.0,0.9969136397430987,0.4748160330502654,0.665617927340854,58.80470682969773,2.390983322122401,41.60606060606061,0.34215,5.889678266313341,0.029062509536743164,3.411499284183487,0.03624739765555662,1.0,1.0,0.4641563796701971,0.740394718900499,55.92066201498072,2.000488003155834,43.0,0,0,0,33,603.2,56.1,50.1,0.05567300916138125,0.024112613850887754 +KNN (default),0.24162639913334188,0.029173843387000086,0.11473320891232033,0.030260209027344123,1.0,0.9999519518442885,0.555978281969172,0.946826696237823,1.0005153544482035,1.4920918647664636,43.31818181818182,0.40639,0.11169254779815674,0.019644896189371746,0.06555315548394994,0.02595407415488982,1.0,1.0,0.5396030287721358,1.0,1.0,1.2186499153225778,44.0,0,0,0,33,373.1,67.6,67.9,0.015856236786469344,0.023142914628718914 diff --git a/data/tabpfn/tuning-impact-elo-horizontal.pdf b/data/tabpfn/tuning-impact-elo-horizontal.pdf new file mode 100644 index 0000000000000000000000000000000000000000..1b43f99538ce393e70bc09f8078cc31ba867c1af Binary files /dev/null and b/data/tabpfn/tuning-impact-elo-horizontal.pdf differ diff --git a/data/tabpfn/tuning-impact-elo-horizontal.png.zip b/data/tabpfn/tuning-impact-elo-horizontal.png.zip new file mode 100644 index 0000000000000000000000000000000000000000..0082e1650b4a3bd25e03e26f768da4c4f07b5581 --- /dev/null +++ b/data/tabpfn/tuning-impact-elo-horizontal.png.zip @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96f37ca0e8626cc2055e6be64880740be0f6b8950004c69fc545431190ec8d16 +size 135563 diff --git a/data_pdfs_to_pngs.py b/data_pdfs_to_pngs.py new file mode 100644 index 0000000000000000000000000000000000000000..7ffc71aec50f3f62080fa4bbb56e013010dcc1da --- /dev/null +++ b/data_pdfs_to_pngs.py @@ -0,0 +1,23 @@ +"""Helper script to go from PDF to PNGs we can use in HTML on the LB.""" + +from __future__ import annotations + +import glob +import os +from pathlib import Path + +from pdf2image import convert_from_path + +root_dir = "./data" +pdf_paths = glob.glob(os.path.join(root_dir, "**", "*.pdf"), recursive=True) + + +for pdf_path in pdf_paths: + # Relative path to recreate folder structure + path_to_pdf = Path(pdf_path).resolve() + path_to_png = path_to_pdf.with_suffix(".png.zip") + print(f"Converting {pdf_path}...") + + images = convert_from_path(pdf_path, dpi=800) + for _i, image in enumerate(images): + image.save(path_to_png, "PNG") diff --git a/main.py b/main.py index 93ae02c6a9a2fbb413287701514548a5b926a15a..e6b1529aa30b2363c0a7f21c22695780ff0470e9 100644 --- a/main.py +++ b/main.py @@ -1,124 +1,22 @@ from __future__ import annotations +from dataclasses import dataclass from pathlib import Path import gradio as gr import pandas as pd +import website_texts from apscheduler.schedulers.background import BackgroundScheduler from constants import Constants, model_type_emoji from gradio_leaderboard import ColumnFilter, Leaderboard, SelectColumns - -TITLE = """