UlrickBL committed on
Commit
ff2e60a
·
verified ·
1 Parent(s): 2517e9b

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +105 -90
index.html CHANGED
@@ -6,130 +6,129 @@
6
  <title>LLM Benchmark Overview</title>
7
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600&display=swap" rel="stylesheet">
8
  <style>
9
-
10
  body {
11
  font-family: 'Inter', sans-serif;
12
- background-color: #1a1a2e;
13
- color: #e0e0e0;
14
  margin: 0;
15
  padding: 20px;
16
  line-height: 1.6;
17
- font-size: 14px;
18
  }
19
 
20
-
21
  h1 {
22
  text-align: center;
23
- color: #a766ff;
24
  margin-bottom: 30px;
25
  font-weight: 600;
26
  font-size: 2.2em;
27
  text-shadow: 0 0 10px rgba(167, 102, 255, 0.4);
28
  }
29
 
30
-
31
- body > div:nth-of-type(1) {
32
  max-width: 900px;
33
  margin: 0 auto 30px auto;
34
  text-align: justify;
35
- background-color: #2a2a4a;
36
  padding: 20px;
37
  border-radius: 12px;
38
  box-shadow: 0 4px 15px rgba(0, 0, 0, 0.3);
39
  font-size: 0.95em;
40
  }
41
 
42
-
43
  .table-container {
44
  overflow-x: auto;
45
  margin-top: 20px;
46
  position: relative;
47
  border-radius: 12px;
48
- box-shadow: 0 8px 20px rgba(0, 0, 0, 0.5);
49
  }
50
 
51
-
52
  table {
53
- width: 100%;
54
  border-collapse: collapse;
55
  margin: 0 auto;
56
- background-color: #2a2a4a;
57
  border-radius: 12px;
58
- overflow: hidden;
59
- min-width: 900px;
60
- table-layout: fixed;
61
  }
62
 
63
-
64
  th, td {
65
- padding: 12px 15px;
66
  text-align: left;
67
- border: 1px solid #3a3a5a;
68
- font-size: 0.9em;
69
- vertical-align: top;
70
-
 
71
  }
72
 
73
-
74
  th {
75
- background-color: #3a3a5a;
76
- color: #c0c0c0;
77
  font-weight: 600;
78
  position: relative;
79
- white-space: normal;
80
- word-wrap: break-word;
81
  }
82
 
83
-
84
  th.resizable .resizer {
85
  position: absolute;
86
  top: 0;
87
  right: 0;
88
- width: 8px;
89
  height: 100%;
90
  cursor: col-resize;
91
- background-color: rgba(167, 102, 255, 0.2);
92
  transition: background-color 0.2s ease-in-out;
93
  }
94
 
95
  th.resizable .resizer:hover {
96
- background-color: rgba(167, 102, 255, 0.5);
97
  }
98
 
99
-
100
  tr:nth-child(even) {
101
- background-color: #2f2f50;
102
  }
103
 
104
-
105
-
106
- th:nth-child(1), td:nth-child(1) { width: 15%; min-width: 120px; }
107
- th:nth-child(2), td:nth-child(2) { width: 15%; min-width: 120px; }
108
- th:nth-child(3), td:nth-child(3) { width: 20%; min-width: 150px; }
109
- th:nth-child(4), td:nth-child(4) { width: 25%; min-width: 200px; }
110
- th:nth-child(5), td:nth-child(5) { width: 25%; min-width: 200px; }
111
- th:nth-child(6), td:nth-child(6) { width: 10%; min-width: 80px; }
112
- th:nth-child(7), td:nth-child(7) { width: 10%; min-width: 80px; }
113
 
114
 
115
-
116
  .cell-content {
117
- cursor: pointer;
118
  overflow: hidden;
119
  text-overflow: ellipsis;
120
  display: -webkit-box;
121
- -webkit-line-clamp: 4;
122
  -webkit-box-orient: vertical;
123
- white-space: normal;
124
- word-wrap: break-word;
125
  }
126
 
127
-
128
  td:hover {
129
- background-color: #3a3a5a;
130
  }
131
 
132
-
133
  .filter {
134
  margin-bottom: 25px;
135
  text-align: center;
@@ -142,16 +141,16 @@
142
  .filter label {
143
  font-size: 1em;
144
  margin-right: 5px;
145
- color: #a766ff;
146
  font-weight: 600;
147
  }
148
  .filter select, .filter input[type="text"] {
149
  padding: 8px 12px;
150
  font-size: 0.95em;
151
- border: 1px solid #5a5a7a;
152
  border-radius: 8px;
153
- background-color: #3a3a5a;
154
- color: #e0e0e0;
155
  outline: none;
156
  transition: border-color 0.2s ease-in-out, box-shadow 0.2s ease-in-out;
157
  }
@@ -160,27 +159,27 @@
160
  box-shadow: 0 0 8px rgba(167, 102, 255, 0.5);
161
  }
162
  .filter input[type="text"] {
163
- flex-grow: 1;
164
  max-width: 400px;
165
  }
166
 
167
-
168
  .modal {
169
  position: fixed;
170
  top: 50%;
171
  left: 50%;
172
  transform: translate(-50%, -50%);
173
- background-color: #2a2a4a;
174
- box-shadow: 0 8px 20px rgba(0, 0, 0, 0.6);
175
  padding: 30px;
176
  z-index: 1000;
177
  border-radius: 12px;
178
  max-width: 90%;
179
  max-height: 90%;
180
  overflow: auto;
181
- color: #e0e0e0;
182
  font-size: 1em;
183
- white-space: pre-wrap;
184
  }
185
  .overlay {
186
  position: fixed;
@@ -188,24 +187,24 @@
188
  left: 0;
189
  width: 100%;
190
  height: 100%;
191
- background: rgba(0, 0, 0, 0.8);
192
  z-index: 999;
193
  }
194
 
195
-
196
  a {
197
- color: #a766ff;
198
  text-decoration: none;
199
  transition: color 0.2s ease-in-out;
200
  }
201
  a:hover {
202
- color: #c08cff;
203
  text-decoration: underline;
204
  }
205
  </style>
206
  </head>
207
  <body>
208
- <h1>LLM Benchmark Overview</h1>
209
  <div>As the development and evaluation of large language models (LLMs) continue to evolve, I conducted an overview of the principal benchmarks commonly found in research papers. My goal is to create a clear and comprehensive resource that summarizes what is being tested in LLMs, with concrete examples, key metrics, and direct links to related papers and repositories. This document serves as a centralized matrix that will be continuously updated with insights from future papers I review.</div>
210
  <div class="filter">
211
  <label for="metricFilter">Filter by Evaluated task:</label>
@@ -229,7 +228,8 @@
229
  <div class="modal" id="modal" style="display: none;"></div>
230
 
231
  <script>
232
- function parseCSV(content) {
 
233
  const rows = [];
234
  let currentRow = [];
235
  let currentField = '';
@@ -250,27 +250,35 @@
250
  currentField += char;
251
  }
252
  }
 
253
  if (currentField) currentRow.push(currentField.trim());
254
  if (currentRow.length > 0) rows.push(currentRow);
255
- const headers = rows.shift();
 
256
  return { headers, rows };
257
  }
 
 
258
  async function loadCSVFromHuggingFace(dataset, filename, token) {
259
- const url = 'https://huggingface.co/datasets/UlrickBL/benchmark_overview/resolve/main/benchmark_overview.csv'//`https://huggingface.co/datasets/${dataset}/blob/main/${filename}`;
260
-
261
- const response = await fetch(url, {
262
- headers: {
263
- 'Authorization': `Bearer ${token}`,
264
- },
265
- });
266
-
267
- if (!response.ok) {
268
- throw new Error(`Failed to fetch file: ${response.statusText}`);
 
 
 
 
 
 
 
 
269
  }
270
-
271
- const content = await response.text();
272
-
273
- return parseCSV(content);
274
  }
275
 
276
  const metricFilter = document.getElementById('metricFilter');
@@ -427,24 +435,31 @@
427
  });
428
  makeResizable(); // Re-apply resizable functionality after table population
429
  }
430
-
 
431
  overlay.addEventListener('click', () => {
432
  overlay.style.display = 'none';
433
  modal.style.display = 'none';
434
  });
435
 
 
436
  metricFilter.addEventListener('change', () => {
437
  const filterValue = metricFilter.value;
438
  populateTable(parsedCSV.headers, parsedCSV.rows, filterValue, 0); // Re-populate table with new filter
439
  });
440
 
441
- let parsedCSV;
442
-
443
- loadCSVFromHuggingFace('UlrickBL/benchmark_overview', 'benchmark_overview.csv', window.huggingface.variables.HF_TOKEN).then(({ headers, rows }) => {
444
- parsedCSV = { headers, rows };
445
- populateFilterOptions(rows, 0);
446
- populateTable(headers, rows, '', 0);
447
- });
 
 
 
 
 
448
  </script>
449
  </body>
450
  </html>
 
6
  <title>LLM Benchmark Overview</title>
7
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600&display=swap" rel="stylesheet">
8
  <style>
9
+ /* General Body and Font Styles */
10
  body {
11
  font-family: 'Inter', sans-serif;
12
+ background-color: #1a1a2e; /* Dark background */
13
+ color: #e0e0e0; /* Light text */
14
  margin: 0;
15
  padding: 20px;
16
  line-height: 1.6;
17
+ font-size: 14px; /* Reduced base font size */
18
  }
19
 
20
+ /* Header Styling */
21
  h1 {
22
  text-align: center;
23
+ color: #a766ff; /* Neo purple */
24
  margin-bottom: 30px;
25
  font-weight: 600;
26
  font-size: 2.2em;
27
  text-shadow: 0 0 10px rgba(167, 102, 255, 0.4);
28
  }
29
 
30
+ /* Introduction Text */
31
+ body > div:nth-of-type(1) { /* Targeting the intro div */
32
  max-width: 900px;
33
  margin: 0 auto 30px auto;
34
  text-align: justify;
35
+ background-color: #2a2a4a; /* Slightly lighter dark background */
36
  padding: 20px;
37
  border-radius: 12px;
38
  box-shadow: 0 4px 15px rgba(0, 0, 0, 0.3);
39
  font-size: 0.95em;
40
  }
41
 
42
+ /* Table Container and Shadow */
43
  .table-container {
44
  overflow-x: auto;
45
  margin-top: 20px;
46
  position: relative;
47
  border-radius: 12px;
48
+ box-shadow: 0 8px 20px rgba(0, 0, 0, 0.5); /* Stronger shadow */
49
  }
50
 
51
+ /* Table Styling */
52
  table {
53
+ width: auto; /* Changed from 100% to auto to allow min-width to force overflow */
54
  border-collapse: collapse;
55
  margin: 0 auto;
56
+ background-color: #2a2a4a; /* Darker table background */
57
  border-radius: 12px;
58
+ overflow: hidden; /* Ensures rounded corners apply */
59
+ min-width: 950px; /* Ensure a minimum width for the table itself (sum of column min-widths) */
60
+ table-layout: fixed; /* Keep fixed layout for column width control */
61
  }
62
 
63
+ /* Table Headers and Cells */
64
  th, td {
65
+ padding: 10px 15px; /* Reduced vertical padding from 12px to 10px */
66
  text-align: left;
67
+ border: 1px solid #3a3a5a; /* Darker border */
68
+ font-size: 0.9em; /* Smaller font for table content */
69
+ vertical-align: top; /* Align content to top */
70
+ white-space: normal; /* Ensure cells allow content to wrap */
71
+ word-wrap: break-word; /* Ensure long words break within cells */
72
  }
73
 
74
+ /* Table Header Specifics */
75
  th {
76
+ background-color: #3a3a5a; /* Dark header background */
77
+ color: #c0c0c0; /* Lighter header text */
78
  font-weight: 600;
79
  position: relative;
80
+ /* white-space: normal and word-wrap: break-word are now in th, td general rule */
 
81
  }
82
 
83
+ /* Resizable Column Handler */
84
  th.resizable .resizer {
85
  position: absolute;
86
  top: 0;
87
  right: 0;
88
+ width: 8px; /* Wider resizer for easier grabbing */
89
  height: 100%;
90
  cursor: col-resize;
91
+ background-color: rgba(167, 102, 255, 0.2); /* Semi-transparent purple */
92
  transition: background-color 0.2s ease-in-out;
93
  }
94
 
95
  th.resizable .resizer:hover {
96
+ background-color: rgba(167, 102, 255, 0.5); /* More visible on hover */
97
  }
98
 
99
+ /* Alternating Row Colors */
100
  tr:nth-child(even) {
101
+ background-color: #2f2f50; /* Slightly different shade for even rows */
102
  }
103
 
104
+ /* Specific Column Styling for wider columns */
105
+ /* Adjusted widths for better display */
106
+ th:nth-child(1), td:nth-child(1) { width: 15%; min-width: 120px; } /* Evaluated task */
107
+ th:nth-child(2), td:nth-child(2) { width: 15%; min-width: 120px; } /* Benchmark Name */
108
+ th:nth-child(3), td:nth-child(3) { width: 20%; min-width: 150px; } /* Metric often used */
109
+ th:nth-child(4), td:nth-child(4) { width: 25%; min-width: 200px; } /* Question + context example */
110
+ th:nth-child(5), td:nth-child(5) { width: 25%; min-width: 200px; } /* Answer example */
111
+ th:nth-child(6), td:nth-child(6) { width: 10%; min-width: 80px; } /* Paper */
112
+ th:nth-child(7), td:nth-child(7) { width: 10%; min-width: 80px; } /* HF or Git link */
113
 
114
 
115
+ /* Inner div for truncated content */
116
  .cell-content {
117
+ cursor: pointer; /* Keep cursor pointer for expandability */
118
  overflow: hidden;
119
  text-overflow: ellipsis;
120
  display: -webkit-box;
121
+ -webkit-line-clamp: 4; /* Limit to 4 lines */
122
  -webkit-box-orient: vertical;
123
+ /* white-space: normal and word-wrap: break-word are now in th, td general rule */
 
124
  }
125
 
126
+ /* Hover effect on the cell, not the inner content */
127
  td:hover {
128
+ background-color: #3a3a5a; /* Highlight on hover */
129
  }
130
 
131
+ /* Filter and Search Bar Styling */
132
  .filter {
133
  margin-bottom: 25px;
134
  text-align: center;
 
141
  .filter label {
142
  font-size: 1em;
143
  margin-right: 5px;
144
+ color: #a766ff; /* Neo purple */
145
  font-weight: 600;
146
  }
147
  .filter select, .filter input[type="text"] {
148
  padding: 8px 12px;
149
  font-size: 0.95em;
150
+ border: 1px solid #5a5a7a; /* Darker border */
151
  border-radius: 8px;
152
+ background-color: #3a3a5a; /* Dark input background */
153
+ color: #e0e0e0; /* Light input text */
154
  outline: none;
155
  transition: border-color 0.2s ease-in-out, box-shadow 0.2s ease-in-out;
156
  }
 
159
  box-shadow: 0 0 8px rgba(167, 102, 255, 0.5);
160
  }
161
  .filter input[type="text"] {
162
+ flex-grow: 1; /* Allow search input to grow */
163
  max-width: 400px;
164
  }
165
 
166
+ /* Modal and Overlay Styling */
167
  .modal {
168
  position: fixed;
169
  top: 50%;
170
  left: 50%;
171
  transform: translate(-50%, -50%);
172
+ background-color: #2a2a4a; /* Dark modal background */
173
+ box-shadow: 0 8px 20px rgba(0, 0, 0, 0.6); /* Stronger shadow */
174
  padding: 30px;
175
  z-index: 1000;
176
  border-radius: 12px;
177
  max-width: 90%;
178
  max-height: 90%;
179
  overflow: auto;
180
+ color: #e0e0e0; /* Light text */
181
  font-size: 1em;
182
+ white-space: pre-wrap; /* Preserve formatting for modal content */
183
  }
184
  .overlay {
185
  position: fixed;
 
187
  left: 0;
188
  width: 100%;
189
  height: 100%;
190
+ background: rgba(0, 0, 0, 0.8); /* Darker overlay */
191
  z-index: 999;
192
  }
193
 
194
+ /* Link Styling */
195
  a {
196
+ color: #a766ff; /* Neo purple for links */
197
  text-decoration: none;
198
  transition: color 0.2s ease-in-out;
199
  }
200
  a:hover {
201
+ color: #c08cff; /* Lighter purple on hover */
202
  text-decoration: underline;
203
  }
204
  </style>
205
  </head>
206
  <body>
207
+ <h1>LLM Benchmark Overview (Update Ongoing)</h1>
208
  <div>As the development and evaluation of large language models (LLMs) continue to evolve, I conducted an overview of the principal benchmarks commonly found in research papers. My goal is to create a clear and comprehensive resource that summarizes what is being tested in LLMs, with concrete examples, key metrics, and direct links to related papers and repositories. This document serves as a centralized matrix that will be continuously updated with insights from future papers I review.</div>
209
  <div class="filter">
210
  <label for="metricFilter">Filter by Evaluated task:</label>
 
228
  <div class="modal" id="modal" style="display: none;"></div>
229
 
230
  <script>
231
+ // Custom CSV parser to handle quoted fields with commas/newlines
232
+ function parseCSV(content) {
233
  const rows = [];
234
  let currentRow = [];
235
  let currentField = '';
 
250
  currentField += char;
251
  }
252
  }
253
+ // Add the last field and row if any
254
  if (currentField) currentRow.push(currentField.trim());
255
  if (currentRow.length > 0) rows.push(currentRow);
256
+
257
+ const headers = rows.shift(); // First row is headers
258
  return { headers, rows };
259
  }
260
+
261
+ // Fetch the CSV from the Hugging Face dataset repo and parse it (the Authorization header below is commented out, so the token is not sent)
262
  async function loadCSVFromHuggingFace(dataset, filename, token) {
263
+ const url = `https://huggingface.co/datasets/${dataset}/resolve/main/${filename}`;
264
+ try {
265
+ const response = await fetch(url, {
266
+ headers: {
267
+ // 'Authorization': `Bearer ${token}`, // Uncomment if a token is required
268
+ },
269
+ });
270
+
271
+ if (!response.ok) {
272
+ throw new Error(`Failed to fetch file: ${response.statusText}`);
273
+ }
274
+
275
+ const content = await response.text();
276
+ return parseCSV(content);
277
+ } catch (error) {
278
+ console.error("Error loading CSV from Hugging Face:", error);
279
+ // Fallback or error message to user could go here
280
+ return { headers: [], rows: [] };
281
  }
 
 
 
 
282
  }
283
 
284
  const metricFilter = document.getElementById('metricFilter');
 
435
  });
436
  makeResizable(); // Re-apply resizable functionality after table population
437
  }
438
+
439
+ // Close modal on overlay click
440
  overlay.addEventListener('click', () => {
441
  overlay.style.display = 'none';
442
  modal.style.display = 'none';
443
  });
444
 
445
+ // Filter change listener
446
  metricFilter.addEventListener('change', () => {
447
  const filterValue = metricFilter.value;
448
  populateTable(parsedCSV.headers, parsedCSV.rows, filterValue, 0); // Re-populate table with new filter
449
  });
450
 
451
+
452
+ loadCSVFromHuggingFace('UlrickBL/benchmark_overview', 'benchmark_overview.csv', window.huggingface.variables.HF_TOKEN)
453
+ .then(({ headers, rows }) => {
454
+ parsedCSV = { headers, rows };
455
+ populateFilterOptions(rows, 0);
456
+ populateTable(headers, rows, '', 0);
457
+ })
458
+ .catch(error => {
459
+ console.error("Failed to load CSV data:", error);
460
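+ // Display a user-friendly message if data loading fails (note: loadCSVFromHuggingFace catches its own errors and resolves with empty headers/rows, so this only runs if the .then callback above throws)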
461
+ tableBody.innerHTML = '<tr><td colspan="7" style="text-align: center; color: #ff6b6b;">Failed to load data. Please check the dataset link or your internet connection.</td></tr>';
462
+ });
463
  </script>
464
  </body>
465
  </html>