acecalisto3 committed on
Commit
2a41399
·
verified ·
1 Parent(s): 8d920b7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1400 -0
app.py CHANGED
@@ -84,6 +84,1406 @@ The dataset was generated using the [Infinite Dataset Hub]({url}) and {model_id}
84
  - **More Datasets**: https://huggingface.co/datasets?other=infinite-dataset-hub
85
  """
86
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
87
  # --- Gradio CSS ---
88
  css = """
89
  a { color: var(--body-text-color); }
 
84
  - **More Datasets**: https://huggingface.co/datasets?other=infinite-dataset-hub
85
  """
86
 
87
+ # --- Gradio HTML ---
88
+ html = """
89
+
90
+ <!DOCTYPE html>
91
+ <html lang="en">
92
+ <head>
93
+ <meta charset="UTF-8">
94
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
95
+ <title>Infinite Dataset Hub</title>
96
+ <script src="https://cdn.tailwindcss.com"></script>
97
+ <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
98
+ <script src="https://cdn.jsdelivr.net/npm/papaparse@5/papaparse.min.js"></script>
99
+ <script>
100
// Tailwind CDN configuration: dark variants are driven by a "dark" class,
// and the theme gains one extra colour, "primary", used by the utility
// classes in the markup (bg-primary, text-primary, ...).
tailwind.config = {
  darkMode: 'class',
  theme: {
    extend: {
      colors: { primary: '#5D5CDE' },
    },
  },
};
110
+ </script>
111
+ <style>
112
+ .shimmer {
113
+ background: linear-gradient(90deg, #f0f0f0 25%, #e0e0e0 50%, #f0f0f0 75%);
114
+ background-size: 200% 100%;
115
+ animation: shimmer 1.5s infinite;
116
+ border-radius: 4px;
117
+ }
118
+
119
+ @keyframes shimmer {
120
+ 0% {
121
+ background-position: -200% 0;
122
+ }
123
+ 100% {
124
+ background-position: 200% 0;
125
+ }
126
+ }
127
+
128
+ /* Dark mode overrides */
129
+ .dark .shimmer {
130
+ background: linear-gradient(90deg, #2a2a2a 25%, #3a3a3a 50%, #2a2a2a 75%);
131
+ background-size: 200% 100%;
132
+ }
133
+
134
+ .dataset-card {
135
+ transition: transform 0.2s, box-shadow 0.2s;
136
+ }
137
+
138
+ .dataset-card:hover {
139
+ transform: translateY(-2px);
140
+ box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.1), 0 4px 6px -2px rgba(0, 0, 0, 0.05);
141
+ }
142
+
143
+ .dark .dataset-card:hover {
144
+ box-shadow: 0 10px 15px -3px rgba(0, 0, 0, 0.3), 0 4px 6px -2px rgba(0, 0, 0, 0.2);
145
+ }
146
+
147
+ /* Table styling */
148
+ table {
149
+ width: 100%;
150
+ border-collapse: collapse;
151
+ margin: 1rem 0;
152
+ }
153
+
154
+ table thead th {
155
+ background-color: #f3f4f6;
156
+ padding: 0.75rem;
157
+ text-align: left;
158
+ font-weight: 600;
159
+ }
160
+
161
+ .dark table thead th {
162
+ background-color: #374151;
163
+ }
164
+
165
+ table tbody td {
166
+ padding: 0.75rem;
167
+ border-top: 1px solid #e5e7eb;
168
+ }
169
+
170
+ .dark table tbody td {
171
+ border-top: 1px solid #4b5563;
172
+ }
173
+
174
+ table tbody tr:nth-child(even) {
175
+ background-color: #f9fafb;
176
+ }
177
+
178
+ .dark table tbody tr:nth-child(even) {
179
+ background-color: #1f2937;
180
+ }
181
+
182
+ /* Search engine badge */
183
+ .engine-badge {
184
+ position: absolute;
185
+ top: -8px;
186
+ right: -8px;
187
+ font-size: 0.7rem;
188
+ padding: 2px 6px;
189
+ border-radius: 9999px;
190
+ background-color: #5D5CDE;
191
+ color: white;
192
+ box-shadow: 0 2px 4px rgba(0,0,0,0.1);
193
+ }
194
+
195
+ .dark .engine-badge {
196
+ box-shadow: 0 2px 4px rgba(0,0,0,0.3);
197
+ }
198
+
199
+ /* Toggle switch */
200
+ .toggle-switch {
201
+ position: relative;
202
+ display: inline-block;
203
+ width: 50px;
204
+ height: 24px;
205
+ }
206
+
207
+ .toggle-switch input {
208
+ opacity: 0;
209
+ width: 0;
210
+ height: 0;
211
+ }
212
+
213
+ .toggle-slider {
214
+ position: absolute;
215
+ cursor: pointer;
216
+ top: 0;
217
+ left: 0;
218
+ right: 0;
219
+ bottom: 0;
220
+ background-color: #ccc;
221
+ transition: .4s;
222
+ border-radius: 24px;
223
+ }
224
+
225
+ .toggle-slider:before {
226
+ position: absolute;
227
+ content: "";
228
+ height: 16px;
229
+ width: 16px;
230
+ left: 4px;
231
+ bottom: 4px;
232
+ background-color: white;
233
+ transition: .4s;
234
+ border-radius: 50%;
235
+ }
236
+
237
+ input:checked + .toggle-slider {
238
+ background-color: #5D5CDE;
239
+ }
240
+
241
+ input:checked + .toggle-slider:before {
242
+ transform: translateX(26px);
243
+ }
244
+ </style>
245
+ </head>
246
+ <body class="bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200 min-h-screen">
247
+ <!-- Dark mode detection -->
248
+ <script>
249
// Mirror the OS colour-scheme preference onto <html> by adding/removing the
// "dark" class, both at load time and whenever the preference changes.
(() => {
  // The original guarded only the initial check; the listener registration
  // below it would throw where matchMedia is unavailable. Guard both.
  if (typeof window.matchMedia !== 'function') {
    return;
  }

  const darkSchemeQuery = window.matchMedia('(prefers-color-scheme: dark)');
  const applyScheme = (prefersDark) => {
    document.documentElement.classList.toggle('dark', prefersDark);
  };

  applyScheme(darkSchemeQuery.matches);
  darkSchemeQuery.addEventListener('change', (event) => {
    applyScheme(event.matches);
  });
})();
259
+ </script>
260
+
261
+ <div class="container mx-auto px-4 py-8">
262
+ <!-- Header -->
263
+ <header class="text-center mb-8">
264
+ <h1 class="text-3xl font-bold mb-2">🤗 Infinite Dataset Hub ♾️</h1>
265
+ <p class="text-lg text-gray-600 dark:text-gray-400">Generate datasets from AI and real-world data sources</p>
266
+ </header>
267
+
268
+ <!-- Main Content -->
269
+ <main>
270
+ <!-- Search Section -->
271
+ <div id="search-page" class="mb-8">
272
+ <div class="max-w-3xl mx-auto">
273
+ <div class="mb-4">
274
+ <div class="flex mb-2">
275
+ <input id="search-input" type="text" placeholder="Search datasets, get infinite results"
276
+ class="flex-grow px-4 py-3 text-base rounded-l-lg border border-gray-300 dark:border-gray-700 focus:outline-none focus:ring-2 focus:ring-primary dark:bg-gray-800">
277
+ <button id="search-button" class="bg-primary text-white px-6 py-3 rounded-r-lg hover:bg-opacity-90 transition">
278
+ 🔍
279
+ </button>
280
+ </div>
281
+
282
+ <div class="flex items-center justify-between p-3 bg-gray-100 dark:bg-gray-800 rounded-lg">
283
+ <div class="flex items-center">
284
+ <label class="toggle-switch mr-3">
285
+ <input type="checkbox" id="data-source-toggle" checked>
286
+ <span class="toggle-slider"></span>
287
+ </label>
288
+ <div>
289
+ <span id="data-source-text" class="font-medium">Using: Real + AI Data</span>
290
+ <p class="text-xs text-gray-500 dark:text-gray-400">Toggle to switch between data sources</p>
291
+ </div>
292
+ </div>
293
+
294
+ <button id="engine-settings-button" class="text-primary hover:underline flex items-center">
295
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-1" viewBox="0 0 20 20" fill="currentColor">
296
+ <path fill-rule="evenodd" d="M11.49 3.17c-.38-1.56-2.6-1.56-2.98 0a1.532 1.532 0 01-2.286.948c-1.372-.836-2.942.734-2.106 2.106.54.886.061 2.042-.947 2.287-1.561.379-1.561 2.6 0 2.978a1.532 1.532 0 01.947 2.287c-.836 1.372.734 2.942 2.106 2.106a1.532 1.532 0 012.287.947c.379 1.561 2.6 1.561 2.978 0a1.533 1.533 0 012.287-.947c1.372.836 2.942-.734 2.106-2.106a1.533 1.533 0 01.947-2.287c1.561-.379 1.561-2.6 0-2.978a1.532 1.532 0 01-.947-2.287c.836-1.372-.734-2.942-2.106-2.106a1.532 1.532 0 01-2.287-.947zM10 13a3 3 0 100-6 3 3 0 000 6z" clip-rule="evenodd" />
297
+ </svg>
298
+ Search Engines
299
+ </button>
300
+ </div>
301
+ </div>
302
+
303
+ <!-- Search Engine Selection Modal -->
304
+ <div id="engine-modal" class="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50 hidden">
305
+ <div class="bg-white dark:bg-gray-800 rounded-lg p-6 max-w-lg w-full max-h-[80vh] overflow-y-auto">
306
+ <div class="flex justify-between items-center mb-4">
307
+ <h3 class="text-xl font-bold">Search Engine Settings</h3>
308
+ <button id="close-modal-button" class="text-gray-500 hover:text-gray-700 dark:text-gray-400 dark:hover:text-gray-200">
309
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-6 w-6" fill="none" viewBox="0 0 24 24" stroke="currentColor">
310
+ <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M6 18L18 6M6 6l12 12" />
311
+ </svg>
312
+ </button>
313
+ </div>
314
+
315
+ <p class="mb-4 text-sm text-gray-600 dark:text-gray-400">
316
+ Select which search engines to use for real data retrieval. A diverse selection improves results.
317
+ </p>
318
+
319
+ <div id="engine-options" class="space-y-2 mb-6">
320
+ <!-- Engine options will be dynamically inserted here -->
321
+ </div>
322
+
323
+ <div class="flex justify-between">
324
+ <button id="select-all-engines" class="text-primary hover:underline">Select All</button>
325
+ <button id="deselect-all-engines" class="text-primary hover:underline">Deselect All</button>
326
+ </div>
327
+
328
+ <div class="mt-6 flex justify-end">
329
+ <button id="save-engines-button" class="bg-primary text-white px-4 py-2 rounded hover:bg-opacity-90 transition">
330
+ Save Settings
331
+ </button>
332
+ </div>
333
+ </div>
334
+ </div>
335
+
336
+ <div id="dataset-results" class="grid grid-cols-1 md:grid-cols-2 gap-4 mt-6">
337
+ <!-- Dataset cards will be dynamically inserted here -->
338
+ </div>
339
+
340
+ <div id="load-more-container" class="text-center mt-6 hidden">
341
+ <button id="load-more-button" class="bg-gray-200 dark:bg-gray-700 px-6 py-3 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition">
342
+ Load more datasets
343
+ </button>
344
+ </div>
345
+ </div>
346
+ </div>
347
+
348
+ <!-- Dataset Detail Page -->
349
+ <div id="dataset-page" class="hidden max-w-4xl mx-auto">
350
+ <button id="back-button" class="flex items-center text-primary mb-4 hover:underline">
351
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-1" viewBox="0 0 20 20" fill="currentColor">
352
+ <path fill-rule="evenodd" d="M9.707 14.707a1 1 0 01-1.414 0l-4-4a1 1 0 010-1.414l4-4a1 1 0 011.414 1.414L7.414 9H15a1 1 0 110 2H7.414l2.293 2.293a1 1 0 010 1.414z" clip-rule="evenodd" />
353
+ </svg>
354
+ Back to Search
355
+ </button>
356
+
357
+ <div id="dataset-header" class="mb-4">
358
+ <div class="flex items-center justify-between">
359
+ <h2 id="dataset-title" class="text-2xl font-bold"></h2>
360
+ <span id="data-source-badge" class="px-3 py-1 rounded-full text-xs font-medium bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-200">
361
+ Real Data
362
+ </span>
363
+ </div>
364
+ <div id="dataset-tags" class="text-sm text-gray-600 dark:text-gray-400 mt-1"></div>
365
+ </div>
366
+
367
+ <div id="data-source-info" class="bg-blue-50 dark:bg-blue-900 p-4 rounded-lg mb-6 text-blue-800 dark:text-blue-200">
368
+ <h3 class="font-semibold mb-1 flex items-center">
369
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-1" viewBox="0 0 20 20" fill="currentColor">
370
+ <path fill-rule="evenodd" d="M18 10a8 8 0 11-16 0 8 8 0 0116 0zm-7-4a1 1 0 11-2 0 1 1 0 012 0zM9 9a1 1 0 000 2v3a1 1 0 001 1h1a1 1 0 100-2v-3a1 1 0 00-1-1H9z" clip-rule="evenodd" />
371
+ </svg>
372
+ Data Source Information
373
+ </h3>
374
+ <p id="source-details" class="text-sm"></p>
375
+ </div>
376
+
377
+ <div id="dataset-description" class="prose dark:prose-invert prose-sm sm:prose max-w-none mb-6"></div>
378
+
379
+ <div id="dataset-preview" class="mb-6 overflow-x-auto">
380
+ <h3 class="text-xl font-semibold mb-3">Dataset Preview</h3>
381
+ <div id="preview-table" class="border dark:border-gray-700 rounded-lg overflow-hidden"></div>
382
+ </div>
383
+
384
+ <div id="generate-actions" class="mb-8">
385
+ <button id="generate-full-button" class="bg-primary text-white px-6 py-3 rounded-lg hover:bg-opacity-90 transition mr-3">
386
+ Generate Full Dataset
387
+ </button>
388
+ <div id="generate-status" class="hidden mt-4">
389
+ <div class="flex items-center">
390
+ <div class="animate-spin rounded-full h-5 w-5 border-b-2 border-primary mr-3"></div>
391
+ <span>Generating dataset... <span id="rows-count">0</span> rows created</span>
392
+ </div>
393
+ <div class="w-full bg-gray-200 dark:bg-gray-700 rounded-full h-2.5 mt-2">
394
+ <div id="progress-bar" class="bg-primary h-2.5 rounded-full" style="width: 0%"></div>
395
+ </div>
396
+ </div>
397
+ </div>
398
+
399
+ <div id="full-dataset" class="hidden mb-6">
400
+ <h3 class="text-xl font-semibold mb-3">Full Dataset</h3>
401
+ <div id="full-table" class="border dark:border-gray-700 rounded-lg overflow-hidden"></div>
402
+ <div class="mt-4 flex flex-wrap gap-3">
403
+ <button id="download-csv-button" class="bg-green-600 hover:bg-green-700 text-white px-4 py-2 rounded-lg transition flex items-center">
404
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
405
+ <path fill-rule="evenodd" d="M3 17a1 1 0 011-1h12a1 1 0 110 2H4a1 1 0 01-1-1zm3.293-7.707a1 1 0 011.414 0L9 10.586V3a1 1 0 112 0v7.586l1.293-1.293a1 1 0 111.414 1.414l-3 3a1 1 0 01-1.414 0l-3-3a1 1 0 010-1.414z" clip-rule="evenodd" />
406
+ </svg>
407
+ Download CSV
408
+ </button>
409
+ <button id="download-json-button" class="bg-yellow-600 hover:bg-yellow-700 text-white px-4 py-2 rounded-lg transition flex items-center">
410
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
411
+ <path fill-rule="evenodd" d="M3 17a1 1 0 011-1h12a1 1 0 110 2H4a1 1 0 01-1-1zm3.293-7.707a1 1 0 011.414 0L9 10.586V3a1 1 0 112 0v7.586l1.293-1.293a1 1 0 111.414 1.414l-3 3a1 1 0 01-1.414 0l-3-3a1 1 0 010-1.414z" clip-rule="evenodd" />
412
+ </svg>
413
+ Download JSON
414
+ </button>
415
+ <button id="download-parquet-button" class="bg-blue-600 hover:bg-blue-700 text-white px-4 py-2 rounded-lg transition flex items-center">
416
+ <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-2" viewBox="0 0 20 20" fill="currentColor">
417
+ <path fill-rule="evenodd" d="M3 17a1 1 0 011-1h12a1 1 0 110 2H4a1 1 0 01-1-1zm3.293-7.707a1 1 0 011.414 0L9 10.586V3a1 1 0 112 0v7.586l1.293-1.293a1 1 0 111.414 1.414l-3 3a1 1 0 01-1.414 0l-3-3a1 1 0 010-1.414z" clip-rule="evenodd" />
418
+ </svg>
419
+ Download Parquet
420
+ </button>
421
+ </div>
422
+ </div>
423
+ </div>
424
+ </main>
425
+
426
+ <!-- Footer -->
427
+ <footer class="mt-12 text-center text-sm text-gray-600 dark:text-gray-400">
428
+ <p>Powered by Claude-3.7-Sonnet • Datasets generated from real sources and AI</p>
429
+ </footer>
430
+ </div>
431
+
432
+ <script>
433
+ // Constants and global state
434
// Number of dataset names requested from the model per batch.
const MAX_DATASETS_PER_PAGE = 10;
// Row cap for a generated full dataset (consumed outside this excerpt).
const MAX_FULL_DATASET_ROWS = 100;

// Every search engine that can be offered in the settings modal.
const searchEngines = [
  "AlltheInternet.com",
  "DuckDuckGo.com",
  "Google.com",
  "Bing.com",
  "Search.Yahoo.com",
  "Startpage.com",
  "Qwant.com",
  "Ecosia.org",
  "WolframAlpha.com",
  "Mojeek.co.uk",
  "Search.Brave.com",
  "Yandex.com",
  "Baidu.com",
  "Gibiru.com",
  "MetaGer.org",
  "Swisscows.com",
  "Presearch.com",
  "Ekoru.org",
  "Search.Lilo.org",
];

// --- Mutable page state -------------------------------------------------
let currentDatasets = [];      // datasets shown for the current query
let currentPage = 1;           // incremented by each "load more"
let currentSearchQuery = '';   // last submitted query text
let currentDataset = null;     // dataset opened on the detail page
let fullDatasetRows = [];      // rows of the generated full dataset
let useRealData = true;        // mirrors the data-source toggle
// Engines enabled by default; editable through the settings modal.
let selectedEngines = [
  "DuckDuckGo.com",
  "Bing.com",
  "Search.Yahoo.com",
  "Search.Brave.com",
  "Ecosia.org",
];
let currentEngine = "";        // engine named in the most recent prompt
453
+
454
+ // DOM Elements
455
// Cached references to the page's static elements, resolved once when the
// script runs (the script tag sits after the markup it refers to).

// Search page
const searchInput = document.querySelector('#search-input');
const searchButton = document.querySelector('#search-button');
const resultsContainer = document.querySelector('#dataset-results');
const loadMoreContainer = document.querySelector('#load-more-container');
const loadMoreButton = document.querySelector('#load-more-button');
const searchPage = document.querySelector('#search-page');

// Dataset detail page
const datasetPage = document.querySelector('#dataset-page');
const backButton = document.querySelector('#back-button');
const datasetTitle = document.querySelector('#dataset-title');
const datasetTags = document.querySelector('#dataset-tags');
const datasetDescription = document.querySelector('#dataset-description');
const previewTable = document.querySelector('#preview-table');

// Full-dataset generation and download
const generateFullButton = document.querySelector('#generate-full-button');
const generateStatus = document.querySelector('#generate-status');
const rowsCount = document.querySelector('#rows-count');
const progressBar = document.querySelector('#progress-bar');
const fullDatasetSection = document.querySelector('#full-dataset');
const fullTable = document.querySelector('#full-table');
const downloadCsvButton = document.querySelector('#download-csv-button');
const downloadJsonButton = document.querySelector('#download-json-button');
const downloadParquetButton = document.querySelector('#download-parquet-button');

// Data-source toggle and source information
const dataSourceToggle = document.querySelector('#data-source-toggle');
const dataSourceText = document.querySelector('#data-source-text');
const dataSourceBadge = document.querySelector('#data-source-badge');
const sourceDetails = document.querySelector('#source-details');

// Search-engine settings modal
const engineSettingsButton = document.querySelector('#engine-settings-button');
const engineModal = document.querySelector('#engine-modal');
const engineOptions = document.querySelector('#engine-options');
const closeModalButton = document.querySelector('#close-modal-button');
const saveEnginesButton = document.querySelector('#save-engines-button');
const selectAllEngines = document.querySelector('#select-all-engines');
const deselectAllEngines = document.querySelector('#deselect-all-engines');
487
+
488
+ // Event Listeners
489
// Attach all UI event handlers and render the initial state once the DOM
// has been parsed.
document.addEventListener('DOMContentLoaded', () => {
  // Click handlers, listed as [element, handler] pairs.
  const clickBindings = [
    [searchButton, performSearch],
    [loadMoreButton, loadMoreDatasets],
    [backButton, showSearchPage],
    [generateFullButton, generateFullDataset],
    [downloadCsvButton, () => downloadData('csv')],
    [downloadJsonButton, () => downloadData('json')],
    [downloadParquetButton, () => downloadData('parquet')],
    [engineSettingsButton, showEngineModal],
    [closeModalButton, hideEngineModal],
    [saveEnginesButton, saveEngineSettings],
    [selectAllEngines, () => toggleAllEngines(true)],
    [deselectAllEngines, () => toggleAllEngines(false)],
  ];
  for (const [element, handler] of clickBindings) {
    element.addEventListener('click', handler);
  }

  // Pressing Enter in the search box triggers the same search as the button.
  searchInput.addEventListener('keypress', (event) => {
    if (event.key === 'Enter') {
      performSearch();
    }
  });

  dataSourceToggle.addEventListener('change', toggleDataSource);

  // Initial render: engine checkboxes, then the placeholder dataset cards.
  populateEngineOptions();
  showPlaceholderDatasets();
});
514
+
515
+ // Search Engine Settings
516
/**
 * Rebuild the checkbox list inside the engine settings modal: one row per
 * entry of `searchEngines`, pre-checked when the engine is currently listed
 * in `selectedEngines`.
 */
function populateEngineOptions() {
  engineOptions.innerHTML = '';

  for (const engineName of searchEngines) {
    const checkedAttr = selectedEngines.includes(engineName) ? 'checked' : '';

    const row = document.createElement('div');
    row.className = 'flex items-center';
    row.innerHTML = `
<input type="checkbox" id="engine-${engineName}" class="engine-checkbox mr-2 h-4 w-4"
value="${engineName}" ${checkedAttr}>
<label for="engine-${engineName}" class="cursor-pointer">${engineName}</label>
`;

    engineOptions.appendChild(row);
  }
}
534
+
535
/**
 * Open the search-engine settings modal.
 *
 * The checkbox list is rebuilt from `selectedEngines` first. Without this,
 * ticks changed and then discarded via the close button would still be
 * shown the next time the modal opens, although they were never saved.
 */
function showEngineModal() {
  populateEngineOptions();
  engineModal.classList.remove('hidden');
}
538
+
539
/** Close the search-engine settings modal by re-applying the "hidden" class. */
function hideEngineModal() {
  const { classList } = engineModal;
  classList.add('hidden');
}
542
+
543
/**
 * Persist the modal's checkbox state into `selectedEngines` and close the
 * modal.
 *
 * If nothing is ticked, DuckDuckGo is selected as a fallback so that a
 * search always has at least one engine to name.
 *
 * Exactly one notification is shown per save. The original emitted the
 * fallback explanation and then immediately the generic "Updated ..."
 * message as well.
 * NOTE(review): showNotification is defined outside this excerpt; if it
 * replaces the previous toast, the explanation was effectively lost.
 */
function saveEngineSettings() {
  const FALLBACK_ENGINE = 'DuckDuckGo.com';

  const tickedBoxes = document.querySelectorAll('.engine-checkbox:checked');
  selectedEngines = Array.from(tickedBoxes, (box) => box.value);

  if (selectedEngines.length === 0) {
    selectedEngines = [FALLBACK_ENGINE];
    // Reflect the fallback in the UI; tolerate a missing checkbox.
    const fallbackBox = document.getElementById(`engine-${FALLBACK_ENGINE}`);
    if (fallbackBox) {
      fallbackBox.checked = true;
    }
    hideEngineModal();
    showNotification("At least one search engine must be selected. Using DuckDuckGo as default.");
    return;
  }

  hideEngineModal();
  // Avoid the ungrammatical "Using 1 engines."
  const noun = selectedEngines.length === 1 ? 'engine' : 'engines';
  showNotification(`Updated search engine settings. Using ${selectedEngines.length} ${noun}.`);
}
557
+
558
/**
 * Tick or untick every engine checkbox in the settings modal.
 * Only the checkboxes change; `selectedEngines` is left untouched here.
 *
 * @param {boolean} select - true to tick all boxes, false to clear them.
 */
function toggleAllEngines(select) {
  for (const box of document.querySelectorAll('.engine-checkbox')) {
    box.checked = select;
  }
}
564
+
565
+ // Toggle data source between real and AI
566
/**
 * React to the data-source switch: store the new mode in `useRealData`,
 * update the label next to the switch, show the engine settings button only
 * in real-data mode, and announce the change.
 */
function toggleDataSource() {
  useRealData = dataSourceToggle.checked;

  const mode = useRealData
    ? { label: "Using: Real + AI Data", buttonDisplay: "flex", description: "combined real and synthetic" }
    : { label: "Using: AI Data Only", buttonDisplay: "none", description: "synthetic-only" };

  dataSourceText.textContent = mode.label;
  // Engine selection is only meaningful when real data is in use.
  engineSettingsButton.style.display = mode.buttonDisplay;

  showNotification(`Switched to ${mode.description} data mode`);
}
575
+
576
+ // Search functionality
577
/**
 * Start a new search from the text in the search box.
 *
 * Blank queries are ignored. Otherwise the per-query state is reset, the
 * results area is replaced with loading skeletons, and the query is routed
 * to the real-data or AI-only search depending on `useRealData`.
 */
function performSearch() {
  const query = searchInput.value.trim();
  if (!query) {
    return;
  }

  currentSearchQuery = query;
  currentPage = 1;
  currentDatasets = [];

  resultsContainer.innerHTML = '';
  // Hide "Load more" left over from the previous search; otherwise it stays
  // visible and clickable under the skeletons while this search is pending.
  loadMoreContainer.classList.add('hidden');
  showLoadingSkeletons();

  if (useRealData) {
    // Search-engine-flavoured prompt plus AI.
    searchWithRealData(query);
  } else {
    // AI-generated dataset names only.
    searchWithAIData(query);
  }
}
596
+
597
/**
 * Ask the model for dataset names framed as results from one randomly chosen
 * search engine, then render them.
 *
 * The engine is picked from `selectedEngines`, stored in `currentEngine`,
 * and also captured in a local constant. The response handler runs later,
 * and `currentEngine` may have been reassigned by then (another search or
 * a "load more"). Reading the global there, as the original did, could tag
 * these results with an engine that was not the one named in the prompt.
 *
 * If sending the message throws, the error is reported and the search falls
 * back to searchWithAIData.
 *
 * @param {string} query - Trimmed search text entered by the user.
 */
function searchWithRealData(query) {
  const engine = selectedEngines[Math.floor(Math.random() * selectedEngines.length)];
  currentEngine = engine;

  window.Poe.registerHandler("real-search-handler", (result) => {
    if (result.status === "error") {
      showError("Error querying search engines");
      return;
    }

    const message = result.responses[0];
    if (message.status !== "complete") {
      return;
    }

    // Parse "N. Name (tags)" lines and mark each entry as engine-backed.
    const datasets = parseDatasetResults(message.content);
    datasets.forEach((dataset) => {
      dataset.isReal = true;
      dataset.engine = engine;
    });

    currentDatasets = datasets;

    // Replace the loading skeletons with the real cards.
    resultsContainer.innerHTML = '';
    displayDatasets(datasets);

    // Show "Load more" only when there is something to extend; hide it
    // otherwise so a stale button cannot linger after an empty result.
    loadMoreContainer.classList.toggle('hidden', datasets.length === 0);
  });

  try {
    window.Poe.sendUserMessage(
      `@Claude-3.7-Sonnet You are a data specialist who can transform real search results into structured datasets.

A user is searching for data about: "${query}"

Imagine you've queried ${engine} and received real search results. Create a list of 10 specific datasets that could be created from these search results.

For each dataset:
1. Give it a clear, specific name related to the search topic
2. Include 3-5 relevant tags in parentheses, with one tag specifying the ML task type (classification, regression, clustering, etc.)

Format each dataset as:
1. DatasetName (tag1, tag2, ml_task_tag)

Make these datasets sound like real collections that could be created from ${engine} search results on "${query}".`,
      {
        handler: "real-search-handler",
        stream: false,
        openChat: false
      }
    );
  } catch (err) {
    showError("Error sending message: " + err);
    // Fall back to AI data
    searchWithAIData(query);
  }
}
659
+
660
/**
 * Ask the model for purely synthetic dataset names matching the query and
 * render them. Entries are marked `isReal = false`, so no engine badge is
 * attached to them.
 *
 * @param {string} query - Trimmed search text entered by the user.
 */
function searchWithAIData(query) {
  const handleResponse = (result) => {
    if (result.status === "error") {
      showError("Error generating datasets");
      return;
    }

    const message = result.responses[0];
    if (message.status !== "complete") {
      return;
    }

    // Parse "N. Name (tags)" lines from the reply.
    const datasets = parseDatasetResults(message.content);
    datasets.forEach((dataset) => {
      dataset.isReal = false;
    });

    currentDatasets = datasets;

    // Replace the loading skeletons with the generated cards.
    resultsContainer.innerHTML = '';
    displayDatasets(datasets);

    // Show "Load more" only when results exist. The original never hid it,
    // so a button from an earlier search survived an empty result.
    loadMoreContainer.classList.toggle('hidden', datasets.length === 0);
  };

  window.Poe.registerHandler("dataset-search-handler", handleResponse);

  try {
    window.Poe.sendUserMessage(
      `@Claude-3.7-Sonnet A Machine Learning Practioner is looking for a dataset that matches '${query}'.
Generate a list of ${MAX_DATASETS_PER_PAGE} names of quality datasets that don't exist but sound plausible and would
be helpful. Feel free to reuse words from the query '${query}' to name the datasets.
Every dataset should be about '${query}' and have descriptive tags/keywords including the ML task name associated with the dataset (classification, regression, anomaly detection, etc.). Use the following format:
1. DatasetName1 (tag1, tag2, tag3)
2. DatasetName2 (tag1, tag2, tag3)`,
      {
        handler: "dataset-search-handler",
        stream: false,
        openChat: false
      }
    );
  } catch (err) {
    showError("Error sending message: " + err);
  }
}
708
+
709
/**
 * Extract dataset entries from a model reply.
 *
 * A line is recognised when it looks like "N. Name (tag1, tag2, ...)":
 * optional leading whitespace, a number, a period, the name, then a
 * parenthesised comma-separated tag list. All other lines are ignored.
 *
 * @param {string} content - Raw reply text.
 * @returns {{name: string, tags: string[]}[]} Parsed entries in reply
 *   order; an empty array when `content` is not a string or nothing matches.
 */
function parseDatasetResults(content) {
  if (typeof content !== 'string') {
    return [];
  }

  // NOTE(review): this script is embedded in a non-raw Python triple-quoted
  // string, where a backslash-n escape is turned into a real line break
  // before the browser sees it (which would split the string literal and
  // break the script). Build the newline character without that escape.
  const NEWLINE = String.fromCharCode(10);
  const entryPattern = /^\s*\d+\.\s+(.+?)\s+\((.+?)\)/;

  const datasets = [];
  for (const line of content.split(NEWLINE)) {
    const match = entryPattern.exec(line);
    if (!match) {
      continue;
    }
    const name = match[1].trim();
    // Drop empty tags produced by trailing or doubled commas.
    const tags = match[2]
      .split(',')
      .map((tag) => tag.trim())
      .filter((tag) => tag.length > 0);
    datasets.push({ name, tags });
  }

  return datasets;
}
725
+
726
/**
 * Append one clickable card per dataset to the results container.
 *
 * Names and tags originate from model output, so they are written with
 * textContent instead of being interpolated into innerHTML; markup in a
 * generated name can therefore never be interpreted by the browser.
 *
 * Engine-backed datasets (`isReal`) get a badge with the engine's short
 * name. A leading "Search." is dropped first, so "Search.Yahoo.com" and
 * "Search.Brave.com" read "Yahoo" and "Brave" rather than both "Search".
 *
 * @param {{name: string, tags: string[], isReal?: boolean, engine?: string}[]} datasets
 *   Entries to render; clicking a card opens it via showDatasetDetails.
 */
function displayDatasets(datasets) {
  const SEARCH_PREFIX = 'Search.';

  // Create an element with a class and, optionally, plain-text content.
  const makeElement = (tagName, className, text) => {
    const element = document.createElement(tagName);
    element.className = className;
    if (text !== undefined) {
      element.textContent = text;
    }
    return element;
  };

  datasets.forEach((dataset) => {
    const card = makeElement(
      'div',
      'dataset-card bg-white dark:bg-gray-800 rounded-lg p-4 border border-gray-200 dark:border-gray-700 cursor-pointer relative'
    );

    if (dataset.isReal) {
      const engineName = String(dataset.engine || '');
      const hostName = engineName.startsWith(SEARCH_PREFIX)
        ? engineName.slice(SEARCH_PREFIX.length)
        : engineName;
      const badge = makeElement('span', 'engine-badge', hostName.split('.')[0]);
      badge.title = `Data from ${engineName}`;
      card.appendChild(badge);
    }

    card.appendChild(makeElement('h3', 'text-lg font-semibold mb-2', dataset.name));

    const tagRow = makeElement('div', 'flex flex-wrap mt-2');
    (dataset.tags || []).forEach((tag) => {
      tagRow.appendChild(
        makeElement(
          'span',
          'inline-block bg-gray-100 dark:bg-gray-700 text-gray-800 dark:text-gray-300 text-xs px-2 py-1 rounded mr-1 mb-1',
          tag
        )
      );
    });
    card.appendChild(tagRow);

    card.addEventListener('click', () => showDatasetDetails(dataset));
    resultsContainer.appendChild(card);
  });
}
751
+
752
/**
 * Append four shimmering placeholder cards to the results container. Each
 * one has a title bar and three tag-sized bars, mimicking a dataset card
 * while results are pending.
 */
function showLoadingSkeletons() {
  const SKELETON_COUNT = 4;
  const skeletonMarkup = `
<div class="shimmer h-6 w-3/4 mb-2"></div>
<div class="flex flex-wrap mt-2">
<div class="shimmer h-6 w-16 rounded mr-1 mb-1"></div>
<div class="shimmer h-6 w-20 rounded mr-1 mb-1"></div>
<div class="shimmer h-6 w-24 rounded mr-1 mb-1"></div>
</div>
`;

  let remaining = SKELETON_COUNT;
  while (remaining > 0) {
    const placeholder = document.createElement('div');
    placeholder.className = 'bg-white dark:bg-gray-800 rounded-lg p-4 border border-gray-200 dark:border-gray-700';
    placeholder.innerHTML = skeletonMarkup;
    resultsContainer.appendChild(placeholder);
    remaining -= 1;
  }
}
767
+
768
/**
 * Handle the "Load more datasets" button: advance the page counter and
 * request another batch from the loader matching the current value of
 * `useRealData`.
 */
function loadMoreDatasets() {
  currentPage += 1;

  const loadNextBatch = useRealData ? loadMoreRealDatasets : loadMoreAIDatasets;
  loadNextBatch();
}
778
+
779
/**
 * Request a further batch of engine-flavoured dataset names for the current
 * query and append them to the results.
 *
 * Engine choice: a random engine from `selectedEngines` other than the one
 * used last, when such an engine exists; otherwise the previous engine is
 * kept. Filtering first also fixes a case the original retry loop missed.
 * With exactly one engine selected the loop never ran, so a previous engine
 * that had since been deselected in the settings kept being used.
 *
 * The chosen engine is captured locally so that the asynchronous handler
 * tags results with the engine named in this prompt, even if
 * `currentEngine` is reassigned before the reply arrives.
 *
 * If sending the message throws, the error is reported and the request
 * falls back to loadMoreAIDatasets.
 */
function loadMoreRealDatasets() {
  const previousEngine = currentEngine;
  const alternatives = selectedEngines.filter((name) => name !== previousEngine);
  const engine = alternatives.length > 0
    ? alternatives[Math.floor(Math.random() * alternatives.length)]
    : previousEngine;
  currentEngine = engine;

  window.Poe.registerHandler("more-real-datasets-handler", (result) => {
    if (result.status === "error") {
      showError("Error generating more datasets");
      return;
    }

    const message = result.responses[0];
    if (message.status !== "complete") {
      return;
    }

    // Parse "N. Name (tags)" lines and mark each entry as engine-backed.
    const datasets = parseDatasetResults(message.content);
    datasets.forEach((dataset) => {
      dataset.isReal = true;
      dataset.engine = engine;
    });

    currentDatasets = [...currentDatasets, ...datasets];

    // Append below the cards that are already on screen.
    displayDatasets(datasets);
  });

  try {
    window.Poe.sendUserMessage(
      `@Claude-3.7-Sonnet You're a data specialist who can transform real search results into structured datasets.

Continue our previous search for data about: "${currentSearchQuery}"

Now let's use a different search engine: ${engine}

Create 10 more specific datasets that could be created from these search results. Make sure these are different from the previous datasets.

Use the same format:
1. DatasetName (tag1, tag2, ml_task_tag)

Make these datasets sound like real collections that could be created from ${engine} search results on "${currentSearchQuery}".`,
      {
        handler: "more-real-datasets-handler",
        stream: false,
        openChat: false
      }
    );
  } catch (err) {
    showError("Error sending message: " + err);
    // Fall back to AI data
    loadMoreAIDatasets();
  }
}
836
+
837
/**
 * Ask the bot for another page of AI-generated dataset names.
 *
 * The reply is parsed with `parseDatasetResults`; each dataset is flagged as
 * not real, appended to `currentDatasets`, and rendered. Failures to send
 * the message are reported through `showError`.
 */
function loadMoreAIDatasets() {
    const handlerName = "more-datasets-handler";

    const onReply = (result) => {
        if (result.status === "error") {
            showError("Error generating more datasets");
            return;
        }

        const reply = result.responses[0];
        if (reply.status !== "complete") {
            return;
        }

        // Turn the reply text into dataset entries and mark them as synthetic.
        const additions = parseDatasetResults(reply.content);
        additions.forEach(entry => {
            entry.isReal = false;
        });

        currentDatasets = [...currentDatasets, ...additions];
        displayDatasets(additions);
    };

    window.Poe.registerHandler(handlerName, onReply);

    const prompt = `@Claude-3.7-Sonnet Please generate ${MAX_DATASETS_PER_PAGE} more dataset names about '${currentSearchQuery}'. Use the same format as before:
1. DatasetName1 (tag1, tag2, tag3)
Make sure these are completely different from previous suggestions.`;
    const sendOptions = {
        handler: handlerName,
        stream: false,
        openChat: false
    };

    try {
        window.Poe.sendUserMessage(prompt, sendOptions);
    } catch (err) {
        showError("Error sending message: " + err);
    }
}
876
+
877
/**
 * Switch from the search page to the detail page for one dataset.
 *
 * Fills in the title, tag badges, source badge and source explanation,
 * resets the preview / full-dataset areas, and starts generating a preview
 * with the real-data or AI generator depending on `dataset.isReal`.
 *
 * Tag names, the engine name and the search query originate from model
 * output or user input, so they are inserted as text nodes rather than
 * being interpolated into an HTML string.
 *
 * @param {{name: string, tags: string[], isReal: boolean, engine?: string}} dataset
 *     Dataset entry selected by the user.
 */
function showDatasetDetails(dataset) {
    currentDataset = dataset;
    searchPage.classList.add('hidden');
    datasetPage.classList.remove('hidden');

    // Update UI with dataset info
    datasetTitle.textContent = dataset.name;

    datasetTags.innerHTML = '';
    dataset.tags.forEach(tag => {
        const tagBadge = document.createElement('span');
        tagBadge.className = 'inline-block bg-gray-100 dark:bg-gray-700 text-gray-800 dark:text-gray-300 text-xs px-2 py-1 rounded mr-1 mb-1';
        tagBadge.textContent = tag;
        datasetTags.appendChild(tagBadge);
    });

    // Small helpers that append plain or emphasised text to the source note
    // without ever parsing the values as HTML.
    const appendText = (text) => {
        sourceDetails.appendChild(document.createTextNode(text));
    };
    const appendStrong = (text) => {
        const strong = document.createElement('strong');
        strong.textContent = String(text);
        sourceDetails.appendChild(strong);
    };

    // Update source badge
    sourceDetails.innerHTML = '';
    if (dataset.isReal) {
        dataSourceBadge.textContent = "Real Data";
        dataSourceBadge.className = "px-3 py-1 rounded-full text-xs font-medium bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-200";
        appendText('This dataset is based on real information queried from ');
        appendStrong(dataset.engine);
        appendText(' for the search term "');
        appendStrong(currentSearchQuery);
        appendText('". The data has been structured for machine learning use.');
    } else {
        dataSourceBadge.textContent = "AI-Generated";
        dataSourceBadge.className = "px-3 py-1 rounded-full text-xs font-medium bg-purple-100 text-purple-800 dark:bg-purple-900 dark:text-purple-200";
        appendText(`This is an AI-generated dataset created using Claude-3.7-Sonnet. The content is synthetic and designed to represent plausible data related to "${currentSearchQuery}".`);
    }

    // Clear previous content (static placeholder markup only)
    datasetDescription.innerHTML = '<div class="shimmer h-4 w-full mb-2"></div>'.repeat(3);
    previewTable.innerHTML = '';
    fullDatasetSection.classList.add('hidden');
    generateStatus.classList.add('hidden');
    generateFullButton.disabled = false;

    // Reset full dataset
    fullDatasetRows = [];

    // Generate dataset preview - different approach for real vs AI data
    if (dataset.isReal) {
        generateRealDatasetPreview(dataset);
    } else {
        generateAIDatasetPreview(dataset);
    }

    // Scroll to top
    window.scrollTo(0, 0);
}
919
+
920
/**
 * Ask the bot for a description and a 5-row CSV preview of a "real data"
 * dataset, then render both on the detail page.
 *
 * The reply is expected to contain a `**Dataset Description:**` section
 * followed by a `**CSV Content Preview:**` section with a fenced CSV block.
 * When the second marker is missing, the whole reply is treated as CSV.
 * In both cases the CSV is taken from the first fenced code block if there
 * is one, so fence markers and any prose after the closing fence never end
 * up in the parsed rows.
 *
 * @param {{name: string, tags: string[], engine: string}} dataset
 *     Dataset whose preview should be generated.
 */
function generateRealDatasetPreview(dataset) {
    // Return the CSV payload of a reply fragment: the body of the first
    // fenced block when present, otherwise the text with stray fences removed.
    const extractCsv = (text) => {
        const fenced = text.match(/```[a-z]*\s*?\n([\s\S]*?)```/i);
        if (fenced) {
            return fenced[1].trim();
        }
        return text.replace(/```csv\n|```\n|```/g, '').trim();
    };

    window.Poe.registerHandler("real-preview-handler", (result) => {
        if (result.status === "error") {
            datasetDescription.innerHTML = '<p class="text-red-500">Error generating dataset preview</p>';
            return;
        }

        const message = result.responses[0];
        if (message.status !== "complete") {
            return;
        }

        const content = message.content;

        // Extract description and CSV
        const parts = content.split('**CSV Content Preview:**');
        let description = "No description available";
        let csvContent = extractCsv(content);

        if (parts.length > 1) {
            description = parts[0].replace('**Dataset Description:**', '').trim();
            csvContent = extractCsv(parts[1]);
        }

        // Display description.
        // NOTE(review): marked.parse output is inserted as HTML and the text
        // comes from the model; consider sanitizing it before insertion.
        datasetDescription.innerHTML = marked.parse(description);

        // Parse and display CSV preview
        try {
            const results = Papa.parse(csvContent, {
                header: true,
                skipEmptyLines: true
            });

            if (results.data && results.data.length > 0) {
                // Create table from CSV data
                createTable(previewTable, results.data, results.meta.fields);
            } else {
                previewTable.innerHTML = '<p class="p-4 text-red-500">No preview data available</p>';
            }
        } catch (err) {
            previewTable.innerHTML = `<p class="p-4 text-red-500">Error parsing CSV: ${err.message}</p>`;
        }
    });

    try {
        const tagsStr = dataset.tags.join(', ');
        window.Poe.sendUserMessage(
            `@Claude-3.7-Sonnet You're a specialist in converting web search results into structured data.

Based on search results from ${dataset.engine} about "${currentSearchQuery}",
create a preview of the dataset "${dataset.name}" with tags "${tagsStr}".

First, write a detailed description of what this dataset contains, its structure, and how it was constructed from web search results.

Then, generate a realistic 5-row CSV preview that resembles data you might get if you scraped and structured real results from ${dataset.engine}.

Format your response with:
**Dataset Description:** [detailed description]

**CSV Content Preview:**
\`\`\`csv
[CSV header and 5 rows of realistic data]
\`\`\`

Include relevant columns for the dataset type, with proper labels/categories where appropriate. The data should look like it came from real sources.`,
            {
                handler: "real-preview-handler",
                stream: false,
                openChat: false
            }
        );
    } catch (err) {
        datasetDescription.innerHTML = `<p class="text-red-500">Error: ${err.message}</p>`;
    }
}
1001
+
1002
/**
 * Ask the bot for a short description and a 5-row CSV preview of an
 * AI-generated dataset, then render both on the detail page.
 *
 * The reply is expected to contain a `**Dataset Description:**` section
 * followed by a `**CSV Content Preview:**` section with a fenced CSV block.
 * When the second marker is missing, the whole reply is treated as CSV.
 * In both cases the CSV is taken from the first fenced code block if there
 * is one, so fence markers and any prose after the closing fence never end
 * up in the parsed rows.
 *
 * @param {{name: string, tags: string[]}} dataset
 *     Dataset whose preview should be generated.
 */
function generateAIDatasetPreview(dataset) {
    // Return the CSV payload of a reply fragment: the body of the first
    // fenced block when present, otherwise the text with stray fences removed.
    const extractCsv = (text) => {
        const fenced = text.match(/```[a-z]*\s*?\n([\s\S]*?)```/i);
        if (fenced) {
            return fenced[1].trim();
        }
        return text.replace(/```csv\n|```\n|```/g, '').trim();
    };

    window.Poe.registerHandler("dataset-preview-handler", (result) => {
        if (result.status === "error") {
            datasetDescription.innerHTML = '<p class="text-red-500">Error generating dataset preview</p>';
            return;
        }

        const message = result.responses[0];
        if (message.status !== "complete") {
            return;
        }

        const content = message.content;

        // Extract description and CSV
        const parts = content.split('**CSV Content Preview:**');
        let description = "No description available";
        let csvContent = extractCsv(content);

        if (parts.length > 1) {
            description = parts[0].replace('**Dataset Description:**', '').trim();
            csvContent = extractCsv(parts[1]);
        }

        // Display description.
        // NOTE(review): marked.parse output is inserted as HTML and the text
        // comes from the model; consider sanitizing it before insertion.
        datasetDescription.innerHTML = marked.parse(description);

        // Parse and display CSV preview
        try {
            const results = Papa.parse(csvContent, {
                header: true,
                skipEmptyLines: true
            });

            if (results.data && results.data.length > 0) {
                // Create table from CSV data
                createTable(previewTable, results.data, results.meta.fields);
            } else {
                previewTable.innerHTML = '<p class="p-4 text-red-500">No preview data available</p>';
            }
        } catch (err) {
            previewTable.innerHTML = `<p class="p-4 text-red-500">Error parsing CSV: ${err.message}</p>`;
        }
    });

    try {
        const tagsStr = dataset.tags.join(', ');
        window.Poe.sendUserMessage(
            `@Claude-3.7-Sonnet An ML practitioner is looking for a dataset CSV after the query '${currentSearchQuery}'.
Generate the first 5 rows of a plausible and quality CSV for the dataset '${dataset.name}'.
You can get inspiration from related keywords '${tagsStr}' but most importantly the dataset should correspond to the query '${currentSearchQuery}'.
Focus on quality text content and use a 'label' or 'labels' column if it makes sense (invent labels, avoid reusing the keywords, be accurate while labelling texts).
Reply using a short description of the dataset with title **Dataset Description:** followed by the CSV content in a code block and with title **CSV Content Preview:**`,
            {
                handler: "dataset-preview-handler",
                stream: false,
                openChat: false
            }
        );
    } catch (err) {
        datasetDescription.innerHTML = `<p class="text-red-500">Error: ${err.message}</p>`;
    }
}
1070
+
1071
/**
 * Render rows of data as an HTML table inside `container`.
 *
 * Any existing content of the container is discarded. All header and cell
 * values are written through `textContent`, so they are never parsed as
 * HTML. Missing values (`null` / `undefined`) become empty cells; other
 * values, including `0` and `false`, are shown as text.
 *
 * @param {HTMLElement} container - Element that receives the table.
 * @param {Object[]} data - Row objects keyed by header name.
 * @param {string[]} headers - Column names, in display order.
 */
function createTable(container, data, headers) {
    container.innerHTML = '';

    const table = document.createElement('table');
    table.className = 'w-full';

    // Create header
    const thead = document.createElement('thead');
    const headerRow = document.createElement('tr');

    headers.forEach(header => {
        const th = document.createElement('th');
        th.textContent = header;
        headerRow.appendChild(th);
    });

    thead.appendChild(headerRow);
    table.appendChild(thead);

    // Create body
    const tbody = document.createElement('tbody');

    data.forEach(row => {
        const tr = document.createElement('tr');

        headers.forEach(header => {
            const td = document.createElement('td');
            const value = row[header];
            // Only a missing value is blank; a falsy-but-real value such as
            // 0 must still be displayed.
            td.textContent = value == null ? '' : String(value);
            tr.appendChild(td);
        });

        tbody.appendChild(tr);
    });

    table.appendChild(tbody);
    container.appendChild(table);
}
1108
+
1109
/**
 * Start generating the full dataset for the dataset currently on display.
 *
 * The column names and the first rows are read back from the rendered
 * preview table, stored in `fullDatasetRows`, and then handed to the
 * real-data or AI batch generator depending on `currentDataset.isReal`.
 *
 * If the preview table has no header cells (preview still loading or
 * failed), nothing is requested: without a header the batch prompts would
 * ask for rows with no defined columns. An error is shown and the button
 * stays usable.
 */
function generateFullDataset() {
    // Get the CSV header from the preview table
    const previewHeaders = Array.from(previewTable.querySelectorAll('thead th')).map(th => th.textContent);
    if (previewHeaders.length === 0) {
        showError("Dataset preview is not available yet");
        return;
    }
    const csvHeader = previewHeaders.join(',');

    // Disable button and show status
    generateFullButton.disabled = true;
    generateStatus.classList.remove('hidden');
    rowsCount.textContent = '0';
    progressBar.style.width = '0%';

    const targetRows = MAX_FULL_DATASET_ROWS;

    // Add initial rows from preview
    const previewRows = Array.from(previewTable.querySelectorAll('tbody tr')).map(tr => {
        const row = {};
        Array.from(tr.querySelectorAll('td')).forEach((td, index) => {
            row[previewHeaders[index]] = td.textContent;
        });
        return row;
    });

    fullDatasetRows = [...previewRows];
    const currentRows = previewRows.length;
    updateGenerationProgress(currentRows, targetRows);

    // Choose generation method based on dataset type
    if (currentDataset.isReal) {
        generateFullRealDataset(previewHeaders, csvHeader, currentRows, targetRows);
    } else {
        generateFullAIDataset(previewHeaders, csvHeader, currentRows, targetRows);
    }
}
1146
+
1147
/**
 * Grow `fullDatasetRows` to `targetRows` rows by requesting CSV batches that
 * imitate search-engine results, then show the finished dataset.
 *
 * Batches are requested one after another. Each reply is parsed as CSV and
 * its rows are appended (never beyond `targetRows`); progress is reported
 * through `updateGenerationProgress`. A reply with no usable rows is
 * retried a limited number of times; after that, or on a bot error, the
 * failure is reported and the generate button is re-enabled.
 *
 * @param {string[]} previewHeaders - Column names of the preview (not used
 *     by this function; kept for signature parity with the caller).
 * @param {string} csvHeader - CSV header line sent to the bot.
 * @param {number} currentRows - Number of rows already in `fullDatasetRows`.
 * @param {number} targetRows - Number of rows to reach.
 */
function generateFullRealDataset(previewHeaders, csvHeader, currentRows, targetRows) {
    const batchSize = 15; // Larger batches for efficiency
    const maxAttemptsPerBatch = 3; // Bounds retries so a bad reply cannot loop forever

    // Take the CSV from the first fenced code block when there is one;
    // otherwise use the whole reply with stray fence markers removed.
    const extractCsv = (content) => {
        const fenced = content.match(/```[a-z]*\s*?\n([\s\S]*?)```/i);
        if (fenced) {
            return fenced[1].trim();
        }
        return content.replace(/```csv\n|```\n|```/g, '').trim();
    };

    // Report a failure and let the user start the generation again.
    const failGeneration = (message) => {
        showError(message);
        generateFullButton.disabled = false;
    };

    // Function to generate more rows in batches from "real" search results
    const generateBatch = (batchIndex, attempt = 1) => {
        // `currentRows` already counts every accepted row, so it alone
        // decides whether the target has been reached.
        if (currentRows >= targetRows) {
            // We've reached the target, show the full dataset
            showFullDataset();
            return;
        }

        const rowsToRequest = Math.min(batchSize, targetRows - currentRows);
        const handlerName = `real-batch-${batchIndex}-handler`;

        const retryOrFail = () => {
            if (attempt < maxAttemptsPerBatch) {
                generateBatch(batchIndex, attempt + 1);
            } else {
                failGeneration("Error generating dataset rows");
            }
        };

        window.Poe.registerHandler(handlerName, (result) => {
            if (result.status === "error") {
                failGeneration("Error generating dataset rows");
                return;
            }

            const message = result.responses[0];
            if (message.status !== "complete") {
                return;
            }

            let parsedRows = [];
            try {
                // Parse the CSV
                const results = Papa.parse(extractCsv(message.content), {
                    header: true,
                    skipEmptyLines: true
                });
                parsedRows = results.data || [];
            } catch (err) {
                console.error("Error parsing CSV:", err);
            }

            if (parsedRows.length === 0) {
                retryOrFail();
                return;
            }

            // Add the new rows, dropping any surplus beyond the target
            const acceptedRows = parsedRows.slice(0, targetRows - currentRows);
            fullDatasetRows = [...fullDatasetRows, ...acceptedRows];
            currentRows += acceptedRows.length;

            // Update progress
            updateGenerationProgress(currentRows, targetRows);

            // Generate next batch
            generateBatch(batchIndex + 1);
        });

        try {
            // For variation, rotate through engines for each batch
            const engineForBatch = selectedEngines[batchIndex % selectedEngines.length] || currentDataset.engine;

            window.Poe.sendUserMessage(
                `@Claude-3.7-Sonnet You're expanding a dataset based on search results from ${engineForBatch}.

For the dataset "${currentDataset.name}" about "${currentSearchQuery}", please generate ${rowsToRequest} more rows of data.

Use this exact CSV header: ${csvHeader}

The data should look realistic, as if it came from actual ${engineForBatch} search results for "${currentSearchQuery}".
Include appropriate values for each field, maintaining the same patterns and types as seen in the existing data.

Only include the CSV data in your response (header + ${rowsToRequest} rows), no explanations or additional text.`,
                {
                    handler: handlerName,
                    stream: false,
                    openChat: false
                }
            );
        } catch (err) {
            failGeneration("Error sending message: " + err);
        }
    };

    // Start generating batches
    generateBatch(0);
}
1239
+
1240
/**
 * Grow `fullDatasetRows` to `targetRows` rows by requesting AI-generated CSV
 * batches, then show the finished dataset.
 *
 * Batches are requested one after another. Each reply is parsed as CSV and
 * its rows are appended (never beyond `targetRows`); progress is reported
 * through `updateGenerationProgress`. A reply with no usable rows is
 * retried a limited number of times; after that, or on a bot error, the
 * failure is reported and the generate button is re-enabled.
 *
 * @param {string[]} previewHeaders - Column names of the preview (not used
 *     by this function; kept for signature parity with the caller).
 * @param {string} csvHeader - CSV header line sent to the bot.
 * @param {number} currentRows - Number of rows already in `fullDatasetRows`.
 * @param {number} targetRows - Number of rows to reach.
 */
function generateFullAIDataset(previewHeaders, csvHeader, currentRows, targetRows) {
    const batchSize = 10;
    const maxAttemptsPerBatch = 3; // Bounds retries so a bad reply cannot loop forever

    // Take the CSV from the first fenced code block when there is one;
    // otherwise use the whole reply with stray fence markers removed.
    const extractCsv = (content) => {
        const fenced = content.match(/```[a-z]*\s*?\n([\s\S]*?)```/i);
        if (fenced) {
            return fenced[1].trim();
        }
        return content.replace(/```csv\n|```\n|```/g, '').trim();
    };

    // Report a failure and let the user start the generation again.
    const failGeneration = (message) => {
        showError(message);
        generateFullButton.disabled = false;
    };

    // Function to generate more rows in batches from AI
    const generateBatch = (batchIndex, attempt = 1) => {
        // `currentRows` already counts every accepted row, so it alone
        // decides whether the target has been reached.
        if (currentRows >= targetRows) {
            // We've reached the target, show the full dataset
            showFullDataset();
            return;
        }

        const rowsToRequest = Math.min(batchSize, targetRows - currentRows);
        const handlerName = `batch-${batchIndex}-handler`;

        const retryOrFail = () => {
            if (attempt < maxAttemptsPerBatch) {
                generateBatch(batchIndex, attempt + 1);
            } else {
                failGeneration("Error generating dataset rows");
            }
        };

        window.Poe.registerHandler(handlerName, (result) => {
            if (result.status === "error") {
                failGeneration("Error generating dataset rows");
                return;
            }

            const message = result.responses[0];
            if (message.status !== "complete") {
                return;
            }

            let parsedRows = [];
            try {
                // Parse the CSV
                const results = Papa.parse(extractCsv(message.content), {
                    header: true,
                    skipEmptyLines: true
                });
                parsedRows = results.data || [];
            } catch (err) {
                console.error("Error parsing CSV:", err);
            }

            if (parsedRows.length === 0) {
                retryOrFail();
                return;
            }

            // Add the new rows, dropping any surplus beyond the target
            const acceptedRows = parsedRows.slice(0, targetRows - currentRows);
            fullDatasetRows = [...fullDatasetRows, ...acceptedRows];
            currentRows += acceptedRows.length;

            // Update progress
            updateGenerationProgress(currentRows, targetRows);

            // Generate next batch
            generateBatch(batchIndex + 1);
        });

        try {
            const tagsStr = currentDataset.tags.join(', ');
            window.Poe.sendUserMessage(
                `@Claude-3.7-Sonnet For the dataset '${currentDataset.name}' about '${currentSearchQuery}' with tags '${tagsStr}',
please generate ${rowsToRequest} more sample rows in CSV format. Use the same CSV header: ${csvHeader}
Only include the CSV data in your response, no explanations or additional text.`,
                {
                    handler: handlerName,
                    stream: false,
                    openChat: false
                }
            );
        } catch (err) {
            failGeneration("Error sending message: " + err);
        }
    };

    // Start generating batches
    generateBatch(0);
}
1323
+
1324
/**
 * Reflect generation progress in the row counter and the progress bar.
 *
 * @param {number} current - Rows generated so far.
 * @param {number} total - Rows expected in total.
 */
function updateGenerationProgress(current, total) {
    const wholePercent = Math.floor((current / total) * 100);
    // The bar never grows past its container, even if more rows arrive.
    const barWidth = Math.min(100, wholePercent);

    rowsCount.textContent = current;
    progressBar.style.width = `${barWidth}%`;
}
1329
+
1330
/**
 * Reveal the full-dataset section and render the first rows of
 * `fullDatasetRows` (at most ten) together with a note stating how many
 * rows are displayed out of the total.
 */
function showFullDataset() {
    const maxDisplayedRows = 10;

    // Hide generation status
    generateStatus.classList.add('hidden');

    // Show full dataset section
    fullDatasetSection.classList.remove('hidden');

    // Get headers from the data
    const headers = Object.keys(fullDatasetRows[0] || {});

    // Create and display the table
    const displayedRows = fullDatasetRows.slice(0, maxDisplayedRows);
    createTable(fullTable, displayedRows, headers);

    // Add a note about showing limited rows. The count comes from the rows
    // actually rendered so it stays correct for datasets with fewer than ten.
    const note = document.createElement('p');
    note.className = 'text-sm text-gray-600 dark:text-gray-400 mt-2';
    note.textContent = `Showing ${displayedRows.length} of ${fullDatasetRows.length} rows. Use the download buttons to get the complete dataset.`;
    fullTable.appendChild(note);
}
1349
+
1350
/**
 * Download the generated dataset in the requested format.
 *
 * Does nothing while `fullDatasetRows` is empty or when `format` is not one
 * of the known values. The file name is the dataset name with whitespace
 * runs replaced by underscores, followed by `_dataset`.
 *
 * @param {string} format - One of 'csv', 'json' or 'parquet'.
 */
function downloadData(format) {
    if (fullDatasetRows.length === 0) {
        return;
    }

    const baseName = `${currentDataset.name.replace(/\s+/g, '_')}_dataset`;

    if (format === 'csv') {
        downloadCsv(baseName);
    } else if (format === 'json') {
        downloadJson(baseName);
    } else if (format === 'parquet') {
        // Parquet cannot be produced here, so tell the user and hand out
        // JSON under a name that makes the substitution obvious.
        showNotification("Parquet format download simulated - actual conversion would require a server component");
        downloadJson(baseName + "_parquet_simulated");
    }
}
1369
+
1370
/**
 * Serialize `fullDatasetRows` as CSV and trigger a browser download.
 *
 * @param {string} filename - File name without the `.csv` extension.
 */
function downloadCsv(filename) {
    const csvText = Papa.unparse(fullDatasetRows);
    const csvBlob = new Blob([csvText], { type: 'text/csv' });
    const objectUrl = URL.createObjectURL(csvBlob);

    // A temporary link element is what actually starts the download.
    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = `${filename}.csv`;
    document.body.appendChild(link);
    link.click();

    // Remove the link and release the object URL shortly afterwards.
    const cleanUp = () => {
        document.body.removeChild(link);
        URL.revokeObjectURL(objectUrl);
    };
    setTimeout(cleanUp, 100);
}
1390
+
1391
/**
 * Serialize `fullDatasetRows` as pretty-printed JSON and trigger a browser
 * download.
 *
 * @param {string} filename - File name without the `.json` extension.
 */
function downloadJson(filename) {
    const jsonText = JSON.stringify(fullDatasetRows, null, 2);
    const jsonBlob = new Blob([jsonText], { type: 'application/json' });
    const objectUrl = URL.createObjectURL(jsonBlob);

    // A temporary link element is what actually starts the download.
    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = `${filename}.json`;
    document.body.appendChild(link);
    link.click();

    // Remove the link and release the object URL shortly afterwards.
    const cleanUp = () => {
        document.body.removeChild(link);
        URL.revokeObjectURL(objectUrl);
    };
    setTimeout(cleanUp, 100);
}
1411
+
1412
/**
 * Return to the search view: un-hide the search page and hide the dataset
 * detail page.
 */
function showSearchPage() {
    const HIDDEN_CLASS = 'hidden';

    searchPage.classList.toggle(HIDDEN_CLASS, false);
    datasetPage.classList.toggle(HIDDEN_CLASS, true);
}
1416
+
1417
/**
 * Report an error: log it to the console and show it to the user as an
 * error-styled notification.
 *
 * @param {string} message - Text describing what went wrong.
 */
function showError(message) {
    const asError = true;

    console.error(message);
    showNotification(message, asError);
}
1421
+
1422
/**
 * Show a temporary toast in the bottom-right corner of the page.
 *
 * The toast is red for errors and green otherwise. After three seconds its
 * opacity is set to zero, and 300 ms later it is removed from the document.
 *
 * @param {string} message - Text to display.
 * @param {boolean} [isError=false] - Whether to use the error styling.
 */
function showNotification(message, isError = false) {
    const VISIBLE_MS = 3000;
    const FADE_MS = 300;

    const colorClasses = isError ? 'bg-red-500 text-white' : 'bg-green-500 text-white';

    const toast = document.createElement('div');
    toast.className = `fixed bottom-4 right-4 px-6 py-3 rounded-lg shadow-lg ${colorClasses} z-50 transition-opacity duration-300`;
    toast.textContent = message;
    document.body.appendChild(toast);

    const removeToast = () => {
        document.body.removeChild(toast);
    };
    const fadeOut = () => {
        toast.style.opacity = '0';
        setTimeout(removeToast, FADE_MS);
    };

    setTimeout(fadeOut, VISIBLE_MS);
}
1440
+
1441
/**
 * Populate the results list with a fixed set of example datasets.
 *
 * The examples alternate between search-engine based entries and
 * AI-generated entries. They become `currentDatasets`, are rendered with
 * `displayDatasets`, and the "load more" container is made visible.
 */
function showPlaceholderDatasets() {
    // Builders for the two kinds of entries; only engine-backed entries
    // carry an `engine` property.
    const fromEngine = (name, tags, engine) => ({ name, tags, isReal: true, engine });
    const fromAI = (name, tags) => ({ name, tags, isReal: false });

    const examples = [
        fromEngine("NewsEventsPredict", ["classification", "media", "trend"], "AlltheInternet.com"),
        fromAI("FinancialForecast", ["economy", "stocks", "regression"]),
        fromEngine("HealthMonitor", ["science", "real-time", "anomaly detection"], "DuckDuckGo.com"),
        fromAI("SportsAnalysis", ["classification", "performance", "player tracking"]),
        fromEngine("RetailSalesAnalyzer", ["consumer behavior", "sales trend", "segmentation"], "Bing.com"),
        fromAI("SocialMediaSentiment", ["text classification", "opinion mining", "NLP"]),
    ];

    currentDatasets = examples;
    displayDatasets(examples);
    loadMoreContainer.classList.remove('hidden');
}
1482
+ </script>
1483
+ </body>
1484
+ </html>
1485
+ """
1486
+
1487
  # --- Gradio CSS ---
1488
  css = """
1489
  a { color: var(--body-text-color); }