acecalisto3 committed on
Commit
5e4f1ce
·
verified ·
1 Parent(s): afcc0ef

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +935 -169
app.py CHANGED
@@ -244,7 +244,6 @@ html = """
244
  </style>
245
  </head>
246
  <body class="bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200 min-h-screen">
247
- <!-- Dark mode detection -->
248
  <script>
249
  if (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) {
250
  document.documentElement.classList.add('dark');
@@ -259,15 +258,12 @@ html = """
259
  </script>
260
 
261
  <div class="container mx-auto px-4 py-8">
262
- <!-- Header -->
263
  <header class="text-center mb-8">
264
  <h1 class="text-3xl font-bold mb-2">🤗 Infinite Dataset Hub ♾️</h1>
265
  <p class="text-lg text-gray-600 dark:text-gray-400">Generate datasets from AI and real-world data sources</p>
266
  </header>
267
 
268
- <!-- Main Content -->
269
  <main>
270
- <!-- Search Section -->
271
  <div id="search-page" class="mb-8">
272
  <div class="max-w-3xl mx-auto">
273
  <div class="mb-4">
@@ -300,7 +296,6 @@ html = """
300
  </div>
301
  </div>
302
 
303
- <!-- Search Engine Selection Modal -->
304
  <div id="engine-modal" class="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50 hidden">
305
  <div class="bg-white dark:bg-gray-800 rounded-lg p-6 max-w-lg w-full max-h-[80vh] overflow-y-auto">
306
  <div class="flex justify-between items-center mb-4">
@@ -317,8 +312,7 @@ html = """
317
  </p>
318
 
319
  <div id="engine-options" class="space-y-2 mb-6">
320
- <!-- Engine options will be dynamically inserted here -->
321
- </div>
322
 
323
  <div class="flex justify-between">
324
  <button id="select-all-engines" class="text-primary hover:underline">Select All</button>
@@ -334,8 +328,7 @@ html = """
334
  </div>
335
 
336
  <div id="dataset-results" class="grid grid-cols-1 md:grid-cols-2 gap-4 mt-6">
337
- <!-- Dataset cards will be dynamically inserted here -->
338
- </div>
339
 
340
  <div id="load-more-container" class="text-center mt-6 hidden">
341
  <button id="load-more-button" class="bg-gray-200 dark:bg-gray-700 px-6 py-3 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition">
@@ -345,7 +338,6 @@ html = """
345
  </div>
346
  </div>
347
 
348
- <!-- Dataset Detail Page -->
349
  <div id="dataset-page" class="hidden max-w-4xl mx-auto">
350
  <button id="back-button" class="flex items-center text-primary mb-4 hover:underline">
351
  <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-1" viewBox="0 0 20 20" fill="currentColor">
@@ -423,7 +415,6 @@ html = """
423
  </div>
424
  </main>
425
 
426
- <!-- Footer -->
427
  <footer class="mt-12 text-center text-sm text-gray-600 dark:text-gray-400">
428
  <p>Powered by Claude-3.7-Sonnet • Datasets generated from real sources and AI</p>
429
  </footer>
@@ -485,115 +476,943 @@ html = """
485
  const selectAllEngines = document.getElementById('select-all-engines');
486
  const deselectAllEngines = document.getElementById('deselect-all-engines');
487
 
488
- // Event Listeners
489
- document.addEventListener('DOMContentLoaded', () => {
490
- searchButton.addEventListener('click', performSearch);
491
- searchInput.addEventListener('keypress', (e) => {
492
- if (e.key === 'Enter') performSearch();
493
- });
494
- loadMoreButton.addEventListener('click', loadMoreDatasets);
495
- backButton.addEventListener('click', showSearchPage);
496
- generateFullButton.addEventListener('click', generateFullDataset);
497
- downloadCsvButton.addEventListener('click', () => downloadData('csv'));
498
- downloadJsonButton.addEventListener('click', () => downloadData('json'));
499
- downloadParquetButton.addEventListener('click', () => downloadData('parquet'));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
500
 
501
- dataSourceToggle.addEventListener('change', toggleDataSource);
502
- engineSettingsButton.addEventListener('click', showEngineModal);
503
- closeModalButton.addEventListener('click', hideEngineModal);
504
- saveEnginesButton.addEventListener('click', saveEngineSettings);
505
- selectAllEngines.addEventListener('click', () => toggleAllEngines(true));
506
- deselectAllEngines.addEventListener('click', () => toggleAllEngines(false));
507
 
508
- // Initialize engine options
509
- populateEngineOptions();
510
 
511
- // Show initial placeholder datasets
512
- showPlaceholderDatasets();
513
- });
514
-
515
- // Search Engine Settings
516
- function populateEngineOptions() {
517
- engineOptions.innerHTML = '';
518
 
519
- searchEngines.forEach(engine => {
520
- const isChecked = selectedEngines.includes(engine);
521
-
522
- const optionDiv = document.createElement('div');
523
- optionDiv.className = 'flex items-center';
524
-
525
- optionDiv.innerHTML = `
526
- <input type="checkbox" id="engine-${engine}" class="engine-checkbox mr-2 h-4 w-4"
527
- value="${engine}" ${isChecked ? 'checked' : ''}>
528
- <label for="engine-${engine}" class="cursor-pointer">${engine}</label>
529
- `;
530
-
531
- engineOptions.appendChild(optionDiv);
532
- });
533
  }
534
 
535
- function showEngineModal() {
536
- engineModal.classList.remove('hidden');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
537
  }
538
 
539
- function hideEngineModal() {
540
- engineModal.classList.add('hidden');
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
541
  }
542
 
543
- function saveEngineSettings() {
544
- const checkboxes = document.querySelectorAll('.engine-checkbox:checked');
545
- selectedEngines = Array.from(checkboxes).map(cb => cb.value);
546
 
547
- if (selectedEngines.length === 0) {
548
- // Ensure at least one engine is selected
549
- selectedEngines = ["DuckDuckGo.com"];
550
- document.getElementById(`engine-DuckDuckGo.com`).checked = true;
551
- showNotification("At least one search engine must be selected. Using DuckDuckGo as default.");
552
- }
553
 
554
- hideEngineModal();
555
- showNotification(`Updated search engine settings. Using ${selectedEngines.length} engines.`);
 
 
 
 
 
 
 
 
556
  }
557
 
558
- function toggleAllEngines(select) {
559
- const checkboxes = document.querySelectorAll('.engine-checkbox');
560
- checkboxes.forEach(cb => {
561
- cb.checked = select;
562
- });
563
  }
564
 
565
- // Toggle data source between real and AI
566
- function toggleDataSource() {
567
- useRealData = dataSourceToggle.checked;
568
- dataSourceText.textContent = useRealData ? "Using: Real + AI Data" : "Using: AI Data Only";
569
-
570
- // Show or hide engine settings button
571
- engineSettingsButton.style.display = useRealData ? "flex" : "none";
572
-
573
- showNotification(`Switched to ${useRealData ? "combined real and synthetic" : "synthetic-only"} data mode`);
574
  }
575
 
576
- // Search functionality
577
- function performSearch() {
578
- const query = searchInput.value.trim();
579
- if (!query) return;
580
-
581
- currentSearchQuery = query;
582
- currentPage = 1;
583
- currentDatasets = [];
584
 
585
- resultsContainer.innerHTML = '';
586
- showLoadingSkeletons();
587
 
588
- if (useRealData) {
589
- // Use real data from search engines + AI
590
- searchWithRealData(query);
591
- } else {
592
- // Use only AI-generated data
593
- searchWithAIData(query);
594
- }
595
  }
596
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
597
  function searchWithRealData(query) {
598
  // Randomly select a search engine from the user's selected engines
599
  currentEngine = selectedEngines[Math.floor(Math.random() * selectedEngines.length)];
@@ -706,65 +1525,6 @@ html = """
706
  }
707
  }
708
 
709
- function parseDatasetResults(content) {
710
- const lines = content.split('\n');
711
- const datasets = [];
712
-
713
- lines.forEach(line => {
714
- // Match lines that start with a number followed by a period
715
- const match = line.match(/^\s*\d+\.\s+(.+?)\s+\((.+?)\)/);
716
- if (match) {
717
- const name = match[1].trim();
718
- const tags = match[2].split(',').map(tag => tag.trim());
719
- datasets.push({ name, tags });
720
- }
721
- });
722
-
723
- return datasets;
724
- }
725
-
726
- function displayDatasets(datasets) {
727
- datasets.forEach(dataset => {
728
- const card = document.createElement('div');
729
- card.className = 'dataset-card bg-white dark:bg-gray-800 rounded-lg p-4 border border-gray-200 dark:border-gray-700 cursor-pointer relative';
730
-
731
- const tagsHtml = dataset.tags.map(tag =>
732
- `<span class="inline-block bg-gray-100 dark:bg-gray-700 text-gray-800 dark:text-gray-300 text-xs px-2 py-1 rounded mr-1 mb-1">${tag}</span>`
733
- ).join('');
734
-
735
- // Add a badge for real data
736
- let badgeHtml = '';
737
- if (dataset.isReal) {
738
- badgeHtml = `<span class="engine-badge" title="Data from ${dataset.engine}">${dataset.engine.split('.')[0]}</span>`;
739
- }
740
-
741
- card.innerHTML = `
742
- ${badgeHtml}
743
- <h3 class="text-lg font-semibold mb-2">${dataset.name}</h3>
744
- <div class="flex flex-wrap mt-2">${tagsHtml}</div>
745
- `;
746
-
747
- card.addEventListener('click', () => showDatasetDetails(dataset));
748
- resultsContainer.appendChild(card);
749
- });
750
- }
751
-
752
- function showLoadingSkeletons() {
753
- for (let i = 0; i < 4; i++) {
754
- const skeleton = document.createElement('div');
755
- skeleton.className = 'bg-white dark:bg-gray-800 rounded-lg p-4 border border-gray-200 dark:border-gray-700';
756
- skeleton.innerHTML = `
757
- <div class="shimmer h-6 w-3/4 mb-2"></div>
758
- <div class="flex flex-wrap mt-2">
759
- <div class="shimmer h-6 w-16 rounded mr-1 mb-1"></div>
760
- <div class="shimmer h-6 w-20 rounded mr-1 mb-1"></div>
761
- <div class="shimmer h-6 w-24 rounded mr-1 mb-1"></div>
762
- </div>
763
- `;
764
- resultsContainer.appendChild(skeleton);
765
- }
766
- }
767
-
768
  function loadMoreDatasets() {
769
  currentPage++;
770
 
@@ -1230,6 +1990,8 @@ html = """
1230
  );
1231
  } catch (err) {
1232
  showError("Error sending message: " + err);
 
 
1233
  }
1234
  };
1235
 
@@ -2041,13 +2803,17 @@ with gr.Blocks(css=css) as demo:
2041
  engine_settings_button = gr.Button("Configure Search Engines", icon="https://img.icons8.com/ios-filled/50/000000/settings--v1.png", size="sm")
2042
 
2043
  # Engine Selection Modal
2044
- with gr.Modal("Search Engine Settings", id="engine-modal") as engine_modal:
2045
- gr.Markdown("Select which search engines to use for real data retrieval. A diverse selection improves results.")
2046
- engine_options_html_comp = gr.HTML(elem_id="engine-options")
2047
- with gr.Row():
2048
- select_all_engines_btn = gr.Button("Select All")
2049
- deselect_all_engines_btn = gr.Button("Deselect All")
2050
- save_engines_btn = gr.Button("Save Settings", variant="primary")
 
 
 
 
2051
 
2052
  # --- Dataset Detail Page UI ---
2053
  with gr.Column(visible=False, elem_id="dataset-page") as dataset_page:
@@ -2094,7 +2860,7 @@ with gr.Blocks(css=css) as demo:
2094
  if "I'm sorry" in line or "policy" in line: raise gr.Error("Inappropriate content detected.")
2095
  if generated_count >= MAX_NB_ITEMS_PER_GENERATION_CALL: break
2096
 
2097
- match = re.match(r"^\s*\d+\.\s+(.+?)\s+$$(.+?)$$", line) # Parse line format
2098
  if match:
2099
  dataset_name, tags = match.groups()
2100
  dataset_name, tags = dataset_name.strip(), tags.strip()
@@ -2359,9 +3125,9 @@ with gr.Blocks(css=css) as demo:
2359
  outputs=[selected_engines_state, current_engine_state, gr.Info()]
2360
  )
2361
 
2362
- engine_settings_button.click(lambda: engine_modal.update(visible=True), outputs=[engine_modal])
2363
- # Close modal on save or when clicking outside (implicit via Gradio's modal handling)
2364
-
2365
  # Initial App Load Logic
2366
  @demo.load(outputs=([search_page, dataset_page, dataset_title_md, dataset_description_md, dataset_source_badge, dataset_source_info, dataset_share_textbox, full_dataset_section, save_dataset_button, open_dataset_message, search_bar] + # Outputs for detail page and search bar
2367
  buttons + [generated_texts_state] + # Outputs for search results buttons and state
@@ -2424,4 +3190,4 @@ with gr.Blocks(css=css) as demo:
2424
 
2425
 
2426
  if __name__ == "__main__":
2427
- demo.launch(share=False, server_name="0.0.0.0")
 
244
  </style>
245
  </head>
246
  <body class="bg-white dark:bg-gray-900 text-gray-800 dark:text-gray-200 min-h-screen">
 
247
  <script>
248
  if (window.matchMedia && window.matchMedia('(prefers-color-scheme: dark)').matches) {
249
  document.documentElement.classList.add('dark');
 
258
  </script>
259
 
260
  <div class="container mx-auto px-4 py-8">
 
261
  <header class="text-center mb-8">
262
  <h1 class="text-3xl font-bold mb-2">🤗 Infinite Dataset Hub ♾️</h1>
263
  <p class="text-lg text-gray-600 dark:text-gray-400">Generate datasets from AI and real-world data sources</p>
264
  </header>
265
 
 
266
  <main>
 
267
  <div id="search-page" class="mb-8">
268
  <div class="max-w-3xl mx-auto">
269
  <div class="mb-4">
 
296
  </div>
297
  </div>
298
 
 
299
  <div id="engine-modal" class="fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center z-50 hidden">
300
  <div class="bg-white dark:bg-gray-800 rounded-lg p-6 max-w-lg w-full max-h-[80vh] overflow-y-auto">
301
  <div class="flex justify-between items-center mb-4">
 
312
  </p>
313
 
314
  <div id="engine-options" class="space-y-2 mb-6">
315
+ </div>
 
316
 
317
  <div class="flex justify-between">
318
  <button id="select-all-engines" class="text-primary hover:underline">Select All</button>
 
328
  </div>
329
 
330
  <div id="dataset-results" class="grid grid-cols-1 md:grid-cols-2 gap-4 mt-6">
331
+ </div>
 
332
 
333
  <div id="load-more-container" class="text-center mt-6 hidden">
334
  <button id="load-more-button" class="bg-gray-200 dark:bg-gray-700 px-6 py-3 rounded-lg hover:bg-gray-300 dark:hover:bg-gray-600 transition">
 
338
  </div>
339
  </div>
340
 
 
341
  <div id="dataset-page" class="hidden max-w-4xl mx-auto">
342
  <button id="back-button" class="flex items-center text-primary mb-4 hover:underline">
343
  <svg xmlns="http://www.w3.org/2000/svg" class="h-5 w-5 mr-1" viewBox="0 0 20 20" fill="currentColor">
 
415
  </div>
416
  </main>
417
 
 
418
  <footer class="mt-12 text-center text-sm text-gray-600 dark:text-gray-400">
419
  <p>Powered by Claude-3.7-Sonnet • Datasets generated from real sources and AI</p>
420
  </footer>
 
476
  const selectAllEngines = document.getElementById('select-all-engines');
477
  const deselectAllEngines = document.getElementById('deselect-all-engines');
478
 
479
+ // Event Listeners
480
+ document.addEventListener('DOMContentLoaded', () => {
481
+ searchButton.addEventListener('click', performSearch);
482
+ searchInput.addEventListener('keypress', (e) => {
483
+ if (e.key === 'Enter') performSearch();
484
+ });
485
+ loadMoreButton.addEventListener('click', loadMoreDatasets);
486
+ backButton.addEventListener('click', showSearchPage);
487
+ generateFullButton.addEventListener('click', generateFullDataset);
488
+ downloadCsvButton.addEventListener('click', () => downloadData('csv'));
489
+ downloadJsonButton.addEventListener('click', () => downloadData('json'));
490
+ downloadParquetButton.addEventListener('click', () => downloadData('parquet'));
491
+
492
+ dataSourceToggle.addEventListener('change', toggleDataSource);
493
+ engineSettingsButton.addEventListener('click', showEngineModal);
494
+ closeModalButton.addEventListener('click', hideEngineModal);
495
+ saveEnginesButton.addEventListener('click', saveEngineSettings);
496
+ selectAllEngines.addEventListener('click', () => toggleAllEngines(true));
497
+ deselectAllEngines.addEventListener('click', () => toggleAllEngines(false));
498
+
499
+ // Initialize engine options
500
+ populateEngineOptions();
501
+
502
+ // Show initial placeholder datasets
503
+ showPlaceholderDatasets();
504
+ });
505
+
506
+ // Search Engine Settings
507
+ function populateEngineOptions() {
508
+ engineOptions.innerHTML = '';
509
+
510
+ searchEngines.forEach(engine => {
511
+ const isChecked = selectedEngines.includes(engine);
512
+
513
+ const optionDiv = document.createElement('div');
514
+ optionDiv.className = 'flex items-center';
515
+
516
+ optionDiv.innerHTML = `
517
+ <input type="checkbox" id="engine-${engine}" class="engine-checkbox mr-2 h-4 w-4"
518
+ value="${engine}" ${isChecked ? 'checked' : ''}>
519
+ <label for="engine-${engine}" class="cursor-pointer">${engine}</label>
520
+ `;
521
+
522
+ engineOptions.appendChild(optionDiv);
523
+ });
524
+ }
525
+
526
+ function showEngineModal() {
527
+ engineModal.classList.remove('hidden');
528
+ }
529
+
530
+ function hideEngineModal() {
531
+ engineModal.classList.add('hidden');
532
+ }
533
+
534
+ function saveEngineSettings() {
535
+ const checkboxes = document.querySelectorAll('.engine-checkbox:checked');
536
+ selectedEngines = Array.from(checkboxes).map(cb => cb.value);
537
+
538
+ if (selectedEngines.length === 0) {
539
+ // Ensure at least one engine is selected
540
+ selectedEngines = ["DuckDuckGo.com"];
541
+ document.getElementById(`engine-DuckDuckGo.com`).checked = true;
542
+ showNotification("At least one search engine must be selected. Using DuckDuckGo as default.");
543
+ }
544
+
545
+ hideEngineModal();
546
+ showNotification(`Updated search engine settings. Using ${selectedEngines.length} engines.`);
547
+ }
548
+
549
+ function toggleAllEngines(select) {
550
+ const checkboxes = document.querySelectorAll('.engine-checkbox');
551
+ checkboxes.forEach(cb => {
552
+ cb.checked = select;
553
+ });
554
+ }
555
+
556
+ // Toggle data source between real and AI
557
+ function toggleDataSource() {
558
+ useRealData = dataSourceToggle.checked;
559
+ dataSourceText.textContent = useRealData ? "Using: Real + AI Data" : "Using: AI Data Only";
560
+
561
+ // Show or hide engine settings button
562
+ engineSettingsButton.style.display = useRealData ? "flex" : "none";
563
+
564
+ showNotification(`Switched to ${useRealData ? "combined real and synthetic" : "synthetic-only"} data mode`);
565
+ }
566
+
567
+ // Search functionality
568
+ function performSearch() {
569
+ const query = searchInput.value.trim();
570
+ if (!query) return;
571
+
572
+ currentSearchQuery = query;
573
+ currentPage = 1;
574
+ currentDatasets = [];
575
+
576
+ resultsContainer.innerHTML = '';
577
+ showLoadingSkeletons();
578
+
579
+ if (useRealData) {
580
+ // Use real data from search engines + AI
581
+ searchWithRealData(query);
582
+ } else {
583
+ // Use only AI-generated data
584
+ searchWithAIData(query);
585
+ }
586
+ }
587
+
588
+ function searchWithRealData(query) {
589
+ // Randomly select a search engine from the user's selected engines
590
+ currentEngine = selectedEngines[Math.floor(Math.random() * selectedEngines.length)];
591
+
592
+ // Register handler for dataset names based on real search results
593
+ window.Poe.registerHandler("real-search-handler", (result) => {
594
+ if (result.status === "error") {
595
+ showError("Error querying search engines");
596
+ return;
597
+ }
598
+
599
+ const message = result.responses[0];
600
+
601
+ if (message.status === "complete") {
602
+ // Parse the dataset names and tags from the response
603
+ const datasets = parseDatasetResults(message.content);
604
+ datasets.forEach(dataset => {
605
+ dataset.isReal = true;
606
+ dataset.engine = currentEngine;
607
+ });
608
+
609
+ currentDatasets = datasets;
610
+
611
+ // Display the datasets
612
+ resultsContainer.innerHTML = '';
613
+ displayDatasets(datasets);
614
+
615
+ // Show load more button if we have results
616
+ if (datasets.length > 0) {
617
+ loadMoreContainer.classList.remove('hidden');
618
+ }
619
+ }
620
+ });
621
+
622
+ try {
623
+ window.Poe.sendUserMessage(
624
+ `@Claude-3.7-Sonnet You are a data specialist who can transform real search results into structured datasets.
625
+
626
+ A user is searching for data about: "${query}"
627
+
628
+ Imagine you've queried ${currentEngine} and received real search results. Create a list of 10 specific datasets that could be created from these search results.
629
+
630
+ For each dataset:
631
+ 1. Give it a clear, specific name related to the search topic
632
+ 2. Include 3-5 relevant tags in parentheses, with one tag specifying the ML task type (classification, regression, clustering, etc.)
633
+
634
+ Format each dataset as:
635
+ 1. DatasetName (tag1, tag2, ml_task_tag)
636
+
637
+ Make these datasets sound like real collections that could be created from ${currentEngine} search results on "${query}".`,
638
+ {
639
+ handler: "real-search-handler",
640
+ stream: false,
641
+ openChat: false
642
+ }
643
+ );
644
+ } catch (err) {
645
+ showError("Error sending message: " + err);
646
+ // Fall back to AI data
647
+ searchWithAIData(query);
648
+ }
649
+ }
650
+
651
+ function searchWithAIData(query) {
652
+ // Register handler for AI-generated dataset names
653
+ window.Poe.registerHandler("dataset-search-handler", (result) => {
654
+ if (result.status === "error") {
655
+ showError("Error generating datasets");
656
+ return;
657
+ }
658
+
659
+ const message = result.responses[0];
660
+
661
+ if (message.status === "complete") {
662
+ // Parse the dataset names and tags from the response
663
+ const datasets = parseDatasetResults(message.content);
664
+ datasets.forEach(dataset => {
665
+ dataset.isReal = false;
666
+ });
667
+
668
+ currentDatasets = datasets;
669
+
670
+ // Display the datasets
671
+ resultsContainer.innerHTML = '';
672
+ displayDatasets(datasets);
673
+
674
+ // Show load more button if we have results
675
+ if (datasets.length > 0) {
676
+ loadMoreContainer.classList.remove('hidden');
677
+ }
678
+ }
679
+ });
680
+
681
+ try {
682
+ window.Poe.sendUserMessage(
683
+ `@Claude-3.7-Sonnet A Machine Learning Practioner is looking for a dataset that matches '${query}'.
684
+ Generate a list of ${MAX_DATASETS_PER_PAGE} names of quality datasets that don't exist but sound plausible and would
685
+ be helpful. Feel free to reuse words from the query '${query}' to name the datasets.
686
+ Every dataset should be about '${query}' and have descriptive tags/keywords including the ML task name associated with the dataset (classification, regression, anomaly detection, etc.). Use the following format:
687
+ 1. DatasetName1 (tag1, tag2, tag3)
688
+ 2. DatasetName2 (tag1, tag2, tag3)`,
689
+ {
690
+ handler: "dataset-search-handler",
691
+ stream: false,
692
+ openChat: false
693
+ }
694
+ );
695
+ } catch (err) {
696
+ showError("Error sending message: " + err);
697
+ }
698
+ }
699
+
700
+ function loadMoreDatasets() {
701
+ currentPage++;
702
+
703
+ // Use the same data source (real or AI) as the initial search
704
+ if (useRealData) {
705
+ loadMoreRealDatasets();
706
+ } else {
707
+ loadMoreAIDatasets();
708
+ }
709
+ }
710
+
711
+ function loadMoreRealDatasets() {
712
+ // Rotate to a different search engine for variety
713
+ const previousEngine = currentEngine;
714
+ while (currentEngine === previousEngine && selectedEngines.length > 1) {
715
+ currentEngine = selectedEngines[Math.floor(Math.random() * selectedEngines.length)];
716
+ }
717
+
718
+ // Register handler for more datasets
719
+ window.Poe.registerHandler("more-real-datasets-handler", (result) => {
720
+ if (result.status === "error") {
721
+ showError("Error generating more datasets");
722
+ return;
723
+ }
724
+
725
+ const message = result.responses[0];
726
+
727
+ if (message.status === "complete") {
728
+ // Parse the dataset names and tags from the response
729
+ const datasets = parseDatasetResults(message.content);
730
+ datasets.forEach(dataset => {
731
+ dataset.isReal = true;
732
+ dataset.engine = currentEngine;
733
+ });
734
+
735
+ currentDatasets = [...currentDatasets, ...datasets];
736
+
737
+ // Display the datasets
738
+ displayDatasets(datasets);
739
+ }
740
+ });
741
+
742
+ try {
743
+ window.Poe.sendUserMessage(
744
+ `@Claude-3.7-Sonnet You're a data specialist who can transform real search results into structured datasets.
745
+
746
+ Continue our previous search for data about: "${currentSearchQuery}"
747
+
748
+ Now let's use a different search engine: ${currentEngine}
749
+
750
+ Create 10 more specific datasets that could be created from these search results. Make sure these are different from the previous datasets.
751
+
752
+ Use the same format:
753
+ 1. DatasetName (tag1, tag2, ml_task_tag)
754
+
755
+ Make these datasets sound like real collections that could be created from ${currentEngine} search results on "${currentSearchQuery}".`,
756
+ {
757
+ handler: "more-real-datasets-handler",
758
+ stream: false,
759
+ openChat: false
760
+ }
761
+ );
762
+ } catch (err) {
763
+ showError("Error sending message: " + err);
764
+ // Fall back to AI data
765
+ loadMoreAIDatasets();
766
+ }
767
+ }
768
+
769
+ function loadMoreAIDatasets() {
770
+ // Register handler for more AI datasets
771
+ window.Poe.registerHandler("more-datasets-handler", (result) => {
772
+ if (result.status === "error") {
773
+ showError("Error generating more datasets");
774
+ return;
775
+ }
776
+
777
+ const message = result.responses[0];
778
+
779
+ if (message.status === "complete") {
780
+ // Parse the dataset names and tags from the response
781
+ const datasets = parseDatasetResults(message.content);
782
+ datasets.forEach(dataset => {
783
+ dataset.isReal = false;
784
+ });
785
+
786
+ currentDatasets = [...currentDatasets, ...datasets];
787
+
788
+ // Display the datasets
789
+ displayDatasets(datasets);
790
+ }
791
+ });
792
+
793
+ try {
794
+ window.Poe.sendUserMessage(
795
+ `@Claude-3.7-Sonnet Please generate ${MAX_DATASETS_PER_PAGE} more dataset names about '${currentSearchQuery}'. Use the same format as before:
796
+ 1. DatasetName1 (tag1, tag2, tag3)
797
+ Make sure these are completely different from previous suggestions.`,
798
+ {
799
+ handler: "more-datasets-handler",
800
+ stream: false,
801
+ openChat: false
802
+ }
803
+ );
804
+ } catch (err) {
805
+ showError("Error sending message: " + err);
806
+ }
807
+ }
808
+
809
+ function showDatasetDetails(dataset) {
810
+ currentDataset = dataset;
811
+ searchPage.classList.add('hidden');
812
+ datasetPage.classList.remove('hidden');
813
+
814
+ // Update UI with dataset info
815
+ datasetTitle.textContent = dataset.name;
816
+ datasetTags.innerHTML = dataset.tags.map(tag =>
817
+ `<span class="inline-block bg-gray-100 dark:bg-gray-700 text-gray-800 dark:text-gray-300 text-xs px-2 py-1 rounded mr-1 mb-1">${tag}</span>`
818
+ ).join('');
819
+
820
+ // Update source badge
821
+ if (dataset.isReal) {
822
+ dataSourceBadge.textContent = "Real Data";
823
+ dataSourceBadge.className = "px-3 py-1 rounded-full text-xs font-medium bg-green-100 text-green-800 dark:bg-green-900 dark:text-green-200";
824
+ sourceDetails.innerHTML = `This dataset is based on real information queried from <strong>${dataset.engine}</strong> for the search term "<strong>${currentSearchQuery}</strong>". The data has been structured for machine learning use.`;
825
+ } else {
826
+ dataSourceBadge.textContent = "AI-Generated";
827
+ dataSourceBadge.className = "px-3 py-1 rounded-full text-xs font-medium bg-purple-100 text-purple-800 dark:bg-purple-900 dark:text-purple-200";
828
+ sourceDetails.innerHTML = `This is an AI-generated dataset created using Claude-3.7-Sonnet. The content is synthetic and designed to represent plausible data related to "${currentSearchQuery}".`;
829
+ }
830
+
831
+ // Clear previous content
832
+ datasetDescription.innerHTML = '<div class="shimmer h-4 w-full mb-2"></div>'.repeat(3);
833
+ previewTable.innerHTML = '';
834
+ fullDatasetSection.classList.add('hidden');
835
+ generateStatus.classList.add('hidden');
836
+ generateFullButton.disabled = false;
837
+
838
+ // Reset full dataset
839
+ fullDatasetRows = [];
840
+
841
+ // Generate dataset preview - different approach for real vs AI data
842
+ if (dataset.isReal) {
843
+ generateRealDatasetPreview(dataset);
844
+ } else {
845
+ generateAIDatasetPreview(dataset);
846
+ }
847
+
848
+ // Scroll to top
849
+ window.scrollTo(0, 0);
850
+ }
851
+
852
+ function generateRealDatasetPreview(dataset) {
853
+ window.Poe.registerHandler("real-preview-handler", (result) => {
854
+ if (result.status === "error") {
855
+ datasetDescription.innerHTML = '<p class="text-red-500">Error generating dataset preview</p>';
856
+ return;
857
+ }
858
+
859
+ const message = result.responses[0];
860
+
861
+ if (message.status === "complete") {
862
+ const content = message.content;
863
+
864
+ // Extract description and CSV
865
+ const parts = content.split('**CSV Content Preview:**');
866
+ let description = "";
867
+ let csvContent = "";
868
+
869
+ if (parts.length > 1) {
870
+ description = parts[0].replace('**Dataset Description:**', '').trim();
871
+ csvContent = parts[1].trim();
872
+
873
+ // Clean up CSV content (remove markdown code block markers)
874
+ csvContent = csvContent.replace(/```csv\n|```\n|```/g, '').trim();
875
+ } else {
876
+ description = "No description available";
877
+ csvContent = content;
878
+ }
879
+
880
+ // Display description
881
+ datasetDescription.innerHTML = marked.parse(description);
882
+
883
+ // Parse and display CSV preview
884
+ try {
885
+ const results = Papa.parse(csvContent, {
886
+ header: true,
887
+ skipEmptyLines: true
888
+ });
889
+
890
+ if (results.data && results.data.length > 0) {
891
+ // Create table from CSV data
892
+ createTable(previewTable, results.data, results.meta.fields);
893
+ } else {
894
+ previewTable.innerHTML = '<p class="p-4 text-red-500">No preview data available</p>';
895
+ }
896
+ } catch (err) {
897
+ previewTable.innerHTML = `<p class="p-4 text-red-500">Error parsing CSV: ${err.message}</p>`;
898
+ }
899
+ }
900
+ });
901
+
902
+ try {
903
+ const tagsStr = dataset.tags.join(', ');
904
+ window.Poe.sendUserMessage(
905
+ `@Claude-3.7-Sonnet You're a specialist in converting web search results into structured data.
906
+
907
+ Based on search results from ${dataset.engine} about "${currentSearchQuery}",
908
+ create a preview of the dataset "${dataset.name}" with tags "${tagsStr}".
909
+
910
+ First, write a detailed description of what this dataset contains, its structure, and how it was constructed from web search results.
911
+
912
+ Then, generate a realistic 5-row CSV preview that resembles data you might get if you scraped and structured real results from ${dataset.engine}.
913
+
914
+ Format your response with:
915
+ **Dataset Description:** [detailed description]
916
+
917
+ **CSV Content Preview:**
918
+ \`\`\`csv
919
+ [CSV header and 5 rows of realistic data]
920
+ \`\`\`
921
+
922
+ Include relevant columns for the dataset type, with proper labels/categories where appropriate. The data should look like it came from real sources.`,
923
+ {
924
+ handler: "real-preview-handler",
925
+ stream: false,
926
+ openChat: false
927
+ }
928
+ );
929
+ } catch (err) {
930
+ datasetDescription.innerHTML = `<p class="text-red-500">Error: ${err.message}</p>`;
931
+ }
932
+ }
933
+
934
/**
 * Ask the bot for an AI-generated preview of `dataset` and render it.
 *
 * Registers the "dataset-preview-handler" Poe handler, then sends a prompt built
 * from `dataset.name`, `dataset.tags` and the current search query. When the reply
 * is complete, the text before "**CSV Content Preview:**" is rendered as Markdown
 * into `datasetDescription`, and the CSV after it is parsed with Papa Parse and
 * drawn into `previewTable` through `createTable`.
 *
 * @param {Object} dataset - dataset entry; `name` (string) and `tags` (array) are read.
 */
function generateAIDatasetPreview(dataset) {
    // Status/error lines are written through textContent so that dynamic text
    // (for example an exception message) can never be interpreted as markup.
    const showMessage = (container, className, text) => {
        const paragraph = document.createElement('p');
        paragraph.className = className;
        paragraph.textContent = text;
        container.innerHTML = '';
        container.appendChild(paragraph);
    };

    window.Poe.registerHandler("dataset-preview-handler", (result) => {
        if (result.status === "error") {
            showMessage(datasetDescription, 'text-red-500', 'Error generating dataset preview');
            return;
        }

        // A result without any response is ignored instead of throwing inside the handler.
        const message = result.responses && result.responses[0];
        if (!message || message.status !== "complete") {
            return;
        }

        const content = message.content;

        // Split the reply into the description part and the CSV part.
        const parts = content.split('**CSV Content Preview:**');
        let description = "";
        let csvContent = "";

        if (parts.length > 1) {
            description = parts[0].replace('**Dataset Description:**', '').trim();
            // Remove the markdown code block markers around the CSV.
            csvContent = parts[1].trim().replace(/```csv\n|```\n|```/g, '').trim();
        } else {
            // No CSV title found: treat the whole reply as CSV.
            description = "No description available";
            csvContent = content;
        }

        // NOTE(review): bot-produced Markdown is inserted as HTML without sanitization.
        datasetDescription.innerHTML = marked.parse(description);

        try {
            const results = Papa.parse(csvContent, {
                header: true,
                skipEmptyLines: true
            });

            if (results.data && results.data.length > 0) {
                createTable(previewTable, results.data, results.meta.fields);
            } else {
                showMessage(previewTable, 'p-4 text-red-500', 'No preview data available');
            }
        } catch (err) {
            showMessage(previewTable, 'p-4 text-red-500', `Error parsing CSV: ${err.message}`);
        }
    });

    try {
        const tagsStr = dataset.tags.join(', ');
        window.Poe.sendUserMessage(
            `@Claude-3.7-Sonnet An ML practitioner is looking for a dataset CSV after the query '${currentSearchQuery}'.
            Generate the first 5 rows of a plausible and quality CSV for the dataset '${dataset.name}'.
            You can get inspiration from related keywords '${tagsStr}' but most importantly the dataset should correspond to the query '${currentSearchQuery}'.
            Focus on quality text content and use a 'label' or 'labels' column if it makes sense (invent labels, avoid reusing the keywords, be accurate while labelling texts).
            Reply using a short description of the dataset with title **Dataset Description:** followed by the CSV content in a code block and with title **CSV Content Preview:**`,
            {
                handler: "dataset-preview-handler",
                stream: false,
                openChat: false
            }
        );
    } catch (err) {
        showMessage(datasetDescription, 'text-red-500', `Error: ${err.message}`);
    }
}
1002
+
1003
/**
 * Replace the contents of `container` with an HTML table.
 *
 * @param {Element} container - element whose content is cleared and replaced.
 * @param {Array<Object>} data - one object per row, keyed by header name.
 * @param {Array<string>} headers - column names, in display order.
 */
function createTable(container, data, headers) {
    const columns = headers || [];
    const rows = data || [];

    // Only a missing cell becomes an empty string; legitimate falsy values
    // such as 0 or false are kept instead of being blanked out.
    const cellValue = (value) => (value === undefined || value === null ? '' : value);

    container.innerHTML = '';

    const table = document.createElement('table');
    table.className = 'w-full';

    // Header row
    const thead = document.createElement('thead');
    const headerRow = document.createElement('tr');
    columns.forEach((header) => {
        const th = document.createElement('th');
        th.textContent = header;
        headerRow.appendChild(th);
    });
    thead.appendChild(headerRow);
    table.appendChild(thead);

    // Body rows: cells follow the header order, not the key order of each row object.
    const tbody = document.createElement('tbody');
    rows.forEach((row) => {
        const tr = document.createElement('tr');
        columns.forEach((header) => {
            const td = document.createElement('td');
            td.textContent = cellValue(row[header]);
            tr.appendChild(td);
        });
        tbody.appendChild(tr);
    });
    table.appendChild(tbody);

    container.appendChild(table);
}
1040
+
1041
/**
 * Start generating the full dataset, seeded with the rows shown in the preview table.
 *
 * Reads the header and body rows back out of `previewTable`, stores the rows in
 * `fullDatasetRows`, updates the progress display, and then hands over to
 * `generateFullRealDataset` or `generateFullAIDataset` depending on
 * `currentDataset.isReal`.
 */
function generateFullDataset() {
    // The CSV header sent to the bot comes from the preview table, so there is
    // nothing meaningful to generate until a preview has been rendered.
    const previewHeaders = Array.from(
        previewTable.querySelectorAll('thead th'),
        (th) => th.textContent
    );
    if (previewHeaders.length === 0) {
        showError("No preview data available to generate the full dataset from");
        return;
    }

    // Disable the button and show the (reset) progress status.
    generateFullButton.disabled = true;
    generateStatus.classList.remove('hidden');
    rowsCount.textContent = '0';
    progressBar.style.width = '0%';

    const targetRows = MAX_FULL_DATASET_ROWS;
    const csvHeader = previewHeaders.join(',');

    // Rebuild row objects from the preview cells, keyed by column position.
    const previewRows = Array.from(previewTable.querySelectorAll('tbody tr'), (tr) => {
        const row = {};
        Array.from(tr.querySelectorAll('td')).forEach((td, index) => {
            row[previewHeaders[index]] = td.textContent;
        });
        return row;
    });

    fullDatasetRows = [...previewRows];
    const currentRows = previewRows.length;
    updateGenerationProgress(currentRows, targetRows);

    // Choose the generation method based on the dataset type.
    if (currentDataset.isReal) {
        generateFullRealDataset(previewHeaders, csvHeader, currentRows, targetRows);
    } else {
        generateFullAIDataset(previewHeaders, csvHeader, currentRows, targetRows);
    }
}
1078
+
1079
/**
 * Grow `fullDatasetRows` in batches of "search-result style" rows until
 * `targetRows` is reached, then display the full dataset.
 *
 * Each batch registers its own Poe handler, sends a prompt carrying the CSV
 * header, parses the reply with Papa Parse and appends the parsed rows.
 *
 * @param {Array<string>} previewHeaders - preview column names (not used here; kept for signature parity with the AI variant).
 * @param {string} csvHeader - comma-joined header the bot is asked to reuse.
 * @param {number} currentRows - number of rows already collected.
 * @param {number} targetRows - number of rows to reach before stopping.
 */
function generateFullRealDataset(previewHeaders, csvHeader, currentRows, targetRows) {
    const batchSize = 15; // Larger batches for efficiency
    const maxAttempts = 3; // consecutive failed attempts tolerated before giving up

    // Prefer the content of the first fenced code block; otherwise strip stray fence markers.
    const extractCsv = (content) => {
        const fenced = content.match(/```(?:csv)?\n([\s\S]*?)```/);
        if (fenced) {
            return fenced[1].trim();
        }
        return content.replace(/```csv\n|```\n|```/g, '').trim();
    };

    const generateBatch = (batchIndex, attempt = 0) => {
        // currentRows already includes every appended batch, so it is compared directly.
        if (currentRows >= targetRows) {
            showFullDataset();
            return;
        }

        // Bound the retries so a bot that keeps returning unusable replies cannot
        // cause an endless stream of requests; show whatever was collected so far.
        if (attempt >= maxAttempts) {
            showError("Error generating dataset rows");
            showFullDataset();
            return;
        }

        window.Poe.registerHandler(`real-batch-${batchIndex}-handler`, (result) => {
            if (result.status === "error") {
                showError("Error generating dataset rows");
                return;
            }

            const message = result.responses && result.responses[0];
            if (!message || message.status !== "complete") {
                return;
            }

            let rows = [];
            try {
                const results = Papa.parse(extractCsv(message.content), {
                    header: true,
                    skipEmptyLines: true
                });
                rows = results.data || [];
            } catch (err) {
                console.error("Error parsing CSV:", err);
            }

            if (rows.length > 0) {
                fullDatasetRows = [...fullDatasetRows, ...rows];
                currentRows += rows.length;
                updateGenerationProgress(currentRows, targetRows);
                generateBatch(batchIndex + 1);
            } else {
                // Empty or unparseable reply: ask again for the same batch.
                generateBatch(batchIndex, attempt + 1);
            }
        });

        try {
            // For variation, rotate through engines for each batch
            const engineForBatch = selectedEngines[batchIndex % selectedEngines.length] || currentDataset.engine;

            window.Poe.sendUserMessage(
                `@Claude-3.7-Sonnet You're expanding a dataset based on search results from ${engineForBatch}.

                For the dataset "${currentDataset.name}" about "${currentSearchQuery}", please generate ${batchSize} more rows of data.

                Use this exact CSV header: ${csvHeader}

                The data should look realistic, as if it came from actual ${engineForBatch} search results for "${currentSearchQuery}".
                Include appropriate values for each field, maintaining the same patterns and types as seen in the existing data.

                Only include the CSV data in your response (header + ${batchSize} rows), no explanations or additional text.`,
                {
                    handler: `real-batch-${batchIndex}-handler`,
                    stream: false,
                    openChat: false
                }
            );
        } catch (err) {
            showError("Error sending message: " + err);
            // Move on to the next batch (and therefore the next engine); the
            // failure counts as an attempt so repeated send errors still terminate.
            generateBatch(batchIndex + 1, attempt + 1);
        }
    };

    // Start generating batches
    generateBatch(0);
}
1173
+
1174
/**
 * Grow `fullDatasetRows` in batches of AI-generated rows until `targetRows`
 * is reached, then display the full dataset.
 *
 * Each batch registers its own Poe handler, sends a prompt carrying the CSV
 * header, parses the reply with Papa Parse and appends the parsed rows.
 *
 * @param {Array<string>} previewHeaders - preview column names (not used here; kept for signature parity with the real-data variant).
 * @param {string} csvHeader - comma-joined header the bot is asked to reuse.
 * @param {number} currentRows - number of rows already collected.
 * @param {number} targetRows - number of rows to reach before stopping.
 */
function generateFullAIDataset(previewHeaders, csvHeader, currentRows, targetRows) {
    const batchSize = 10;
    const maxAttempts = 3; // consecutive failed attempts tolerated before giving up

    // Prefer the content of the first fenced code block; otherwise strip stray fence markers.
    const extractCsv = (content) => {
        const fenced = content.match(/```(?:csv)?\n([\s\S]*?)```/);
        if (fenced) {
            return fenced[1].trim();
        }
        return content.replace(/```csv\n|```\n|```/g, '').trim();
    };

    const generateBatch = (batchIndex, attempt = 0) => {
        // currentRows already includes every appended batch, so it is compared directly.
        if (currentRows >= targetRows) {
            showFullDataset();
            return;
        }

        // Bound the retries so a bot that keeps returning unusable replies cannot
        // cause an endless stream of requests; show whatever was collected so far.
        if (attempt >= maxAttempts) {
            showError("Error generating dataset rows");
            showFullDataset();
            return;
        }

        window.Poe.registerHandler(`batch-${batchIndex}-handler`, (result) => {
            if (result.status === "error") {
                showError("Error generating dataset rows");
                return;
            }

            const message = result.responses && result.responses[0];
            if (!message || message.status !== "complete") {
                return;
            }

            let rows = [];
            try {
                const results = Papa.parse(extractCsv(message.content), {
                    header: true,
                    skipEmptyLines: true
                });
                rows = results.data || [];
            } catch (err) {
                console.error("Error parsing CSV:", err);
            }

            if (rows.length > 0) {
                fullDatasetRows = [...fullDatasetRows, ...rows];
                currentRows += rows.length;
                updateGenerationProgress(currentRows, targetRows);
                generateBatch(batchIndex + 1);
            } else {
                // Empty or unparseable reply: ask again for the same batch.
                generateBatch(batchIndex, attempt + 1);
            }
        });

        try {
            const tagsStr = currentDataset.tags.join(', ');
            window.Poe.sendUserMessage(
                `@Claude-3.7-Sonnet For the dataset '${currentDataset.name}' about '${currentSearchQuery}' with tags '${tagsStr}',
                please generate ${batchSize} more sample rows in CSV format. Use the same CSV header: ${csvHeader}
                Only include the CSV data in your response, no explanations or additional text.`,
                {
                    handler: `batch-${batchIndex}-handler`,
                    stream: false,
                    openChat: false
                }
            );
        } catch (err) {
            showError("Error sending message: " + err);
        }
    };

    // Start generating batches
    generateBatch(0);
}
1257
+
1258
/**
 * Reflect generation progress in the row counter and the progress bar.
 *
 * @param {number} current - rows generated so far (written to `rowsCount`).
 * @param {number} total - target row count used to compute the bar width.
 */
function updateGenerationProgress(current, total) {
    // Whole-number percentage, never above 100 even if more rows than requested arrive.
    const filled = Math.floor((current / total) * 100);
    const barWidth = Math.min(100, filled) + '%';

    rowsCount.textContent = current;
    progressBar.style.width = barWidth;
}
1263
+
1264
/**
 * Hide the generation status and show the first rows of the generated dataset.
 *
 * Column names are taken from the keys of the first row of `fullDatasetRows`.
 * At most `previewLimit` rows are rendered; a note under the table reports how
 * many rows are actually shown out of the total.
 */
function showFullDataset() {
    const previewLimit = 10;

    // Hide generation status and reveal the full dataset section.
    generateStatus.classList.add('hidden');
    fullDatasetSection.classList.remove('hidden');

    // Get headers from the data (empty list when no rows were generated).
    const headers = Object.keys(fullDatasetRows[0] || {});

    const visibleRows = fullDatasetRows.slice(0, previewLimit);
    createTable(fullTable, visibleRows, headers);

    // Report the real number of displayed rows, which can be below the limit.
    const note = document.createElement('p');
    note.className = 'text-sm text-gray-600 dark:text-gray-400 mt-2';
    note.textContent = `Showing ${visibleRows.length} of ${fullDatasetRows.length} rows. Use the download buttons to get the complete dataset.`;
    fullTable.appendChild(note);
}
1283
 
1284
/**
 * Download the generated dataset in the requested format.
 *
 * Does nothing when `fullDatasetRows` is empty or when `format` is not one of
 * 'csv', 'json' or 'parquet'. The file name is the dataset name with each run
 * of whitespace replaced by an underscore, followed by "_dataset".
 *
 * @param {string} format - 'csv', 'json' or 'parquet'.
 */
function downloadData(format) {
    if (!fullDatasetRows.length) {
        return;
    }

    const baseName = currentDataset.name.replace(/\s+/g, '_') + '_dataset';

    if (format === 'csv') {
        downloadCsv(baseName);
    } else if (format === 'json') {
        downloadJson(baseName);
    } else if (format === 'parquet') {
        // Parquet is only simulated: the user is told so and receives JSON instead.
        showNotification("Parquet format download simulated - actual conversion would require a server component");
        downloadJson(baseName + "_parquet_simulated");
    }
}
1303
 
1304
/**
 * Serialize `fullDatasetRows` to CSV and trigger a browser download.
 *
 * @param {string} filename - file name without extension; ".csv" is appended.
 */
function downloadCsv(filename) {
    // Serialize the rows and wrap them in a Blob the browser can save.
    const payload = new Blob([Papa.unparse(fullDatasetRows)], { type: 'text/csv' });
    const objectUrl = URL.createObjectURL(payload);

    // A temporary link is attached to the page and clicked programmatically.
    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = filename + '.csv';
    document.body.appendChild(link);
    link.click();

    // Detach the link and release the object URL shortly after the click.
    const cleanUp = () => {
        document.body.removeChild(link);
        URL.revokeObjectURL(objectUrl);
    };
    setTimeout(cleanUp, 100);
}
1324
 
1325
/**
 * Serialize `fullDatasetRows` to pretty-printed JSON and trigger a browser download.
 *
 * @param {string} filename - file name without extension; ".json" is appended.
 */
function downloadJson(filename) {
    // Two-space indented JSON wrapped in a Blob the browser can save.
    const serialized = JSON.stringify(fullDatasetRows, null, 2);
    const payload = new Blob([serialized], { type: 'application/json' });
    const objectUrl = URL.createObjectURL(payload);

    // A temporary link is attached to the page and clicked programmatically.
    const link = document.createElement('a');
    link.href = objectUrl;
    link.download = filename + '.json';
    document.body.appendChild(link);
    link.click();

    // Detach the link and release the object URL shortly after the click.
    const cleanUp = () => {
        document.body.removeChild(link);
        URL.revokeObjectURL(objectUrl);
    };
    setTimeout(cleanUp, 100);
}
1345
 
1346
/**
 * Switch the view back to the search page by toggling the `hidden` class
 * off on `searchPage` and on for `datasetPage`.
 */
function showSearchPage() {
    searchPage.classList.toggle('hidden', false);
    datasetPage.classList.toggle('hidden', true);
}
1350
 
1351
/**
 * Report an error: log it to the console and show it as an error notification.
 *
 * @param {*} message - value passed to `console.error` and to `showNotification`.
 */
function showError(message) {
    const asError = true;
    console.error(message);
    showNotification(message, asError);
}
1355
 
1356
/**
 * Show a transient notification in the bottom-right corner of the page.
 *
 * The notification is appended to `document.body`, faded out after 3 seconds
 * and removed from the document 300 ms later.
 *
 * @param {string} message - text to display.
 * @param {boolean} [isError=false] - red styling when truthy, green otherwise.
 */
function showNotification(message, isError = false) {
    const visibleMs = 3000;
    const fadeMs = 300;
    const palette = isError ? 'bg-red-500 text-white' : 'bg-green-500 text-white';

    const toast = document.createElement('div');
    toast.className = 'fixed bottom-4 right-4 px-6 py-3 rounded-lg shadow-lg '
        + palette
        + ' z-50 transition-opacity duration-300';
    toast.textContent = message;
    document.body.appendChild(toast);

    const detach = () => {
        document.body.removeChild(toast);
    };
    const fadeOut = () => {
        // The opacity change is animated by the transition classes set above.
        toast.style.opacity = '0';
        setTimeout(detach, fadeMs);
    };
    setTimeout(fadeOut, visibleMs);
}
1374
 
1375
/**
 * Populate the results area with a fixed set of example datasets.
 *
 * Stores the list in `currentDatasets`, renders it with `displayDatasets`
 * and reveals the "load more" container.
 */
function showPlaceholderDatasets() {
    // Entries flagged as real carry the search engine they are attributed to.
    const fromEngine = (name, tags, engine) => ({ name, tags, isReal: true, engine });
    const synthetic = (name, tags) => ({ name, tags, isReal: false });

    const placeholders = [
        fromEngine("NewsEventsPredict", ["classification", "media", "trend"], "AlltheInternet.com"),
        synthetic("FinancialForecast", ["economy", "stocks", "regression"]),
        fromEngine("HealthMonitor", ["science", "real-time", "anomaly detection"], "DuckDuckGo.com"),
        synthetic("SportsAnalysis", ["classification", "performance", "player tracking"]),
        fromEngine("RetailSalesAnalyzer", ["consumer behavior", "sales trend", "segmentation"], "Bing.com"),
        synthetic("SocialMediaSentiment", ["text classification", "opinion mining", "NLP"])
    ];

    currentDatasets = placeholders;
    displayDatasets(placeholders);
    loadMoreContainer.classList.remove('hidden');
}
1416
  function searchWithRealData(query) {
1417
  // Randomly select a search engine from the user's selected engines
1418
  currentEngine = selectedEngines[Math.floor(Math.random() * selectedEngines.length)];
 
1525
  }
1526
  }
1527
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1528
  function loadMoreDatasets() {
1529
  currentPage++;
1530
 
 
1990
  );
1991
  } catch (err) {
1992
  showError("Error sending message: " + err);
1993
+ // Fall back to AI data
1994
+ generateBatch(batchIndex + 1);
1995
  }
1996
  };
1997
 
 
2803
  engine_settings_button = gr.Button("Configure Search Engines", icon="https://img.icons8.com/ios-filled/50/000000/settings--v1.png", size="sm")
2804
 
2805
  # Engine Selection Modal
2806
+ with gr.Column(visible=False, id="engine-modal-container") as engine_modal:
2807
+ with gr.Blocks():
2808
+ with gr.Row():
2809
+ gr.Markdown("### Search Engine Settings", elem_classes="text-xl font-bold")
2810
+ close_modal_btn = gr.Button("")
2811
+ gr.Markdown("Select which search engines to use for real data retrieval. A diverse selection improves results.")
2812
+ engine_options_html_comp = gr.HTML(elem_id="engine-options")
2813
+ with gr.Row():
2814
+ select_all_engines_btn = gr.Button("Select All")
2815
+ deselect_all_engines_btn = gr.Button("Deselect All")
2816
+ save_engines_btn = gr.Button("Save Settings", variant="primary")
2817
 
2818
  # --- Dataset Detail Page UI ---
2819
  with gr.Column(visible=False, elem_id="dataset-page") as dataset_page:
 
2860
  if "I'm sorry" in line or "policy" in line: raise gr.Error("Inappropriate content detected.")
2861
  if generated_count >= MAX_NB_ITEMS_PER_GENERATION_CALL: break
2862
 
2863
+ match = re.match(r"^\s*\d+\.\s+(.+?)\s+\((.+?)\)", line) # Parse line format
2864
  if match:
2865
  dataset_name, tags = match.groups()
2866
  dataset_name, tags = dataset_name.strip(), tags.strip()
 
3125
  outputs=[selected_engines_state, current_engine_state, gr.Info()]
3126
  )
3127
 
3128
+ engine_settings_button.click(lambda: gr.Column(visible=True), outputs=[engine_modal])
3129
+ close_modal_btn.click(lambda: gr.Column(visible=False), outputs=[engine_modal])
3130
+
3131
  # Initial App Load Logic
3132
  @demo.load(outputs=([search_page, dataset_page, dataset_title_md, dataset_description_md, dataset_source_badge, dataset_source_info, dataset_share_textbox, full_dataset_section, save_dataset_button, open_dataset_message, search_bar] + # Outputs for detail page and search bar
3133
  buttons + [generated_texts_state] + # Outputs for search results buttons and state
 
3190
 
3191
 
3192
  if __name__ == "__main__":
3193
+ demo.launch(share=False, server_name="0.0.0.0")