/**
 * Express router exposing PubMed publication-count statistics for the
 * algorithms listed in data/algorithms.json.
 *
 * Routes:
 *   POST /problem          - per-algorithm hit counts for a given problem
 *   GET  /dashboard-stats  - overall hit counts grouped by category
 *   GET  /pubmed-link      - PubMed web URL for a problem/algorithm pair
 *   GET  /timeline-stream  - per-year counts, streamed as Server-Sent Events
 *   GET  /timeline         - per-year counts as a single JSON response
 */
const express = require('express');
const fs = require('fs');
const path = require('path');
const axios = require('axios');
const NodeCache = require('node-cache');

const router = express.Router();

// In-memory cache for values that may still change (dashboard totals and
// current-year timeline counts). stdTTL is given in seconds per the
// node-cache documentation.
const cache = new NodeCache({ stdTTL: 3600 });

const ALGORITHMS_PATH = path.join(__dirname, '../../data/algorithms.json');
const TIMELINE_CACHE_PATH = path.join(__dirname, '../../data/timeline-cache.json');
const PUBMED_BASE_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils';

const REQUEST_TIMEOUT_MS = 15000;
const DASHBOARD_REQUEST_DELAY_MS = 200;
const TIMELINE_REQUEST_DELAY_MS = 500;
const MAX_RATE_LIMIT_RETRIES = 3;

const MIN_TIMELINE_YEAR = 2010;
const MAX_TIMELINE_YEAR = 2024;
const DEFAULT_START_YEAR = 2015;
const DEFAULT_END_YEAR = 2024;

// Appended to every query to exclude review papers, meta-analyses and
// systematic reviews.
const PUBLICATION_TYPE_EXCLUSIONS =
  ' NOT Review[Publication Type] NOT Meta-Analysis[Publication Type] NOT Systematic Review[Publication Type]';

// Algorithm keys for which verbose search logging is enabled.
const DEBUG_ALGORITHM_KEYS = new Set(['cnn', 'gan']);

/**
 * Reads and parses the algorithm definitions file.
 * @returns {object} Parsed JSON; callers read its `algorithms` map.
 * @throws If the file cannot be read or is not valid JSON.
 */
function loadAlgorithms() {
  return JSON.parse(fs.readFileSync(ALGORITHMS_PATH, 'utf8'));
}

/**
 * Loads the on-disk timeline cache.
 * @returns {object} The cached counts keyed by `getCacheKey`, or `{}` when the
 *   file is missing, unreadable, or does not contain a JSON object.
 */
function loadTimelineCache() {
  try {
    if (fs.existsSync(TIMELINE_CACHE_PATH)) {
      const parsed = JSON.parse(fs.readFileSync(TIMELINE_CACHE_PATH, 'utf8'));
      // Guard against files containing `null`, arrays or scalars: callers
      // index into the result with string keys.
      if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
        return parsed;
      }
      console.warn('Error loading timeline cache:', 'cache file does not contain a JSON object');
    }
  } catch (error) {
    console.warn('Error loading timeline cache:', error.message);
  }
  return {};
}

/**
 * Writes the timeline cache to disk. Failures are logged, not thrown.
 * @param {object} cacheData - Counts keyed by `getCacheKey`.
 */
function saveTimelineCache(cacheData) {
  try {
    fs.writeFileSync(TIMELINE_CACHE_PATH, JSON.stringify(cacheData, null, 2));
  } catch (error) {
    console.error('Error saving timeline cache:', error.message);
  }
}

/** Builds the disk-cache key for an algorithm/year pair. */
function getCacheKey(algorithmKey, year) {
  return `${algorithmKey}-${year}`;
}

/** Returns true when `year` strictly equals the current calendar year. */
function isCurrentYear(year) {
  return year === new Date().getFullYear();
}

/** Resolves after `ms` milliseconds. */
function delay(ms) {
  return new Promise((resolve) => setTimeout(resolve, ms));
}

/**
 * Wraps a term in double quotes for use as a phrase in a PubMed query.
 * Embedded double quotes are removed so that the term (which may come from
 * user input) cannot terminate the phrase and inject query operators.
 */
function quoteTerm(term) {
  return `"${String(term).replace(/"/g, '')}"`;
}

/** Builds ` NOT "a" NOT "b"` from `algo.blacklist`, or '' when there is none. */
function buildBlacklistExclusions(algo) {
  if (!Array.isArray(algo.blacklist) || algo.blacklist.length === 0) {
    return '';
  }
  const blacklistTerms = algo.blacklist.map((term) => `NOT ${quoteTerm(term)}`).join(' ');
  return ` ${blacklistTerms}`;
}

/** Builds `("problem" AND "syn1") OR ("problem" AND "syn2") ...`. */
function buildProblemClause(problem, algo) {
  return algo.synonyms
    .map((synonym) => `(${quoteTerm(problem)} AND ${quoteTerm(synonym)})`)
    .join(' OR ');
}

/** Builds `"syn1" OR "syn2" ...`. */
function buildSynonymClause(algo) {
  return algo.synonyms.map((synonym) => quoteTerm(synonym)).join(' OR ');
}

/** Full query for a problem/algorithm pair, including all exclusions. */
function buildProblemQuery(problem, algo) {
  return `(${buildProblemClause(problem, algo)})${buildBlacklistExclusions(algo)}${PUBLICATION_TYPE_EXCLUSIONS}`;
}

/**
 * Full query for an algorithm on its own, optionally limited to one
 * publication year.
 */
function buildAlgorithmQuery(algo, year) {
  const yearFilter = year === undefined ? '' : ` AND "${year}"[Date - Publication]`;
  return `(${buildSynonymClause(algo)})${yearFilter}${buildBlacklistExclusions(algo)}${PUBLICATION_TYPE_EXCLUSIONS}`;
}

/** Builds the esearch URL for a query string. */
function buildSearchUrl(query) {
  return `${PUBMED_BASE_URL}/esearch.fcgi?db=pubmed&term=${encodeURIComponent(query)}&retmode=json`;
}

/**
 * Performs an esearch request.
 * @returns {Promise<{count: number, idlist: (Array|undefined)}>}
 * @throws On network/HTTP errors (axios errors are propagated unchanged so
 *   callers can inspect `error.response`) or when the response body has no
 *   `esearchresult`.
 */
async function fetchSearchResult(searchUrl) {
  const response = await axios.get(searchUrl, { timeout: REQUEST_TIMEOUT_MS });
  const result = response.data?.esearchresult;
  if (!result) {
    throw new Error('Unexpected PubMed response: missing esearchresult');
  }
  return { count: Number.parseInt(result.count, 10) || 0, idlist: result.idlist };
}

/**
 * Counts PubMed papers that mention `problem` together with any synonym of
 * the given algorithm.
 *
 * Never rejects: on any failure the error is logged and a result with
 * `count: 0` and empty `sampleIds` is returned.
 *
 * @param {string} problem - Problem phrase to search for.
 * @param {string} algorithmKey - Key of the algorithm in algorithms.json.
 * @param {object} algorithmData - Algorithm entry (name, category,
 *   description, synonyms, optional blacklist).
 * @returns {Promise<object>} `{ algorithm, name, category, description, count, sampleIds }`
 */
async function searchAlgorithmUsage(problem, algorithmKey, algorithmData) {
  const summary = {
    algorithm: algorithmKey,
    name: algorithmData.name,
    category: algorithmData.category,
    description: algorithmData.description,
  };
  const debugEnabled = DEBUG_ALGORITHM_KEYS.has(algorithmKey);
  const debugLabel = String(algorithmKey).toUpperCase();

  try {
    const filteredQuery = buildProblemQuery(problem, algorithmData);
    const searchUrl = buildSearchUrl(filteredQuery);

    if (debugEnabled) {
      console.log(`${debugLabel} Search for "${problem}":`);
      if (algorithmKey === 'gan') {
        console.log(`Synonym queries: ${buildProblemClause(problem, algorithmData)}`);
        console.log(`Blacklist exclusions: ${buildBlacklistExclusions(algorithmData)}`);
        console.log(`Final query: ${filteredQuery}`);
      } else {
        console.log(`Query: ${filteredQuery}`);
      }
      console.log(`URL: ${searchUrl}`);
    }

    const { count, idlist } = await fetchSearchResult(searchUrl);
    const sampleIds = idlist?.slice(0, 3) || [];

    if (debugEnabled) {
      console.log(`${debugLabel} Results: ${count} papers found`);
      console.log('Sample IDs:', sampleIds);
    }

    return { ...summary, count, sampleIds };
  } catch (error) {
    // Report failures for every algorithm; a silent zero is indistinguishable
    // from a genuine "no papers found".
    console.error(`Error searching PubMed for ${algorithmData.name}:`, error.message);
    return { ...summary, count: 0, sampleIds: [] };
  }
}

// POST /problem  body: { problem }
// Responds with counts for every algorithm, sorted by count descending.
router.post('/problem', async (req, res) => {
  try {
    const { problem } = req.body || {};

    if (!problem) {
      return res.status(400).json({ error: 'Problem parameter is required' });
    }

    const algorithms = loadAlgorithms();
    const results = [];

    // Sequential on purpose: one PubMed request in flight at a time.
    for (const [key, algo] of Object.entries(algorithms.algorithms)) {
      results.push(await searchAlgorithmUsage(problem, key, algo));
    }

    results.sort((a, b) => b.count - a.count);

    res.json({
      problem,
      totalAlgorithms: results.length,
      results: results.filter((r) => r.count > 0),
      allResults: results,
    });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

/** Memory-cache key used for an algorithm's dashboard total. */
function getDashboardCacheKey(key) {
  return `dashboard-${key}`;
}

/**
 * Returns the overall PubMed count for an algorithm, using the memory cache
 * when possible.
 *
 * @param {string} key - Algorithm key.
 * @param {object} algo - Algorithm entry.
 * @returns {Promise<number>} The count, or 0 if the request fails (failures
 *   are logged and not cached).
 */
async function fetchAlgorithmCount(key, algo) {
  const cacheKey = getDashboardCacheKey(key);
  const cached = cache.get(cacheKey);

  if (cached !== undefined) {
    console.log(`${algo.name}: ${cached} results (cached)`);
    return cached;
  }

  const filteredQuery = buildAlgorithmQuery(algo);

  try {
    const { count } = await fetchSearchResult(buildSearchUrl(filteredQuery));
    console.log(`${algo.name}: ${count} results for query: ${filteredQuery}`);
    cache.set(cacheKey, count);
    return count;
  } catch (error) {
    console.error(`Error fetching data for ${algo.name}:`, error.message);
    return 0;
  }
}

// GET /dashboard-stats
// Responds with { <category>: [{ algorithm, name, count }, ...] }, each list
// sorted by count descending.
router.get('/dashboard-stats', async (req, res) => {
  try {
    const algorithms = loadAlgorithms();
    const stats = {
      classical_ml: [],
      deep_learning: [],
      llms: [],
    };

    // Process algorithms sequentially to avoid rate limiting.
    for (const [key, algo] of Object.entries(algorithms.algorithms)) {
      const needsRequest = cache.get(getDashboardCacheKey(key)) === undefined;
      const count = await fetchAlgorithmCount(key, algo);

      // Tolerate categories other than the three predefined ones instead of
      // failing the whole request.
      if (!Array.isArray(stats[algo.category])) {
        stats[algo.category] = [];
      }
      stats[algo.category].push({ algorithm: key, name: algo.name, count });

      // Only pause when a PubMed request was actually attempted.
      if (needsRequest) {
        await delay(DASHBOARD_REQUEST_DELAY_MS);
      }
    }

    for (const entries of Object.values(stats)) {
      entries.sort((a, b) => b.count - a.count);
    }

    res.json(stats);
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

// GET /pubmed-link?problem=...&algorithm=...
// Responds with { url } pointing at the PubMed web search for the same query
// that POST /problem uses.
router.get('/pubmed-link', (req, res) => {
  try {
    const { problem, algorithm } = req.query;

    if (!problem || !algorithm) {
      return res.status(400).json({ error: 'Both problem and algorithm parameters are required' });
    }

    const algorithms = loadAlgorithms();

    // Own-property check so inherited keys such as "constructor" or
    // "__proto__" are treated as unknown algorithms.
    const isKnown = Object.prototype.hasOwnProperty.call(algorithms.algorithms, algorithm);
    const algoData = isKnown ? algorithms.algorithms[algorithm] : undefined;

    if (!algoData) {
      return res.status(404).json({ error: 'Algorithm not found' });
    }

    const filteredQuery = buildProblemQuery(problem, algoData);
    const pubmedUrl = `https://pubmed.ncbi.nlm.nih.gov/?term=${encodeURIComponent(filteredQuery)}`;

    res.json({ url: pubmedUrl });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

/** Memory-cache key used for a current-year timeline count. */
function getTimelineMemoryKey(key, year) {
  return `timeline-${key}-${year}`;
}

/** True when a past-year count for the pair is present in the disk cache. */
function isDiskCached(diskCache, key, year) {
  return !isCurrentYear(year) && diskCache[getCacheKey(key, year)] !== undefined;
}

/**
 * True when `fetchAlgorithmCountByYear` can answer without contacting PubMed:
 * past years from the disk cache, the current year from the memory cache.
 */
function isTimelineCountCached(diskCache, key, year) {
  if (isCurrentYear(year)) {
    return cache.get(getTimelineMemoryKey(key, year)) !== undefined;
  }
  return diskCache[getCacheKey(key, year)] !== undefined;
}

/**
 * Returns the PubMed count for an algorithm in a single publication year.
 *
 * Past years are read from / written to `diskCache` (the caller decides when
 * to persist it); the current year uses the expiring memory cache. HTTP 429
 * responses are retried with exponential backoff (1s, 2s, 4s).
 *
 * @param {string} key - Algorithm key.
 * @param {object} algo - Algorithm entry.
 * @param {number} year - Publication year.
 * @param {object} diskCache - Mutable disk-cache object; updated in place on
 *   a successful past-year fetch.
 * @param {number} [retryCount=0] - Number of rate-limit retries already made.
 * @returns {Promise<number>} The count, or 0 on failure (failures are logged
 *   and not cached).
 */
async function fetchAlgorithmCountByYear(key, algo, year, diskCache, retryCount = 0) {
  const diskCacheKey = getCacheKey(key, year);
  const currentYear = isCurrentYear(year);

  // Check disk cache first for past years (which never change).
  if (!currentYear && diskCache[diskCacheKey] !== undefined) {
    console.log(`Using disk cache for ${algo.name} (${year}): ${diskCache[diskCacheKey]}`);
    return diskCache[diskCacheKey];
  }

  // Check memory cache for current year.
  const memoryCacheKey = getTimelineMemoryKey(key, year);
  if (currentYear) {
    const memCached = cache.get(memoryCacheKey);
    if (memCached !== undefined) {
      return memCached;
    }
  }

  const filteredQuery = buildAlgorithmQuery(algo, year);

  try {
    const { count } = await fetchSearchResult(buildSearchUrl(filteredQuery));

    if (currentYear) {
      // Current year: save to memory cache (expires).
      cache.set(memoryCacheKey, count);
    } else {
      // Past years: save to disk cache (permanent).
      diskCache[diskCacheKey] = count;
    }

    console.log(`Fetched ${algo.name} (${year}): ${count} papers`);
    return count;
  } catch (error) {
    if (error.response?.status === 429 && retryCount < MAX_RATE_LIMIT_RETRIES) {
      const backoffTime = Math.pow(2, retryCount) * 1000; // 1s, 2s, 4s
      console.log(`Rate limited for ${algo.name} (${year}), retrying in ${backoffTime}ms (attempt ${retryCount + 1})`);
      await delay(backoffTime);
      return fetchAlgorithmCountByYear(key, algo, year, diskCache, retryCount + 1);
    }

    console.error(`Error fetching timeline data for ${algo.name} (${year}):`, error.message);
    return 0;
  }
}

/**
 * Parses and validates the `startYear` / `endYear` query parameters.
 * @returns {{start: number, end: number}|null} The range, or null when either
 *   bound is not an integer or the range is outside
 *   [MIN_TIMELINE_YEAR, MAX_TIMELINE_YEAR] or reversed.
 */
function parseYearRange(query) {
  const { startYear = DEFAULT_START_YEAR, endYear = DEFAULT_END_YEAR } = query;
  const start = Number.parseInt(startYear, 10);
  const end = Number.parseInt(endYear, 10);

  // NaN compares false against everything, so it must be rejected explicitly.
  const isValid =
    Number.isInteger(start) &&
    Number.isInteger(end) &&
    start <= end &&
    start >= MIN_TIMELINE_YEAR &&
    end <= MAX_TIMELINE_YEAR;

  return isValid ? { start, end } : null;
}

/** Returns [start, start + 1, ..., end]. */
function buildYearList(start, end) {
  return Array.from({ length: end - start + 1 }, (_, offset) => start + offset);
}

/**
 * Counts how many algorithm/year pairs are already in the disk cache and how
 * many are not (the latter includes every current-year pair).
 * @returns {{cached: number, fetched: number}}
 */
function countDiskCacheHits(algorithmEntries, years, diskCache) {
  let cached = 0;
  let fetched = 0;
  for (const [key] of algorithmEntries) {
    for (const year of years) {
      if (isDiskCached(diskCache, key, year)) {
        cached++;
      } else {
        fetched++;
      }
    }
  }
  return { cached, fetched };
}

/** Integer percentage of `completed` out of `total` (100 when total is 0). */
function toPercent(completed, total) {
  return total === 0 ? 100 : Math.round((completed / total) * 100);
}

/**
 * Collects per-year counts for every algorithm, one request at a time.
 *
 * @param {Array<[string, object]>} algorithmEntries - `[key, algo]` pairs.
 * @param {number[]} years - Years to collect, ascending.
 * @param {object} diskCache - Mutable disk cache (updated in place).
 * @param {object} [options]
 * @param {function(object): void} [options.onEvent] - Receives
 *   `algorithm_start`, `year_complete` and `algorithm_complete` events.
 * @param {function(): boolean} [options.shouldStop] - Checked before each
 *   algorithm and each year; collection stops early when it returns true.
 * @returns {Promise<{timelineData: object[], algorithmsData: object[], cacheUpdated: boolean}>}
 */
async function collectTimeline(algorithmEntries, years, diskCache, options = {}) {
  const { onEvent = () => {}, shouldStop = () => false } = options;

  const timelineData = years.map((year) => ({ year }));
  const rowByYear = new Map(timelineData.map((row) => [row.year, row]));
  const algorithmsData = [];

  const total = algorithmEntries.length * years.length;
  let completed = 0;
  let cacheUpdated = false;

  for (const [key, algo] of algorithmEntries) {
    if (shouldStop()) {
      break;
    }

    const algorithmTimeline = {
      algorithm: key,
      name: algo.name,
      category: algo.category,
      data: [],
    };

    onEvent({
      type: 'algorithm_start',
      algorithm: algo.name,
      progress: toPercent(completed, total),
      completed,
      total,
    });

    for (const year of years) {
      if (shouldStop()) {
        break;
      }

      // Decide BEFORE fetching whether a PubMed request will be needed; after
      // a successful fetch the disk cache already holds the value, so a check
      // made afterwards cannot tell a cache hit from a fresh fetch.
      const wasCached = isTimelineCountCached(diskCache, key, year);
      const count = await fetchAlgorithmCountByYear(key, algo, year, diskCache);

      algorithmTimeline.data.push({ year, count });
      rowByYear.get(year)[key] = count;

      completed++;
      onEvent({
        type: 'year_complete',
        algorithm: algo.name,
        year,
        count,
        progress: toPercent(completed, total),
        completed,
        total,
      });

      if (!wasCached) {
        // A failed fetch leaves the disk cache untouched, so only flag the
        // cache as dirty when a new past-year value was actually stored.
        if (!isCurrentYear(year) && diskCache[getCacheKey(key, year)] !== undefined) {
          cacheUpdated = true;
        }
        // Pause after every attempted request to respect rate limits.
        await delay(TIMELINE_REQUEST_DELAY_MS);
      }
    }

    algorithmsData.push(algorithmTimeline);

    onEvent({
      type: 'algorithm_complete',
      algorithm: algo.name,
      progress: toPercent(completed, total),
      completed,
      total,
    });
  }

  return { timelineData, algorithmsData, cacheUpdated };
}

// GET /timeline-stream?startYear=&endYear=
// Streams progress as Server-Sent Events and finishes with a `complete` event
// carrying the same payload that GET /timeline returns.
router.get('/timeline-stream', async (req, res) => {
  const range = parseYearRange(req.query);

  if (!range) {
    return res.status(400).json({ error: 'Invalid year range' });
  }
  const { start, end } = range;

  // Set up Server-Sent Events.
  res.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Headers': 'Cache-Control',
  });

  // Stop querying PubMed once the client has disconnected.
  let clientGone = false;
  res.on('close', () => {
    clientGone = true;
  });

  const sendProgress = (data) => {
    if (!clientGone) {
      res.write(`data: ${JSON.stringify(data)}\n\n`);
    }
  };

  try {
    const algorithms = loadAlgorithms();
    const diskCache = loadTimelineCache();
    const algorithmEntries = Object.entries(algorithms.algorithms);
    const years = buildYearList(start, end);

    const { cached: cachedResults, fetched: fetchedResults } =
      countDiskCacheHits(algorithmEntries, years, diskCache);

    sendProgress({
      type: 'init',
      totalOperations: algorithmEntries.length * years.length,
      cachedResults,
      fetchedResults,
      message: 'Starting timeline data collection...',
    });

    const { timelineData, algorithmsData, cacheUpdated } = await collectTimeline(
      algorithmEntries,
      years,
      diskCache,
      { onEvent: sendProgress, shouldStop: () => clientGone },
    );

    // Persist whatever was fetched, even if the client left early.
    if (cacheUpdated) {
      saveTimelineCache(diskCache);
      sendProgress({
        type: 'cache_saved',
        message: 'Timeline cache updated and saved to disk',
      });
    }

    sendProgress({
      type: 'complete',
      timelineData,
      algorithms: algorithmsData,
      yearRange: { start, end },
      cacheStats: { cached: cachedResults, fetched: fetchedResults },
    });

    res.end();
  } catch (error) {
    sendProgress({ type: 'error', error: error.message });
    res.end();
  }
});

// GET /timeline?startYear=&endYear=
// Responds with { timelineData, algorithms, yearRange, cacheStats }.
router.get('/timeline', async (req, res) => {
  try {
    const range = parseYearRange(req.query);

    if (!range) {
      return res.status(400).json({ error: 'Invalid year range' });
    }
    const { start, end } = range;

    const algorithms = loadAlgorithms();
    const diskCache = loadTimelineCache();
    const algorithmEntries = Object.entries(algorithms.algorithms);
    const years = buildYearList(start, end);

    const { cached: cachedResults, fetched: totalApiCalls } =
      countDiskCacheHits(algorithmEntries, years, diskCache);

    console.log(`Timeline request: ${cachedResults} cached results, ${totalApiCalls} API calls needed`);

    const { timelineData, algorithmsData, cacheUpdated } = await collectTimeline(
      algorithmEntries,
      years,
      diskCache,
      {
        onEvent: (event) => {
          if (event.type === 'algorithm_complete') {
            console.log(`Completed timeline data for ${event.algorithm}`);
          }
        },
      },
    );

    // Save updated cache to disk if needed.
    if (cacheUpdated) {
      saveTimelineCache(diskCache);
      console.log('Timeline cache updated and saved to disk');
    }

    res.json({
      timelineData,
      algorithms: algorithmsData,
      yearRange: { start, end },
      cacheStats: { cached: cachedResults, fetched: totalApiCalls },
    });
  } catch (error) {
    console.error('Timeline API error:', error);
    res.status(500).json({ error: error.message });
  }
});

module.exports = router;