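// PubMed search router: queries the NCBI E-utilities API to count publications
// mentioning machine-learning algorithms, with in-memory and on-disk caching.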
const express = require('express');
const fs = require('fs');
const path = require('path');
const axios = require('axios');
const NodeCache = require('node-cache');

const router = express.Router();
const cache = new NodeCache({ stdTTL: 3600 });

const ALGORITHMS_PATH = path.join(__dirname, '../../data/algorithms.json');
const TIMELINE_CACHE_PATH = path.join(__dirname, '../../data/timeline-cache.json');
const PUBMED_BASE_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils';
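// Read the algorithm definitions (names, synonyms, categories, blacklists) from disk.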
function loadAlgorithms() {
  const data = fs.readFileSync(ALGORITHMS_PATH, 'utf8');
  return JSON.parse(data);
}
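// Load the persistent per-year timeline cache, falling back to an empty object.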
function loadTimelineCache() {
  try {
    if (fs.existsSync(TIMELINE_CACHE_PATH)) {
      const data = fs.readFileSync(TIMELINE_CACHE_PATH, 'utf8');
      return JSON.parse(data);
    }
  } catch (error) {
    console.warn('Error loading timeline cache:', error.message);
  }
  return {};
}
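// Persist the timeline cache to disk so past-year counts survive restarts.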
function saveTimelineCache(cache) {
  try {
    fs.writeFileSync(TIMELINE_CACHE_PATH, JSON.stringify(cache, null, 2));
  } catch (error) {
    console.error('Error saving timeline cache:', error.message);
  }
}

function getCacheKey(algorithmKey, year) {
  return `${algorithmKey}-${year}`;
}

function isCurrentYear(year) {
  return year === new Date().getFullYear();
}
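// Count PubMed papers that pair the given problem text with any synonym of the
// algorithm, excluding blacklisted terms, reviews, meta-analyses, and systematic reviews.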
async function searchAlgorithmUsage(problem, algorithmKey, algorithmData) {
  try {
    const synonymQueries = algorithmData.synonyms.map(synonym =>
      `("${problem}" AND "${synonym}")`
    ).join(' OR ');

    // Build blacklist exclusions if they exist
    let blacklistExclusions = '';
    if (algorithmData.blacklist && algorithmData.blacklist.length > 0) {
      const blacklistTerms = algorithmData.blacklist.map(term => `NOT "${term}"`).join(' ');
      blacklistExclusions = ` ${blacklistTerms}`;
    }

    // Add filters to exclude review papers, meta-analyses, and systematic reviews
    const filteredQuery = `(${synonymQueries})${blacklistExclusions} NOT Review[Publication Type] NOT Meta-Analysis[Publication Type] NOT Systematic Review[Publication Type]`;
    const searchUrl = `${PUBMED_BASE_URL}/esearch.fcgi?db=pubmed&term=${encodeURIComponent(filteredQuery)}&retmode=json`;

    // Debug logging for CNN and GAN specifically
    if (algorithmKey === 'cnn') {
      console.log(`CNN Search for "${problem}":`);
      console.log(`Query: ${filteredQuery}`);
      console.log(`URL: ${searchUrl}`);
    }
    if (algorithmKey === 'gan') {
      console.log(`GAN Search for "${problem}":`);
      console.log(`Synonym queries: ${synonymQueries}`);
      console.log(`Blacklist exclusions: ${blacklistExclusions}`);
      console.log(`Final query: ${filteredQuery}`);
      console.log(`URL: ${searchUrl}`);
    }

    const response = await axios.get(searchUrl);
    const count = parseInt(response.data.esearchresult.count) || 0;

    // Debug logging for CNN and GAN results
    if (algorithmKey === 'cnn') {
      console.log(`CNN Results: ${count} papers found`);
      console.log(`Sample IDs:`, response.data.esearchresult.idlist?.slice(0, 3));
    }
    if (algorithmKey === 'gan') {
      console.log(`GAN Results: ${count} papers found`);
      console.log(`Sample IDs:`, response.data.esearchresult.idlist?.slice(0, 3));
    }

    return {
      algorithm: algorithmKey,
      name: algorithmData.name,
      category: algorithmData.category,
      description: algorithmData.description,
      count: count,
      sampleIds: response.data.esearchresult.idlist?.slice(0, 3) || []
    };
  } catch (error) {
    if (algorithmKey === 'cnn') {
      console.error(`CNN Search Error:`, error.message);
    }
    return {
      algorithm: algorithmKey,
      name: algorithmData.name,
      category: algorithmData.category,
      description: algorithmData.description,
      count: 0,
      sampleIds: []
    };
  }
}
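// POST /problem — search every algorithm against a free-text problem description
// and return per-algorithm publication counts, sorted by count.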
router.post('/problem', async (req, res) => {
  try {
    const { problem } = req.body;
    if (!problem) {
      return res.status(400).json({ error: 'Problem parameter is required' });
    }

    const algorithms = loadAlgorithms();
    const results = [];
    for (const [key, algo] of Object.entries(algorithms.algorithms)) {
      const result = await searchAlgorithmUsage(problem, key, algo);
      results.push(result);
    }

    results.sort((a, b) => b.count - a.count);
    res.json({
      problem,
      totalAlgorithms: results.length,
      results: results.filter(r => r.count > 0),
      allResults: results
    });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

async function delay(ms) {
  return new Promise(resolve => setTimeout(resolve, ms));
}
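// Fetch the all-time PubMed count for one algorithm (all synonyms OR'd together),
// using the in-memory cache (1 hour TTL) to avoid repeated API calls.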
async function fetchAlgorithmCount(key, algo) {
  const cacheKey = `dashboard-${key}`;
  const cached = cache.get(cacheKey);
  if (cached !== undefined) {
    console.log(`${algo.name}: ${cached} results (cached)`);
    return cached;
  }

  const generalQuery = algo.synonyms.map(s => `"${s}"`).join(' OR ');

  // Build blacklist exclusions if they exist
  let blacklistExclusions = '';
  if (algo.blacklist && algo.blacklist.length > 0) {
    const blacklistTerms = algo.blacklist.map(term => `NOT "${term}"`).join(' ');
    blacklistExclusions = ` ${blacklistTerms}`;
  }

  // Add filters to exclude review papers, meta-analyses, and systematic reviews
  const filteredQuery = `(${generalQuery})${blacklistExclusions} NOT Review[Publication Type] NOT Meta-Analysis[Publication Type] NOT Systematic Review[Publication Type]`;

  try {
    const searchUrl = `${PUBMED_BASE_URL}/esearch.fcgi?db=pubmed&term=${encodeURIComponent(filteredQuery)}&retmode=json`;
    const response = await axios.get(searchUrl, { timeout: 15000 });
    const count = parseInt(response.data.esearchresult.count) || 0;
    console.log(`${algo.name}: ${count} results for query: ${filteredQuery}`);
    cache.set(cacheKey, count);
    return count;
  } catch (error) {
    console.error(`Error fetching data for ${algo.name}:`, error.message);
    return 0;
  }
}
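// GET /dashboard-stats — overall publication counts per algorithm, grouped into
// classical_ml, deep_learning, and llms; requests run sequentially with a short delay.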
router.get('/dashboard-stats', async (req, res) => {
  try {
    const algorithms = loadAlgorithms();
    const stats = {
      classical_ml: [],
      deep_learning: [],
      llms: []
    };

    // Process algorithms sequentially to avoid rate limiting
    for (const [key, algo] of Object.entries(algorithms.algorithms)) {
      const count = await fetchAlgorithmCount(key, algo);
      stats[algo.category].push({
        algorithm: key,
        name: algo.name,
        count: count
      });
      // Add delay between requests to respect rate limits
      await delay(200);
    }

    stats.classical_ml.sort((a, b) => b.count - a.count);
    stats.deep_learning.sort((a, b) => b.count - a.count);
    stats.llms.sort((a, b) => b.count - a.count);

    res.json(stats);
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});
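// GET /pubmed-link — build a PubMed web search URL for a problem/algorithm pair,
// using the same filtered query as the count endpoints.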
router.get('/pubmed-link', (req, res) => {
  const { problem, algorithm } = req.query;
  if (!problem || !algorithm) {
    return res.status(400).json({ error: 'Both problem and algorithm parameters are required' });
  }

  const algorithms = loadAlgorithms();
  const algoData = algorithms.algorithms[algorithm];
  if (!algoData) {
    return res.status(404).json({ error: 'Algorithm not found' });
  }

  const synonymQueries = algoData.synonyms.map(synonym =>
    `("${problem}" AND "${synonym}")`
  ).join(' OR ');

  // Build blacklist exclusions if they exist
  let blacklistExclusions = '';
  if (algoData.blacklist && algoData.blacklist.length > 0) {
    const blacklistTerms = algoData.blacklist.map(term => `NOT "${term}"`).join(' ');
    blacklistExclusions = ` ${blacklistTerms}`;
  }

  // Add filters to exclude review papers for PubMed links too
  const filteredQuery = `(${synonymQueries})${blacklistExclusions} NOT Review[Publication Type] NOT Meta-Analysis[Publication Type] NOT Systematic Review[Publication Type]`;
  const pubmedUrl = `https://pubmed.ncbi.nlm.nih.gov/?term=${encodeURIComponent(filteredQuery)}`;
  res.json({ url: pubmedUrl });
});
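// Fetch the PubMed count for one algorithm in a single publication year.
// Past years are served from (and written to) the permanent disk cache;
// the current year uses the expiring in-memory cache. Retries with
// exponential backoff on HTTP 429 responses.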
async function fetchAlgorithmCountByYear(key, algo, year, diskCache, retryCount = 0) {
  const diskCacheKey = getCacheKey(key, year);

  // Check disk cache first for past years (which never change)
  if (!isCurrentYear(year) && diskCache[diskCacheKey] !== undefined) {
    console.log(`Using disk cache for ${algo.name} (${year}): ${diskCache[diskCacheKey]}`);
    return diskCache[diskCacheKey];
  }

  // Check memory cache for current year
  const memoryCacheKey = `timeline-${key}-${year}`;
  const memCached = cache.get(memoryCacheKey);
  if (isCurrentYear(year) && memCached !== undefined) {
    return memCached;
  }

  const generalQuery = algo.synonyms.map(s => `"${s}"`).join(' OR ');
  const yearFilter = `"${year}"[Date - Publication]`;

  // Build blacklist exclusions if they exist
  let blacklistExclusions = '';
  if (algo.blacklist && algo.blacklist.length > 0) {
    const blacklistTerms = algo.blacklist.map(term => `NOT "${term}"`).join(' ');
    blacklistExclusions = ` ${blacklistTerms}`;
  }

  const filteredQuery = `(${generalQuery}) AND ${yearFilter}${blacklistExclusions} NOT Review[Publication Type] NOT Meta-Analysis[Publication Type] NOT Systematic Review[Publication Type]`;

  try {
    const searchUrl = `${PUBMED_BASE_URL}/esearch.fcgi?db=pubmed&term=${encodeURIComponent(filteredQuery)}&retmode=json`;
    const response = await axios.get(searchUrl, { timeout: 15000 });
    const count = parseInt(response.data.esearchresult.count) || 0;

    // Save to appropriate cache
    if (isCurrentYear(year)) {
      // Current year: save to memory cache (expires)
      cache.set(memoryCacheKey, count);
    } else {
      // Past years: save to disk cache (permanent)
      diskCache[diskCacheKey] = count;
    }

    console.log(`Fetched ${algo.name} (${year}): ${count} papers`);
    return count;
  } catch (error) {
    if (error.response?.status === 429 && retryCount < 3) {
      const backoffTime = Math.pow(2, retryCount) * 1000; // 1s, 2s, 4s
      console.log(`Rate limited for ${algo.name} (${year}), retrying in ${backoffTime}ms (attempt ${retryCount + 1})`);
      await delay(backoffTime);
      return fetchAlgorithmCountByYear(key, algo, year, diskCache, retryCount + 1);
    }
    console.error(`Error fetching timeline data for ${algo.name} (${year}):`, error.message);
    return 0;
  }
}
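// GET /timeline-stream — same data as /timeline, but streamed over Server-Sent Events
// so the client can render progress while per-year counts are collected.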
router.get('/timeline-stream', async (req, res) => {
  const { startYear = 2015, endYear = 2024 } = req.query;
  const start = parseInt(startYear);
  const end = parseInt(endYear);
  if (Number.isNaN(start) || Number.isNaN(end) || start > end || start < 2010 || end > 2024) {
    return res.status(400).json({ error: 'Invalid year range' });
  }
  // Set up Server-Sent Events
  res.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Headers': 'Cache-Control'
  });

  const sendProgress = (data) => {
    res.write(`data: ${JSON.stringify(data)}\n\n`);
  };

  try {
    const algorithms = loadAlgorithms();
    const diskCache = loadTimelineCache();
    let cacheUpdated = false;

    const years = [];
    for (let year = start; year <= end; year++) {
      years.push(year);
    }

    const timelineData = [];
    const algorithmsData = [];

    // Initialize timeline structure
    for (const year of years) {
      timelineData.push({ year });
    }

    // Count total operations
    const totalAlgorithms = Object.keys(algorithms.algorithms).length;
    const totalYears = years.length;
    const totalOperations = totalAlgorithms * totalYears;
    let completedOperations = 0;
    let cachedResults = 0;
    let fetchedResults = 0;

    // Count cached vs fetched upfront
    for (const [key, algo] of Object.entries(algorithms.algorithms)) {
      for (const year of years) {
        const diskCacheKey = getCacheKey(key, year);
        if (!isCurrentYear(year) && diskCache[diskCacheKey] !== undefined) {
          cachedResults++;
        } else {
          fetchedResults++;
        }
      }
    }

    sendProgress({
      type: 'init',
      totalOperations,
      cachedResults,
      fetchedResults,
      message: 'Starting timeline data collection...'
    });

    // Process each algorithm
    for (const [key, algo] of Object.entries(algorithms.algorithms)) {
      const algorithmTimeline = {
        algorithm: key,
        name: algo.name,
        category: algo.category,
        data: []
      };

      sendProgress({
        type: 'algorithm_start',
        algorithm: algo.name,
        progress: Math.round((completedOperations / totalOperations) * 100),
        completed: completedOperations,
        total: totalOperations
      });
      // Get data for each year
      for (const year of years) {
        const diskCacheKey = getCacheKey(key, year);
        const wasCached = !isCurrentYear(year) && diskCache[diskCacheKey] !== undefined;

        const count = await fetchAlgorithmCountByYear(key, algo, year, diskCache);
        algorithmTimeline.data.push({ year, count });

        // Add to timeline data structure
        const yearIndex = timelineData.findIndex(item => item.year === year);
        if (yearIndex !== -1) {
          timelineData[yearIndex][key] = count;
        }

        completedOperations++;
        sendProgress({
          type: 'year_complete',
          algorithm: algo.name,
          year,
          count,
          progress: Math.round((completedOperations / totalOperations) * 100),
          completed: completedOperations,
          total: totalOperations
        });

        // A fresh fetch for a past year adds a new entry to the disk cache
        if (!wasCached && !isCurrentYear(year)) {
          cacheUpdated = true;
        }
        // Add delay only if we made an actual API call
        if (!wasCached) {
          await delay(500);
        }
      }
      algorithmsData.push(algorithmTimeline);
      sendProgress({
        type: 'algorithm_complete',
        algorithm: algo.name,
        progress: Math.round((completedOperations / totalOperations) * 100),
        completed: completedOperations,
        total: totalOperations
      });
    }

    // Save cache if updated
    if (cacheUpdated) {
      saveTimelineCache(diskCache);
      sendProgress({
        type: 'cache_saved',
        message: 'Timeline cache updated and saved to disk'
      });
    }

    // Send final results
    sendProgress({
      type: 'complete',
      timelineData,
      algorithms: algorithmsData,
      yearRange: { start, end },
      cacheStats: {
        cached: cachedResults,
        fetched: fetchedResults
      }
    });

    res.end();
  } catch (error) {
    sendProgress({
      type: 'error',
      error: error.message
    });
    res.end();
  }
});
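// GET /timeline — non-streaming variant: collects all per-year counts, then
// returns the complete timeline in a single JSON response.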
router.get('/timeline', async (req, res) => {
  try {
    const { startYear = 2015, endYear = 2024 } = req.query;
    const start = parseInt(startYear);
    const end = parseInt(endYear);
    if (Number.isNaN(start) || Number.isNaN(end) || start > end || start < 2010 || end > 2024) {
      return res.status(400).json({ error: 'Invalid year range' });
    }
    const algorithms = loadAlgorithms();
    const diskCache = loadTimelineCache();
    let cacheUpdated = false;

    const years = [];
    for (let year = start; year <= end; year++) {
      years.push(year);
    }

    const timelineData = [];
    const algorithmsData = [];

    // Initialize timeline structure
    for (const year of years) {
      timelineData.push({ year });
    }

    // Count how many API calls we'll need to make
    let totalApiCalls = 0;
    let cachedResults = 0;
    for (const [key, algo] of Object.entries(algorithms.algorithms)) {
      for (const year of years) {
        const diskCacheKey = getCacheKey(key, year);
        if (!isCurrentYear(year) && diskCache[diskCacheKey] !== undefined) {
          cachedResults++;
        } else {
          totalApiCalls++;
        }
      }
    }
    console.log(`Timeline request: ${cachedResults} cached results, ${totalApiCalls} API calls needed`);

    // Process each algorithm
    for (const [key, algo] of Object.entries(algorithms.algorithms)) {
      const algorithmTimeline = {
        algorithm: key,
        name: algo.name,
        category: algo.category,
        data: []
      };
      // Get data for each year
      for (const year of years) {
        const diskCacheKey = getCacheKey(key, year);
        const wasCached = !isCurrentYear(year) && diskCache[diskCacheKey] !== undefined;

        const count = await fetchAlgorithmCountByYear(key, algo, year, diskCache);
        algorithmTimeline.data.push({ year, count });

        // Add to timeline data structure
        const yearIndex = timelineData.findIndex(item => item.year === year);
        if (yearIndex !== -1) {
          timelineData[yearIndex][key] = count;
        }

        // A fresh fetch for a past year adds a new entry to the disk cache
        if (!wasCached && !isCurrentYear(year)) {
          cacheUpdated = true;
        }
        // Add delay only if we made an actual API call
        if (!wasCached) {
          await delay(500);
        }
      }
      algorithmsData.push(algorithmTimeline);
      console.log(`Completed timeline data for ${algo.name}`);
    }

    // Save updated cache to disk if needed
    if (cacheUpdated) {
      saveTimelineCache(diskCache);
      console.log('Timeline cache updated and saved to disk');
    }

    res.json({
      timelineData,
      algorithms: algorithmsData,
      yearRange: { start, end },
      cacheStats: {
        cached: cachedResults,
        fetched: totalApiCalls
      }
    });
  } catch (error) {
    console.error('Timeline API error:', error);
    res.status(500).json({ error: error.message });
  }
});

module.exports = router;