Spaces:

lion-ai
/

MedicalAIWiki

Running

File size: 18,040 Bytes

const express = require('express');
const fs = require('fs');
const path = require('path');
const axios = require('axios');
const NodeCache = require('node-cache');
const router = express.Router();

// Shared in-memory cache. stdTTL is node-cache's default time-to-live for
// entries; node-cache expresses TTLs in seconds, so 3600 is one hour.
const cache = new NodeCache({ stdTTL: 3600 });

// Data files, resolved relative to this module's directory.
const ALGORITHMS_PATH = path.join(__dirname, '../../data/algorithms.json');
const TIMELINE_CACHE_PATH = path.join(__dirname, '../../data/timeline-cache.json');
// Base URL that the esearch.fcgi requests in this module are built on.
const PUBMED_BASE_URL = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils';

/**
 * Reads the algorithm catalogue at ALGORITHMS_PATH and parses it as JSON.
 *
 * The file is read synchronously on every call. Read errors and JSON syntax
 * errors are not handled here and propagate to the caller.
 *
 * @returns {*} The parsed contents of the catalogue file.
 */
function loadAlgorithms() {
  return JSON.parse(fs.readFileSync(ALGORITHMS_PATH, 'utf8'));
}

/**
 * Loads the persisted timeline cache from TIMELINE_CACHE_PATH.
 *
 * Best-effort: a missing file yields an empty cache silently, and an
 * unreadable or malformed file yields an empty cache with a warning.
 * The result is always a plain object, so callers can index it directly;
 * a file containing valid JSON of another shape (null, an array, a
 * primitive) is treated as corrupt rather than returned as-is.
 *
 * @returns {object} The cached entries, or `{}` when none can be loaded.
 */
function loadTimelineCache() {
  try {
    if (fs.existsSync(TIMELINE_CACHE_PATH)) {
      const parsed = JSON.parse(fs.readFileSync(TIMELINE_CACHE_PATH, 'utf8'));
      const isPlainObject =
        parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed);
      if (isPlainObject) {
        return parsed;
      }
      console.warn('Error loading timeline cache:', 'expected a JSON object');
    }
  } catch (error) {
    console.warn('Error loading timeline cache:', error.message);
  }
  return {};
}

/**
 * Writes the timeline cache to TIMELINE_CACHE_PATH as 2-space-indented JSON.
 *
 * Best-effort: serialization and write failures are logged and swallowed.
 *
 * @param {object} cache - The cache entries to persist. Note that this
 *   parameter shadows the module-level in-memory `cache` inside this function.
 */
function saveTimelineCache(cache) {
  try {
    const serialized = JSON.stringify(cache, null, 2);
    fs.writeFileSync(TIMELINE_CACHE_PATH, serialized);
  } catch (error) {
    console.error('Error saving timeline cache:', error.message);
  }
}

/**
 * Builds the timeline disk-cache key for one algorithm/year pair.
 *
 * @param {string} algorithmKey - Algorithm identifier.
 * @param {number|string} year - Publication year.
 * @returns {string} The two values joined by a hyphen, e.g. "cnn-2020".
 */
function getCacheKey(algorithmKey, year) {
  const separator = '-';
  return `${algorithmKey}${separator}${year}`;
}

/**
 * Tells whether `year` is the current calendar year in local time.
 *
 * The comparison is strict, so a numeric string such as "2024" never matches.
 *
 * @param {number} year - Year to test.
 * @returns {boolean} True only when `year` equals `new Date().getFullYear()`.
 */
function isCurrentYear(year) {
  const thisYear = new Date().getFullYear();
  return thisYear === year;
}

/**
 * Counts PubMed papers that mention a problem together with an algorithm.
 *
 * The search term ORs one `("problem" AND "synonym")` clause per synonym,
 * appends a `NOT "term"` exclusion per blacklist entry, and excludes the
 * Review, Meta-Analysis and Systematic Review publication types.
 *
 * Request and parsing failures are logged and reported as a zero-count
 * result, so one failing lookup does not abort a batch of lookups.
 *
 * @param {string} problem - Problem text, interpolated into the query as a quoted phrase.
 * @param {string} algorithmKey - Algorithm identifier echoed back in the result.
 * @param {object} algorithmData - Catalogue entry; reads `name`, `category`,
 *   `description`, `synonyms` and the optional `blacklist`.
 * @returns {Promise<{algorithm: string, name: *, category: *, description: *, count: number, sampleIds: Array}>}
 *   The match count plus up to three IDs from the response's `idlist`.
 */
async function searchAlgorithmUsage(problem, algorithmKey, algorithmData) {
  try {
    const synonymQueries = algorithmData.synonyms
      .map((synonym) => `("${problem}" AND "${synonym}")`)
      .join(' OR ');

    // Build blacklist exclusions if they exist
    let blacklistExclusions = '';
    if (algorithmData.blacklist && algorithmData.blacklist.length > 0) {
      const blacklistTerms = algorithmData.blacklist.map((term) => `NOT "${term}"`).join(' ');
      blacklistExclusions = ` ${blacklistTerms}`;
    }

    // Add filters to exclude review papers, meta-analyses, and systematic reviews
    const filteredQuery = `(${synonymQueries})${blacklistExclusions} NOT Review[Publication Type] NOT Meta-Analysis[Publication Type] NOT Systematic Review[Publication Type]`;

    const searchUrl = `${PUBMED_BASE_URL}/esearch.fcgi?db=pubmed&term=${encodeURIComponent(filteredQuery)}&retmode=json`;

    // Debug logging for CNN and GAN specifically
    if (algorithmKey === 'cnn') {
      console.log(`CNN Search for "${problem}":`);
      console.log(`Query: ${filteredQuery}`);
      console.log(`URL: ${searchUrl}`);
    }

    if (algorithmKey === 'gan') {
      console.log(`GAN Search for "${problem}":`);
      console.log(`Synonym queries: ${synonymQueries}`);
      console.log(`Blacklist exclusions: ${blacklistExclusions}`);
      console.log(`Final query: ${filteredQuery}`);
      console.log(`URL: ${searchUrl}`);
    }

    // Same timeout as the other PubMed lookups in this module, so a stalled
    // connection cannot hang the calling request indefinitely.
    const response = await axios.get(searchUrl, { timeout: 15000 });
    const { count: rawCount, idlist } = response.data.esearchresult;
    const count = Number.parseInt(rawCount, 10) || 0;
    const sampleIds = idlist?.slice(0, 3);

    // Debug logging for CNN and GAN results
    if (algorithmKey === 'cnn') {
      console.log(`CNN Results: ${count} papers found`);
      console.log(`Sample IDs:`, sampleIds);
    }

    if (algorithmKey === 'gan') {
      console.log(`GAN Results: ${count} papers found`);
      console.log(`Sample IDs:`, sampleIds);
    }

    return {
      algorithm: algorithmKey,
      name: algorithmData.name,
      category: algorithmData.category,
      description: algorithmData.description,
      count,
      sampleIds: sampleIds || []
    };
  } catch (error) {
    // Log every failure: the zero-count fallback below is otherwise
    // indistinguishable from a genuine "no papers found".
    console.error(`Error searching PubMed for ${algorithmData.name}:`, error.message);
    return {
      algorithm: algorithmKey,
      name: algorithmData.name,
      category: algorithmData.category,
      description: algorithmData.description,
      count: 0,
      sampleIds: []
    };
  }
}

/**
 * POST /problem
 *
 * Body: `{ problem: string }`. Looks up every catalogued algorithm against
 * the problem and responds with the per-algorithm results sorted by
 * descending count: `results` holds only non-zero counts, `allResults`
 * holds everything.
 *
 * Responds 400 when `problem` is missing, not a string, or blank, and 500
 * with `{ error }` on unexpected failures.
 */
router.post('/problem', async (req, res) => {
  try {
    // req.body is undefined when no body parser ran; treat that as a bad
    // request instead of letting the destructuring throw into a 500.
    const { problem } = req.body || {};
    // The value is interpolated into the search term as a quoted phrase, so
    // only a non-blank string produces a meaningful query.
    if (typeof problem !== 'string' || problem.trim() === '') {
      return res.status(400).json({ error: 'Problem parameter is required' });
    }

    const algorithms = loadAlgorithms();
    const entries = Object.entries(algorithms.algorithms);
    const results = [];

    for (let i = 0; i < entries.length; i++) {
      const [key, algo] = entries[i];
      // Pause between requests to respect rate limits, as the dashboard
      // route does. A rate-limited lookup comes back from
      // searchAlgorithmUsage as a misleading zero count.
      if (i > 0) {
        await delay(200);
      }
      results.push(await searchAlgorithmUsage(problem, key, algo));
    }

    results.sort((a, b) => b.count - a.count);

    res.json({
      problem,
      totalAlgorithms: results.length,
      results: results.filter((r) => r.count > 0),
      allResults: results
    });
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

/**
 * Resolves after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to setTimeout.
 * @returns {Promise<undefined>} A promise that fulfils with no value.
 */
async function delay(ms) {
  await new Promise((wake) => {
    setTimeout(wake, ms);
  });
}

/**
 * Returns the overall PubMed paper count for one algorithm.
 *
 * The in-memory cache is consulted first under the key `dashboard-<key>`.
 * On a miss, the synonyms are ORed as quoted phrases, blacklist terms are
 * appended as `NOT "term"`, and the Review, Meta-Analysis and Systematic
 * Review publication types are excluded. A successful count is cached;
 * a failed request is logged and reported as 0 without being cached.
 *
 * @param {string} key - Algorithm identifier used in the cache key.
 * @param {object} algo - Catalogue entry; reads `name`, `synonyms` and the optional `blacklist`.
 * @returns {Promise<number>} The cached or freshly fetched count, or 0 on request failure.
 */
async function fetchAlgorithmCount(key, algo) {
  const cacheKey = `dashboard-${key}`;
  const hit = cache.get(cacheKey);
  if (hit !== undefined) {
    console.log(`${algo.name}: ${hit} results (cached)`);
    return hit;
  }

  const quotedSynonyms = algo.synonyms.map((synonym) => `"${synonym}"`);
  const generalQuery = quotedSynonyms.join(' OR ');

  // Optional exclusions, each rendered as: NOT "term"
  const hasBlacklist = algo.blacklist && algo.blacklist.length > 0;
  const blacklistExclusions = hasBlacklist
    ? ` ${algo.blacklist.map((term) => `NOT "${term}"`).join(' ')}`
    : '';

  // Review-style publication types are always filtered out.
  const publicationTypeFilters = ' NOT Review[Publication Type] NOT Meta-Analysis[Publication Type] NOT Systematic Review[Publication Type]';
  const filteredQuery = `(${generalQuery})${blacklistExclusions}${publicationTypeFilters}`;

  try {
    const term = encodeURIComponent(filteredQuery);
    const searchUrl = `${PUBMED_BASE_URL}/esearch.fcgi?db=pubmed&term=${term}&retmode=json`;
    const { data } = await axios.get(searchUrl, { timeout: 15000 });
    const count = parseInt(data.esearchresult.count) || 0;

    console.log(`${algo.name}: ${count} results for query: ${filteredQuery}`);
    cache.set(cacheKey, count);
    return count;
  } catch (error) {
    console.error(`Error fetching data for ${algo.name}:`, error.message);
    return 0;
  }
}

/**
 * GET /dashboard-stats
 *
 * Responds with the overall paper count of every catalogued algorithm,
 * grouped by category and sorted by descending count within each group.
 * The `classical_ml`, `deep_learning` and `llms` groups are always present;
 * any other category found in the catalogue gets its own group instead of
 * failing the whole request. Responds 500 with `{ error }` on failure.
 */
router.get('/dashboard-stats', async (req, res) => {
  try {
    const algorithms = loadAlgorithms();
    const stats = {
      classical_ml: [],
      deep_learning: [],
      llms: []
    };

    // Process algorithms sequentially to avoid rate limiting
    for (const [key, algo] of Object.entries(algorithms.algorithms)) {
      // Sampled before the lookup, using the same key fetchAlgorithmCount
      // caches under, to tell whether a network request is about to happen.
      const servedFromCache = cache.get(`dashboard-${key}`) !== undefined;
      const count = await fetchAlgorithmCount(key, algo);

      if (!Array.isArray(stats[algo.category])) {
        stats[algo.category] = [];
      }
      stats[algo.category].push({
        algorithm: key,
        name: algo.name,
        count
      });

      // The pause exists to respect rate limits, so it is only needed when a
      // request was actually sent; cache hits return immediately.
      if (!servedFromCache) {
        await delay(200);
      }
    }

    for (const group of Object.values(stats)) {
      group.sort((a, b) => b.count - a.count);
    }

    res.json(stats);
  } catch (error) {
    res.status(500).json({ error: error.message });
  }
});

/**
 * GET /pubmed-link?problem=...&algorithm=...
 *
 * Responds with `{ url }`, a pubmed.ncbi.nlm.nih.gov search link for the
 * query shape used by the problem search: one `("problem" AND "synonym")`
 * clause per synonym, blacklist exclusions, and the review-type filters.
 *
 * Responds 400 when either parameter is missing or is not a single string,
 * 404 when the algorithm is not in the catalogue, and 500 with `{ error }`
 * on unexpected failures.
 */
router.get('/pubmed-link', (req, res) => {
  try {
    const { problem, algorithm } = req.query;
    if (!problem || !algorithm) {
      return res.status(400).json({ error: 'Both problem and algorithm parameters are required' });
    }
    // Repeated or bracketed query parameters are parsed into arrays/objects;
    // only plain strings make sense inside the quoted search term.
    if (typeof problem !== 'string' || typeof algorithm !== 'string') {
      return res.status(400).json({ error: 'problem and algorithm must each be a single string value' });
    }

    const algorithms = loadAlgorithms();
    // Own-property check so names such as "constructor" or "__proto__" do not
    // resolve to inherited Object.prototype members.
    const isKnown = Object.prototype.hasOwnProperty.call(algorithms.algorithms, algorithm);
    const algoData = isKnown ? algorithms.algorithms[algorithm] : undefined;

    if (!algoData) {
      return res.status(404).json({ error: 'Algorithm not found' });
    }

    const synonymQueries = algoData.synonyms
      .map((synonym) => `("${problem}" AND "${synonym}")`)
      .join(' OR ');

    // Build blacklist exclusions if they exist
    let blacklistExclusions = '';
    if (algoData.blacklist && algoData.blacklist.length > 0) {
      const blacklistTerms = algoData.blacklist.map((term) => `NOT "${term}"`).join(' ');
      blacklistExclusions = ` ${blacklistTerms}`;
    }

    // Add filters to exclude review papers for PubMed links too
    const filteredQuery = `(${synonymQueries})${blacklistExclusions} NOT Review[Publication Type] NOT Meta-Analysis[Publication Type] NOT Systematic Review[Publication Type]`;

    const pubmedUrl = `https://pubmed.ncbi.nlm.nih.gov/?term=${encodeURIComponent(filteredQuery)}`;

    res.json({ url: pubmedUrl });
  } catch (error) {
    // Same JSON error shape as the other routes in this router.
    res.status(500).json({ error: error.message });
  }
});

/**
 * Returns the PubMed paper count for one algorithm in one publication year.
 *
 * Lookup order:
 *   1. For a year other than the current one, an entry in `diskCache` is returned as-is.
 *   2. For the current year, an entry in the in-memory cache is returned as-is.
 *   3. Otherwise PubMed is queried. The result is stored in the in-memory
 *      cache for the current year, or written into `diskCache` (mutating the
 *      caller's object) for any other year.
 *
 * An HTTP 429 response is retried up to three times with 1s/2s/4s backoff by
 * re-entering this function. Any other failure, or a 429 once retries are
 * exhausted, is logged and reported as 0 without being cached.
 *
 * @param {string} key - Algorithm identifier used in both cache keys.
 * @param {object} algo - Catalogue entry; reads `name`, `synonyms` and the optional `blacklist`.
 * @param {number} year - Publication year to filter on.
 * @param {object} diskCache - Persistent cache object, keyed by getCacheKey(key, year).
 * @param {number} [retryCount=0] - Number of 429 retries already performed.
 * @returns {Promise<number>} The cached or fetched count, or 0 on failure.
 */
async function fetchAlgorithmCountByYear(key, algo, year, diskCache, retryCount = 0) {
  const diskCacheKey = getCacheKey(key, year);

  // Past years never change, so a disk entry is authoritative.
  const pastYear = !isCurrentYear(year);
  if (pastYear && diskCache[diskCacheKey] !== undefined) {
    const stored = diskCache[diskCacheKey];
    console.log(`Using disk cache for ${algo.name} (${year}): ${stored}`);
    return stored;
  }

  // The current year is only ever cached in memory, where entries expire.
  const memoryCacheKey = `timeline-${key}-${year}`;
  const memCached = cache.get(memoryCacheKey);
  if (!pastYear && memCached !== undefined) {
    return memCached;
  }

  const quotedSynonyms = algo.synonyms.map((synonym) => `"${synonym}"`);
  const generalQuery = quotedSynonyms.join(' OR ');
  const yearFilter = `"${year}"[Date - Publication]`;

  // Optional exclusions, each rendered as: NOT "term"
  const hasBlacklist = algo.blacklist && algo.blacklist.length > 0;
  const blacklistExclusions = hasBlacklist
    ? ` ${algo.blacklist.map((term) => `NOT "${term}"`).join(' ')}`
    : '';

  const publicationTypeFilters = ' NOT Review[Publication Type] NOT Meta-Analysis[Publication Type] NOT Systematic Review[Publication Type]';
  const filteredQuery = `(${generalQuery}) AND ${yearFilter}${blacklistExclusions}${publicationTypeFilters}`;

  try {
    const term = encodeURIComponent(filteredQuery);
    const searchUrl = `${PUBMED_BASE_URL}/esearch.fcgi?db=pubmed&term=${term}&retmode=json`;
    const response = await axios.get(searchUrl, { timeout: 15000 });
    const count = parseInt(response.data.esearchresult.count) || 0;

    // Current year -> expiring memory cache; any other year -> disk cache object.
    if (isCurrentYear(year)) {
      cache.set(memoryCacheKey, count);
    } else {
      diskCache[diskCacheKey] = count;
    }

    console.log(`Fetched ${algo.name} (${year}): ${count} papers`);
    return count;
  } catch (error) {
    const rateLimited = error.response?.status === 429;
    if (!rateLimited || !(retryCount < 3)) {
      console.error(`Error fetching timeline data for ${algo.name} (${year}):`, error.message);
      return 0;
    }

    const backoffTime = 2 ** retryCount * 1000; // 1s, 2s, 4s
    console.log(`Rate limited for ${algo.name} (${year}), retrying in ${backoffTime}ms (attempt ${retryCount + 1})`);
    await delay(backoffTime);
    return fetchAlgorithmCountByYear(key, algo, year, diskCache, retryCount + 1);
  }
}

/**
 * GET /timeline-stream?startYear=YYYY&endYear=YYYY
 *
 * Server-Sent Events variant of the timeline endpoint. Emits `init`,
 * `algorithm_start`, `year_complete`, `algorithm_complete`, optionally
 * `cache_saved`, and finally `complete` (carrying the full dataset) or
 * `error`, each as a JSON `data:` frame, then ends the response.
 *
 * Years default to 2015-2024 and must be integers with
 * 2010 <= startYear <= endYear <= max(2024, current year); anything else
 * gets a plain JSON 400 before the stream is opened.
 *
 * Work stops early if the client disconnects. Counts fetched up to that
 * point are still persisted to the disk cache.
 */
router.get('/timeline-stream', async (req, res) => {
  const { startYear = 2015, endYear = 2024 } = req.query;
  const start = Number.parseInt(startYear, 10);
  const end = Number.parseInt(endYear, 10);
  // Never lower than the historical cap of 2024, but allow the current year so
  // the current-year (memory-cached) path stays reachable as time passes.
  const latestYear = Math.max(2024, new Date().getFullYear());

  // The integer checks matter: NaN compares false against everything, so a
  // non-numeric year would otherwise slip past the range comparisons.
  const isValidRange =
    Number.isInteger(start) &&
    Number.isInteger(end) &&
    start <= end &&
    start >= 2010 &&
    end <= latestYear;
  if (!isValidRange) {
    return res.status(400).json({ error: 'Invalid year range' });
  }

  // Set up Server-Sent Events
  res.writeHead(200, {
    'Content-Type': 'text/event-stream',
    'Cache-Control': 'no-cache',
    'Connection': 'keep-alive',
    'Access-Control-Allow-Origin': '*',
    'Access-Control-Allow-Headers': 'Cache-Control'
  });

  // 'close' on the response fires when the connection ends, including when
  // the client goes away before res.end() has been called.
  let clientGone = false;
  res.on('close', () => {
    clientGone = true;
  });

  const sendProgress = (data) => {
    if (!clientGone) {
      res.write(`data: ${JSON.stringify(data)}\n\n`);
    }
  };

  try {
    const algorithms = loadAlgorithms();
    const diskCache = loadTimelineCache();
    let cacheUpdated = false;

    const years = [];
    for (let year = start; year <= end; year++) {
      years.push(year);
    }

    // One row per year, in the same order as `years`, so rows can be
    // addressed by index below.
    const timelineData = years.map((year) => ({ year }));
    const algorithmsData = [];

    const algorithmEntries = Object.entries(algorithms.algorithms);
    const totalOperations = algorithmEntries.length * years.length;
    let completedOperations = 0;
    const progressPercent = () => Math.round((completedOperations / totalOperations) * 100);

    // Count cached vs fetched upfront
    let cachedResults = 0;
    let fetchedResults = 0;
    for (const [key] of algorithmEntries) {
      for (const year of years) {
        if (!isCurrentYear(year) && diskCache[getCacheKey(key, year)] !== undefined) {
          cachedResults++;
        } else {
          fetchedResults++;
        }
      }
    }

    sendProgress({
      type: 'init',
      totalOperations,
      cachedResults,
      fetchedResults,
      message: 'Starting timeline data collection...'
    });

    // Process each algorithm
    for (const [key, algo] of algorithmEntries) {
      if (clientGone) {
        break;
      }

      const algorithmTimeline = {
        algorithm: key,
        name: algo.name,
        category: algo.category,
        data: []
      };

      sendProgress({
        type: 'algorithm_start',
        algorithm: algo.name,
        progress: progressPercent(),
        completed: completedOperations,
        total: totalOperations
      });

      // Get data for each year
      for (const [yearIndex, year] of years.entries()) {
        if (clientGone) {
          break;
        }

        const diskCacheKey = getCacheKey(key, year);
        const pastYear = !isCurrentYear(year);
        // Must be sampled BEFORE the lookup: a successful fetch for a past
        // year writes its result into diskCache, after which a fresh fetch
        // and a cache hit look the same.
        const servedFromDisk = pastYear && diskCache[diskCacheKey] !== undefined;

        const count = await fetchAlgorithmCountByYear(key, algo, year, diskCache);
        algorithmTimeline.data.push({ year, count });
        timelineData[yearIndex][key] = count;

        completedOperations++;

        sendProgress({
          type: 'year_complete',
          algorithm: algo.name,
          year,
          count,
          progress: progressPercent(),
          completed: completedOperations,
          total: totalOperations
        });

        if (!servedFromDisk) {
          // A new disk entry appeared, so the cache file is now stale.
          if (pastYear && diskCache[diskCacheKey] !== undefined) {
            cacheUpdated = true;
          }
          // Pause only after lookups that were not answered from disk.
          await delay(500);
        }
      }

      algorithmsData.push(algorithmTimeline);

      sendProgress({
        type: 'algorithm_complete',
        algorithm: algo.name,
        progress: progressPercent(),
        completed: completedOperations,
        total: totalOperations
      });
    }

    // Save cache if updated (also after a disconnect, so fetched counts are kept)
    if (cacheUpdated) {
      saveTimelineCache(diskCache);
      sendProgress({
        type: 'cache_saved',
        message: 'Timeline cache updated and saved to disk'
      });
    }

    // Send final results (a no-op when the client has already gone)
    sendProgress({
      type: 'complete',
      timelineData,
      algorithms: algorithmsData,
      yearRange: { start, end },
      cacheStats: {
        cached: cachedResults,
        fetched: fetchedResults
      }
    });

    res.end();
  } catch (error) {
    sendProgress({
      type: 'error',
      error: error.message
    });
    res.end();
  }
});

/**
 * GET /timeline?startYear=YYYY&endYear=YYYY
 *
 * Responds with per-year paper counts for every catalogued algorithm:
 * `timelineData` (one row per year, keyed by algorithm), `algorithms`
 * (one series per algorithm), `yearRange`, and `cacheStats`.
 *
 * Years default to 2015-2024 and must be integers with
 * 2010 <= startYear <= endYear <= max(2024, current year); otherwise the
 * response is 400. Unexpected failures produce 500 with `{ error }`.
 */
router.get('/timeline', async (req, res) => {
  try {
    const { startYear = 2015, endYear = 2024 } = req.query;
    const start = Number.parseInt(startYear, 10);
    const end = Number.parseInt(endYear, 10);
    // Never lower than the historical cap of 2024, but allow the current year
    // so the current-year (memory-cached) path stays reachable as time passes.
    const latestYear = Math.max(2024, new Date().getFullYear());

    // The integer checks matter: NaN compares false against everything, so a
    // non-numeric year would otherwise slip past the range comparisons.
    const isValidRange =
      Number.isInteger(start) &&
      Number.isInteger(end) &&
      start <= end &&
      start >= 2010 &&
      end <= latestYear;
    if (!isValidRange) {
      return res.status(400).json({ error: 'Invalid year range' });
    }

    const algorithms = loadAlgorithms();
    const diskCache = loadTimelineCache();
    let cacheUpdated = false;

    const years = [];
    for (let year = start; year <= end; year++) {
      years.push(year);
    }

    // One row per year, in the same order as `years`, so rows can be
    // addressed by index below.
    const timelineData = years.map((year) => ({ year }));
    const algorithmsData = [];

    const algorithmEntries = Object.entries(algorithms.algorithms);

    // Count how many API calls we'll need to make
    let totalApiCalls = 0;
    let cachedResults = 0;
    for (const [key] of algorithmEntries) {
      for (const year of years) {
        if (!isCurrentYear(year) && diskCache[getCacheKey(key, year)] !== undefined) {
          cachedResults++;
        } else {
          totalApiCalls++;
        }
      }
    }

    console.log(`Timeline request: ${cachedResults} cached results, ${totalApiCalls} API calls needed`);

    // Process each algorithm
    for (const [key, algo] of algorithmEntries) {
      const algorithmTimeline = {
        algorithm: key,
        name: algo.name,
        category: algo.category,
        data: []
      };

      // Get data for each year
      for (const [yearIndex, year] of years.entries()) {
        const diskCacheKey = getCacheKey(key, year);
        const pastYear = !isCurrentYear(year);
        // Must be sampled BEFORE the lookup: a successful fetch for a past
        // year writes its result into diskCache, after which a fresh fetch
        // and a cache hit look the same.
        const servedFromDisk = pastYear && diskCache[diskCacheKey] !== undefined;

        const count = await fetchAlgorithmCountByYear(key, algo, year, diskCache);
        algorithmTimeline.data.push({ year, count });
        timelineData[yearIndex][key] = count;

        if (!servedFromDisk) {
          // A new disk entry appeared, so the cache file is now stale.
          if (pastYear && diskCache[diskCacheKey] !== undefined) {
            cacheUpdated = true;
          }
          // Pause only after lookups that were not answered from disk.
          await delay(500);
        }
      }

      algorithmsData.push(algorithmTimeline);
      console.log(`Completed timeline data for ${algo.name}`);
    }

    // Save updated cache to disk if needed
    if (cacheUpdated) {
      saveTimelineCache(diskCache);
      console.log('Timeline cache updated and saved to disk');
    }

    res.json({
      timelineData,
      algorithms: algorithmsData,
      yearRange: { start, end },
      cacheStats: {
        cached: cachedResults,
        fetched: totalApiCalls
      }
    });
  } catch (error) {
    console.error('Timeline API error:', error);
    res.status(500).json({ error: error.message });
  }
});

// Export the configured router as this module's value.
module.exports = router;