import { Router } from 'express';
import { upload, validateUpload, FileProcessor } from './file-upload';
import { documentProcessor } from './document-processor';
import { storage } from './storage';
import { fileUploadSchema, documentProcessingSchema, batchProcessingSchema } from '@shared/schema';
import path from 'path';

/**
 * HTTP routes for uploading, processing, indexing, searching, listing and
 * deleting documents. Every response body carries a boolean `success` flag;
 * failures additionally carry `error` and, where available, `message`.
 */
const router = Router();

/**
 * Parses a route parameter as a base-10 integer document ID.
 *
 * @param raw - The raw `:id` path segment.
 * @returns The parsed ID, or `null` when the segment is not numeric.
 */
function parseDocumentId(raw: string): number | null {
  const id = Number.parseInt(raw, 10);
  return Number.isNaN(id) ? null : id;
}

/**
 * Converts a query-string value into a non-negative integer.
 *
 * @param value - The raw query value (may be missing or malformed).
 * @param fallback - Value used when `value` is not a finite, non-negative number.
 * @returns The floored number, or `fallback`.
 */
function parsePagingParam(value: unknown, fallback: number): number {
  const parsed = Number(value);
  return Number.isFinite(parsed) && parsed >= 0 ? Math.floor(parsed) : fallback;
}

/** Extracts a human-readable message from a caught value. */
function describeError(error: unknown): string {
  return error instanceof Error ? error.message : 'Unknown error occurred';
}

/**
 * Detects a schema validation failure by error name, so that no direct import
 * of the validation library is required.
 * NOTE(review): assumes `batchProcessingSchema` is a Zod schema whose `parse`
 * throws an error named `ZodError` - confirm against `@shared/schema`.
 */
function isValidationError(error: unknown): boolean {
  return error instanceof Error && error.name === 'ZodError';
}

/**
 * Maps an upload middleware error to the `error` / `message` pair sent to the
 * client. All upload errors are reported with HTTP 400.
 */
function describeUploadError(err: { code?: string; field?: string; message?: string }): {
  error: string;
  message: string | undefined;
} {
  switch (err.code) {
    case 'LIMIT_FILE_SIZE':
      return {
        error: 'File too large',
        message: `File size exceeds the maximum limit of 50MB. File: ${err.field}`,
      };
    case 'LIMIT_FILE_COUNT':
      return { error: 'Too many files', message: 'Maximum 10 files allowed per upload' };
    case 'LIMIT_UNEXPECTED_FILE':
      return { error: 'Unexpected file field', message: `Unexpected file field: ${err.field}` };
    default:
      break;
  }

  // File type rejections are recognised by their message text rather than a code.
  if (err.message && err.message.includes('Unsupported file type')) {
    return { error: 'Unsupported file type', message: err.message };
  }

  return { error: 'File upload error', message: err.message || 'Unknown upload error' };
}

/** Chooses the stored `sourceType` for an uploaded file from its MIME type. */
function detectSourceType(mimetype: string): string {
  if (FileProcessor.isPdfFile(mimetype)) {
    return 'pdf';
  }
  if (FileProcessor.isImageFile(mimetype)) {
    return 'image';
  }
  if (mimetype.includes('text') || mimetype.includes('json')) {
    return 'text';
  }
  return 'document';
}

/**
 * Best-effort removal of uploaded files that never got a document record.
 * Individual deletion failures are logged and otherwise ignored so they cannot
 * mask the error that triggered the cleanup.
 */
async function removeOrphanedUploads(
  files: readonly { path: string; originalname: string }[],
): Promise<void> {
  for (const file of files) {
    if (!file.path) {
      continue;
    }
    try {
      await FileProcessor.deleteFile(file.path);
    } catch (cleanupError) {
      console.warn(`Failed to remove orphaned upload ${file.originalname}:`, cleanupError);
    }
  }
}

/**
 * Best-effort transition of the given documents to the `failed` status.
 * Update failures are logged and otherwise ignored.
 */
async function markDocumentsFailed(documentIds: readonly number[]): Promise<void> {
  await Promise.all(
    documentIds.map(async (id) => {
      try {
        await storage.updateDocument(id, { processingStatus: 'failed' } as any);
      } catch (updateError) {
        console.warn(`Failed to mark document ${id} as failed:`, updateError);
      }
    }),
  );
}

/**
 * Runs `task`; if it throws, marks the given documents as `failed` before
 * rethrowing so they are not left in the `processing` state indefinitely.
 *
 * @param documentIds - Documents that were switched to `processing` for this task.
 * @param task - The processing work to run.
 * @returns Whatever `task` resolves to.
 * @throws Rethrows whatever `task` throws.
 */
async function runOrMarkFailed<T>(
  documentIds: readonly number[],
  task: () => Promise<T>,
): Promise<T> {
  try {
    return await task();
  } catch (error) {
    await markDocumentsFailed(documentIds);
    throw error;
  }
}

/**
 * Upload documents (multiple files supported)
 *
 * Runs the multipart middleware for up to 10 files in the `files` field,
 * then `validateUpload`, then creates one document record per file.
 */
router.post(
  '/upload',
  (req, res, next) => {
    upload.array('files', 10)(req, res, (err) => {
      if (!err) {
        next();
        return;
      }

      console.error('Multer upload error:', err);
      const { error, message } = describeUploadError(err);
      res.status(400).json({ success: false, error, message });
    });
  },
  validateUpload,
  async (req, res) => {
    const files = req.files as Express.Multer.File[] | undefined;
    // Paths of files whose document record was created; everything else is an
    // orphan if the request fails part-way through.
    const savedPaths = new Set<string>();

    try {
      if (!files || files.length === 0) {
        res.status(400).json({
          success: false,
          error: 'No files received',
          message: 'No files were received by the server',
        });
        return;
      }

      console.log(`Processing ${files.length} uploaded files`);
      const uploadedDocuments: unknown[] = [];

      for (const file of files) {
        console.log(
          `Processing file: ${file.originalname}, size: ${file.size} bytes, type: ${file.mimetype}`,
        );

        // Extract title from filename or use provided title
        const title =
          req.body.title || path.basename(file.originalname, path.extname(file.originalname));
        const source = req.body.source || `Uploaded file: ${file.originalname}`;
        const sourceType = detectSourceType(file.mimetype);

        // Text files get their content inlined; everything else keeps a
        // placeholder until it is processed.
        let content = 'Processing...';
        if (FileProcessor.isTextFile(file.mimetype)) {
          try {
            content = await FileProcessor.readTextFile(file.path);
          } catch (error) {
            console.warn(`Failed to read text file ${file.originalname}:`, error);
            content = 'Failed to read file content';
          }
        }

        // Create document record
        try {
          const document = await storage.createDocument({
            title,
            content,
            source,
            sourceType,
            url: null,
            metadata: {
              originalName: file.originalname,
              uploadedAt: new Date().toISOString(),
              mimeType: file.mimetype,
              size: file.size,
            },
            embedding: null,
            filePath: file.path,
            fileName: file.originalname,
            fileSize: file.size,
            mimeType: file.mimetype,
            processingStatus: FileProcessor.requiresOCR(file.mimetype) ? 'pending' : 'completed',
          } as any);

          console.log(
            `Successfully created document record for ${file.originalname} with ID ${document.id}`,
          );
          uploadedDocuments.push(document);
          savedPaths.add(file.path);
        } catch (dbError) {
          console.error(`Failed to create document record for ${file.originalname}:`, dbError);
          throw new Error(
            `Database error while saving ${file.originalname}: ${
              dbError instanceof Error ? dbError.message : 'Unknown database error'
            }`,
          );
        }
      }

      res.status(201).json({
        success: true,
        message: `Successfully uploaded ${uploadedDocuments.length} document(s)`,
        documents: uploadedDocuments,
      });
    } catch (error) {
      console.error('File upload error:', error);

      // Files without a document record would otherwise stay on disk forever.
      await removeOrphanedUploads((files ?? []).filter((file) => !savedPaths.has(file.path)));

      res.status(500).json({
        success: false,
        error: 'File upload failed',
        message: describeError(error),
      });
    }
  },
);

/**
 * Batch process multiple documents
 *
 * Registered BEFORE `/process/:id` on purpose: routes are matched in
 * registration order, so the parameterised route would otherwise capture
 * `/process/batch` with `id = 'batch'` and this handler would never run.
 */
router.post('/process/batch', async (req, res) => {
  try {
    const { documentIds, operations = ['extract_text'] } = batchProcessingSchema.parse(req.body);

    // Fetch all documents, silently skipping IDs that do not resolve
    const fetched = await Promise.all(documentIds.map((id) => storage.getDocument(id)));
    const validDocuments = fetched.filter((doc) => doc != null) as any[];

    if (validDocuments.length === 0) {
      res.status(404).json({ success: false, error: 'No valid documents found' });
      return;
    }

    // Update all documents to processing status
    await Promise.all(
      validDocuments.map((doc) =>
        storage.updateDocument(doc.id, { processingStatus: 'processing' } as any),
      ),
    );

    // Process documents in batch; a thrown error must not leave them in 'processing'
    const batchResult = await runOrMarkFailed(
      validDocuments.map((doc) => doc.id),
      () => documentProcessor.batchProcessDocuments(validDocuments, operations),
    );

    // Update documents with results
    await Promise.all(
      batchResult.results.map((result) => {
        const updateData: any = {
          processingStatus: result.success ? 'completed' : 'failed',
          processedAt: new Date(),
        };

        if (result.success) {
          if (result.extractedText) {
            updateData.content = result.extractedText;
          }
          if (result.embeddings) {
            updateData.embedding = JSON.stringify(result.embeddings);
          }
        }

        return storage.updateDocument(result.documentId, updateData);
      }),
    );

    res.json({
      success: true,
      message: `Batch processing completed: ${batchResult.processedCount} successful, ${batchResult.failedCount} failed`,
      processedCount: batchResult.processedCount,
      failedCount: batchResult.failedCount,
      results: batchResult.results,
      totalProcessingTime: batchResult.totalProcessingTime,
    });
  } catch (error) {
    console.error('Batch processing error:', error);

    // A malformed request body is the caller's fault, not a server failure.
    if (isValidationError(error)) {
      res.status(400).json({
        success: false,
        error: 'Invalid batch processing request',
        message: describeError(error),
      });
      return;
    }

    res.status(500).json({
      success: false,
      error: 'Batch processing failed',
      message: describeError(error),
    });
  }
});

/**
 * Process a single document
 *
 * Body (optional): `{ operations?: string[] }`, defaulting to `['extract_text']`.
 */
router.post('/process/:id', async (req, res) => {
  try {
    const documentId = parseDocumentId(req.params.id);
    if (documentId === null) {
      res.status(400).json({ success: false, error: 'Invalid document ID' });
      return;
    }

    const operations = req.body?.operations || ['extract_text'];

    const document = await storage.getDocument(documentId);
    if (!document) {
      res.status(404).json({ success: false, error: 'Document not found' });
      return;
    }

    // Update status to processing
    await storage.updateDocument(documentId, { processingStatus: 'processing' } as any);

    // Process the document; a thrown error must not leave it in 'processing'
    const result = await runOrMarkFailed([documentId], () =>
      documentProcessor.processDocument(document, operations),
    );

    if (!result.success) {
      // Update status to failed
      await storage.updateDocument(documentId, { processingStatus: 'failed' } as any);

      res.status(500).json({
        success: false,
        error: 'Document processing failed',
        message: result.error,
        processingTime: result.processingTime,
      });
      return;
    }

    // Update document with results
    const updateData: any = {
      processingStatus: 'completed',
      processedAt: new Date(),
    };

    if (result.extractedText && result.extractedText !== document.content) {
      updateData.content = result.extractedText;
    }
    if (result.embeddings) {
      updateData.embedding = JSON.stringify(result.embeddings);
    }
    if (result.modalTaskId) {
      updateData.modalTaskId = result.modalTaskId;
    }

    const updatedDocument = await storage.updateDocument(documentId, updateData);

    res.json({
      success: true,
      message: 'Document processed successfully',
      document: updatedDocument,
      processingTime: result.processingTime,
    });
  } catch (error) {
    console.error('Document processing error:', error);
    res.status(500).json({
      success: false,
      error: 'Processing request failed',
      message: describeError(error),
    });
  }
});

/**
 * Build vector index from documents
 *
 * Body: `{ documentIds?: number[], indexName?: string }`. Without
 * `documentIds`, the first 1000 stored documents are fetched and those with
 * status `completed` are indexed.
 */
router.post('/index/build', async (req, res) => {
  try {
    const { documentIds, indexName = 'research_papers_clean_v2' } = req.body;

    let documents;
    if (documentIds && Array.isArray(documentIds)) {
      // Build index from specific documents
      const fetchedDocs = await Promise.all(
        documentIds.map((id: number) => storage.getDocument(id)),
      );
      documents = fetchedDocs.filter((doc) => doc !== undefined) as any[];
    } else {
      // Build index from all completed documents
      documents = await storage.getDocuments(1000, 0);
      documents = documents.filter((doc) => doc.processingStatus === 'completed');
    }

    if (documents.length === 0) {
      res.status(400).json({
        success: false,
        error: 'No processed documents available for indexing',
      });
      return;
    }

    const result = await documentProcessor.buildVectorIndex(documents, indexName);

    if (result.success) {
      res.json({
        success: true,
        message: 'Vector index built successfully',
        indexName: result.indexName,
        documentCount: result.documentCount,
      });
    } else {
      res.status(500).json({
        success: false,
        error: 'Index building failed',
        message: result.error,
      });
    }
  } catch (error) {
    console.error('Index building error:', error);
    res.status(500).json({
      success: false,
      error: 'Index building request failed',
      message: describeError(error),
    });
  }
});

/**
 * Search vector index
 *
 * Body: `{ query: string, indexName?: string, maxResults?: number }`.
 */
router.post('/search/vector', async (req, res) => {
  try {
    const { query, indexName = 'research_papers_clean_v2', maxResults = 10 } = req.body;

    if (!query || typeof query !== 'string') {
      res.status(400).json({
        success: false,
        error: 'Query parameter is required and must be a string',
      });
      return;
    }

    const result = await documentProcessor.searchVectorIndex(query, indexName, maxResults);

    if (result.success) {
      res.json({
        success: true,
        query,
        indexName,
        results: result.results,
        totalFound: result.results?.length || 0,
      });
    } else {
      res.status(500).json({
        success: false,
        error: 'Vector search failed',
        message: result.error,
      });
    }
  } catch (error) {
    console.error('Vector search error:', error);
    res.status(500).json({
      success: false,
      error: 'Vector search request failed',
      message: describeError(error),
    });
  }
});

/**
 * Get document processing status
 */
router.get('/status/:id', async (req, res) => {
  try {
    const documentId = parseDocumentId(req.params.id);
    if (documentId === null) {
      res.status(400).json({ success: false, error: 'Invalid document ID' });
      return;
    }

    const document = await storage.getDocument(documentId);
    if (!document) {
      res.status(404).json({ success: false, error: 'Document not found' });
      return;
    }

    // Several fields are read through a cast because they are not on the
    // declared document type as seen from this file.
    const details = document as any;

    res.json({
      success: true,
      document: {
        id: document.id,
        title: document.title,
        processingStatus: details.processingStatus,
        modalTaskId: details.modalTaskId,
        createdAt: document.createdAt,
        processedAt: details.processedAt,
        fileSize: details.fileSize,
        mimeType: details.mimeType,
      },
    });
  } catch (error) {
    console.error('Status check error:', error);
    res.status(500).json({
      success: false,
      error: 'Status check failed',
      message: describeError(error),
    });
  }
});

/**
 * Get all documents with filtering
 *
 * Query: `sourceType`, `processingStatus`, `limit` (default 50) and `offset`
 * (default 0). `sourceType` takes precedence over `processingStatus`;
 * `limit` / `offset` only apply when neither filter is used.
 */
router.get('/list', async (req, res) => {
  try {
    const { sourceType, processingStatus } = req.query;
    const limit = parsePagingParam(req.query.limit, 50);
    const offset = parsePagingParam(req.query.offset, 0);

    let documents;
    if (sourceType) {
      documents = await storage.getDocumentsBySourceType(sourceType as string);
    } else if (processingStatus && 'getDocumentsByProcessingStatus' in storage) {
      // Only some storage implementations expose a status filter.
      documents = await (storage as any).getDocumentsByProcessingStatus(
        processingStatus as string,
      );
    } else {
      documents = await storage.getDocuments(limit, offset);
    }

    res.json({
      success: true,
      documents,
      totalCount: documents.length,
    });
  } catch (error) {
    console.error('Document list error:', error);
    res.status(500).json({
      success: false,
      error: 'Failed to retrieve documents',
      message: describeError(error),
    });
  }
});

/**
 * Delete a document and its file
 */
router.delete('/:id', async (req, res) => {
  try {
    const documentId = parseDocumentId(req.params.id);
    if (documentId === null) {
      res.status(400).json({ success: false, error: 'Invalid document ID' });
      return;
    }

    const document = await storage.getDocument(documentId);
    if (!document) {
      res.status(404).json({ success: false, error: 'Document not found' });
      return;
    }

    // Delete file if it exists
    const filePath = (document as any).filePath;
    if (filePath) {
      await FileProcessor.deleteFile(filePath);
    }

    // Delete document record
    const deleted = await storage.deleteDocument(documentId);

    if (deleted) {
      res.json({ success: true, message: 'Document deleted successfully' });
    } else {
      res.status(500).json({ success: false, error: 'Failed to delete document' });
    }
  } catch (error) {
    console.error('Document deletion error:', error);
    res.status(500).json({
      success: false,
      error: 'Document deletion failed',
      message: describeError(error),
    });
  }
});

export default router;