import Database from 'better-sqlite3'; import path from 'path'; import fs from 'fs'; import { type Document, type InsertDocument, type SearchQuery, type InsertSearchQuery, type SearchResult, type InsertSearchResult, type Citation, type InsertCitation, type SearchRequest, type SearchResponse, type DocumentWithContext } from "@shared/schema"; import { IStorage } from './storage'; export class SQLiteStorage implements IStorage { private db: Database.Database; constructor(dbPath?: string) { // Use /tmp for database in production environments (like Hugging Face Spaces) const defaultPath = process.env.NODE_ENV === 'production' ? '/tmp/knowledgebridge.db' : './data/knowledgebridge.db'; const finalPath = dbPath || defaultPath; // Ensure data directory exists with error handling const dir = path.dirname(finalPath); try { if (!fs.existsSync(dir)) { fs.mkdirSync(dir, { recursive: true }); } } catch (error) { console.warn(`Failed to create database directory at ${dir}:`, error); } this.db = new Database(finalPath); this.initializeTables(); } private initializeTables() { // Enable foreign keys this.db.pragma('foreign_keys = ON'); // Create documents table this.db.exec(` CREATE TABLE IF NOT EXISTS documents ( id INTEGER PRIMARY KEY AUTOINCREMENT, title TEXT NOT NULL, content TEXT NOT NULL, source TEXT NOT NULL, source_type TEXT NOT NULL, url TEXT, metadata TEXT, -- JSON string embedding TEXT, -- JSON string file_path TEXT, file_name TEXT, file_size INTEGER, mime_type TEXT, processing_status TEXT NOT NULL DEFAULT 'pending', modal_task_id TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, processed_at DATETIME ) `); // Create search_queries table this.db.exec(` CREATE TABLE IF NOT EXISTS search_queries ( id INTEGER PRIMARY KEY AUTOINCREMENT, query TEXT NOT NULL, search_type TEXT NOT NULL DEFAULT 'semantic', filters TEXT, -- JSON string results_count INTEGER DEFAULT 0, search_time REAL, created_at DATETIME DEFAULT CURRENT_TIMESTAMP ) `); // Create search_results table this.db.exec(` CREATE TABLE IF NOT EXISTS search_results ( id INTEGER PRIMARY KEY AUTOINCREMENT, query_id INTEGER NOT NULL, document_id INTEGER NOT NULL, relevance_score REAL NOT NULL, snippet TEXT NOT NULL, rank INTEGER NOT NULL, FOREIGN KEY (query_id) REFERENCES search_queries(id), FOREIGN KEY (document_id) REFERENCES documents(id) ) `); // Create citations table this.db.exec(` CREATE TABLE IF NOT EXISTS citations ( id INTEGER PRIMARY KEY AUTOINCREMENT, document_id INTEGER NOT NULL, citation_text TEXT NOT NULL, page_number INTEGER, section TEXT, created_at DATETIME DEFAULT CURRENT_TIMESTAMP, FOREIGN KEY (document_id) REFERENCES documents(id) ) `); // Create indexes for better performance this.db.exec(` CREATE INDEX IF NOT EXISTS idx_documents_source_type ON documents(source_type); CREATE INDEX IF NOT EXISTS idx_documents_processing_status ON documents(processing_status); CREATE INDEX IF NOT EXISTS idx_search_results_query_id ON search_results(query_id); CREATE INDEX IF NOT EXISTS idx_search_results_document_id ON search_results(document_id); CREATE INDEX IF NOT EXISTS idx_citations_document_id ON citations(document_id); `); } async getDocument(id: number): Promise { const stmt = this.db.prepare('SELECT * FROM documents WHERE id = ?'); const row = stmt.get(id) as any; return row ? this.mapDocumentRow(row) : undefined; } async getDocuments(limit = 50, offset = 0): Promise { const stmt = this.db.prepare('SELECT * FROM documents ORDER BY created_at DESC LIMIT ? OFFSET ?'); const rows = stmt.all(limit, offset) as any[]; return rows.map(row => this.mapDocumentRow(row)); } async createDocument(insertDocument: InsertDocument): Promise { const stmt = this.db.prepare(` INSERT INTO documents ( title, content, source, source_type, url, metadata, embedding, file_path, file_name, file_size, mime_type, processing_status, modal_task_id ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) `); const result = stmt.run( insertDocument.title, insertDocument.content, insertDocument.source, insertDocument.sourceType, insertDocument.url || null, insertDocument.metadata ? JSON.stringify(insertDocument.metadata) : null, insertDocument.embedding || null, (insertDocument as any).filePath || null, (insertDocument as any).fileName || null, (insertDocument as any).fileSize || null, (insertDocument as any).mimeType || null, (insertDocument as any).processingStatus || 'pending', (insertDocument as any).modalTaskId || null ); const created = await this.getDocument(result.lastInsertRowid as number); if (!created) throw new Error('Failed to create document'); return created; } async updateDocument(id: number, updateData: Partial): Promise { const existing = await this.getDocument(id); if (!existing) return undefined; const fields: string[] = []; const values: any[] = []; Object.entries(updateData).forEach(([key, value]) => { if (value !== undefined) { switch (key) { case 'sourceType': fields.push('source_type = ?'); break; case 'processingStatus': fields.push('processing_status = ?'); break; case 'modalTaskId': fields.push('modal_task_id = ?'); break; case 'filePath': fields.push('file_path = ?'); break; case 'fileName': fields.push('file_name = ?'); break; case 'fileSize': fields.push('file_size = ?'); break; case 'mimeType': fields.push('mime_type = ?'); break; case 'processedAt': fields.push('processed_at = ?'); value = value instanceof Date ? value.toISOString() : value; break; case 'metadata': fields.push('metadata = ?'); value = value ? JSON.stringify(value) : null; break; default: fields.push(`${key} = ?`); } values.push(value); } }); if (fields.length === 0) return existing; values.push(id); const stmt = this.db.prepare(`UPDATE documents SET ${fields.join(', ')} WHERE id = ?`); stmt.run(...values); return await this.getDocument(id); } async deleteDocument(id: number): Promise { const stmt = this.db.prepare('DELETE FROM documents WHERE id = ?'); const result = stmt.run(id); return result.changes > 0; } async searchDocuments(request: SearchRequest): Promise { const startTime = Date.now(); let sql = ` SELECT * FROM documents WHERE (title LIKE ? OR content LIKE ?) `; const params: any[] = [`%${request.query}%`, `%${request.query}%`]; // Add source type filter if specified if (request.filters?.sourceTypes?.length) { const placeholders = request.filters.sourceTypes.map(() => '?').join(','); sql += ` AND source_type IN (${placeholders})`; params.push(...request.filters.sourceTypes); } sql += ` ORDER BY CASE WHEN title LIKE ? THEN 1 WHEN content LIKE ? THEN 2 ELSE 3 END, created_at DESC LIMIT ? OFFSET ? `; params.push(`%${request.query}%`, `%${request.query}%`, request.limit, request.offset); const stmt = this.db.prepare(sql); const rows = stmt.all(...params) as any[]; const results = rows.map((row, index) => { const doc = this.mapDocumentRow(row); return { ...doc, relevanceScore: this.calculateRelevanceScore(doc, request.query), snippet: this.extractSnippet(doc.content, request.query), rank: index + 1 }; }); const searchTime = (Date.now() - startTime) / 1000; // Save search query const searchQuery = await this.createSearchQuery({ query: request.query, searchType: request.searchType, filters: request.filters, resultsCount: results.length, searchTime }); // Save search results for (const doc of results) { await this.createSearchResult({ queryId: searchQuery.id, documentId: doc.id, relevanceScore: doc.relevanceScore, snippet: doc.snippet, rank: doc.rank }); } return { results, totalCount: results.length, searchTime, query: request.query, queryId: searchQuery.id }; } private calculateRelevanceScore(doc: Document, query: string): number { const queryLower = query.toLowerCase(); const titleLower = doc.title.toLowerCase(); const contentLower = doc.content.toLowerCase(); let score = 0; // Exact title match gets highest score if (titleLower === queryLower) score += 1.0; else if (titleLower.includes(queryLower)) score += 0.8; // Content matches if (contentLower.includes(queryLower)) score += 0.3; // Word-by-word scoring const queryWords = queryLower.split(' '); queryWords.forEach(word => { if (titleLower.includes(word)) score += 0.2; if (contentLower.includes(word)) score += 0.1; }); return Math.min(score, 1.0); } private extractSnippet(content: string, query: string, maxLength = 200): string { const queryLower = query.toLowerCase(); const contentLower = content.toLowerCase(); const index = contentLower.indexOf(queryLower); if (index === -1) { return content.substring(0, maxLength) + (content.length > maxLength ? '...' : ''); } const start = Math.max(0, index - 50); const end = Math.min(content.length, index + queryLower.length + 150); let snippet = content.substring(start, end); if (start > 0) snippet = '...' + snippet; if (end < content.length) snippet = snippet + '...'; return snippet; } async getDocumentsBySourceType(sourceType: string): Promise { const stmt = this.db.prepare('SELECT * FROM documents WHERE source_type = ? ORDER BY created_at DESC'); const rows = stmt.all(sourceType) as any[]; return rows.map(row => this.mapDocumentRow(row)); } async getDocumentsByProcessingStatus(status: string): Promise { const stmt = this.db.prepare('SELECT * FROM documents WHERE processing_status = ? ORDER BY created_at DESC'); const rows = stmt.all(status) as any[]; return rows.map(row => this.mapDocumentRow(row)); } async createSearchQuery(insertQuery: InsertSearchQuery): Promise { const stmt = this.db.prepare(` INSERT INTO search_queries (query, search_type, filters, results_count, search_time) VALUES (?, ?, ?, ?, ?) `); const result = stmt.run( insertQuery.query, insertQuery.searchType || 'semantic', insertQuery.filters ? JSON.stringify(insertQuery.filters) : null, insertQuery.resultsCount || null, insertQuery.searchTime || null ); const created = this.db.prepare('SELECT * FROM search_queries WHERE id = ?').get(result.lastInsertRowid) as any; return this.mapSearchQueryRow(created); } async getSearchQueries(limit = 50): Promise { const stmt = this.db.prepare('SELECT * FROM search_queries ORDER BY created_at DESC LIMIT ?'); const rows = stmt.all(limit) as any[]; return rows.map(row => this.mapSearchQueryRow(row)); } async createSearchResult(insertResult: InsertSearchResult): Promise { const stmt = this.db.prepare(` INSERT INTO search_results (query_id, document_id, relevance_score, snippet, rank) VALUES (?, ?, ?, ?, ?) `); const result = stmt.run( insertResult.queryId, insertResult.documentId, insertResult.relevanceScore, insertResult.snippet, insertResult.rank ); const created = this.db.prepare('SELECT * FROM search_results WHERE id = ?').get(result.lastInsertRowid) as any; return this.mapSearchResultRow(created); } async getSearchResults(queryId: number): Promise { const stmt = this.db.prepare('SELECT * FROM search_results WHERE query_id = ? ORDER BY rank'); const rows = stmt.all(queryId) as any[]; return rows.map(row => this.mapSearchResultRow(row)); } async createCitation(insertCitation: InsertCitation): Promise { const stmt = this.db.prepare(` INSERT INTO citations (document_id, citation_text, page_number, section) VALUES (?, ?, ?, ?) `); const result = stmt.run( insertCitation.documentId, insertCitation.citationText, insertCitation.pageNumber || null, insertCitation.section || null ); const created = this.db.prepare('SELECT * FROM citations WHERE id = ?').get(result.lastInsertRowid) as any; return this.mapCitationRow(created); } async getCitationsByDocument(documentId: number): Promise { const stmt = this.db.prepare('SELECT * FROM citations WHERE document_id = ? ORDER BY created_at DESC'); const rows = stmt.all(documentId) as any[]; return rows.map(row => this.mapCitationRow(row)); } async deleteCitation(id: number): Promise { const stmt = this.db.prepare('DELETE FROM citations WHERE id = ?'); const result = stmt.run(id); return result.changes > 0; } private mapDocumentRow(row: any): Document { return { id: row.id, title: row.title, content: row.content, source: row.source, sourceType: row.source_type, url: row.url, metadata: row.metadata ? JSON.parse(row.metadata) : null, embedding: row.embedding, createdAt: new Date(row.created_at), filePath: row.file_path, fileName: row.file_name, fileSize: row.file_size, mimeType: row.mime_type, processingStatus: row.processing_status, modalTaskId: row.modal_task_id, processedAt: row.processed_at ? new Date(row.processed_at) : null, } as Document; } private mapSearchQueryRow(row: any): SearchQuery { return { id: row.id, query: row.query, searchType: row.search_type, filters: row.filters ? JSON.parse(row.filters) : null, resultsCount: row.results_count, searchTime: row.search_time, createdAt: new Date(row.created_at) }; } private mapSearchResultRow(row: any): SearchResult { return { id: row.id, queryId: row.query_id, documentId: row.document_id, relevanceScore: row.relevance_score, snippet: row.snippet, rank: row.rank }; } private mapCitationRow(row: any): Citation { return { id: row.id, documentId: row.document_id, citationText: row.citation_text, pageNumber: row.page_number, section: row.section, createdAt: new Date(row.created_at) }; } close() { this.db.close(); } }