# Hugging Face Space: Omartificial-Intelligence-Space/context-caching-gemini-pdf-qa
# Status: Running
# File size: 37,514 bytes

from flask import Flask, request, jsonify, render_template_string
from flask_cors import CORS
from google import genai
from google.genai import types
import os
import io
import httpx
import uuid
from datetime import datetime, timezone, timedelta
from dotenv import load_dotenv
import json

# Load environment variables (picks up a local .env file when present)
load_dotenv()

# Get Google API key from environment.
# An empty value (e.g. a bare `GOOGLE_API_KEY=` line in .env) is treated the
# same as a missing one, so the problem is reported at startup rather than on
# the first API call.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if not GOOGLE_API_KEY:
    raise ValueError("GOOGLE_API_KEY environment variable is not set. Please set it before running the script.")

app = Flask(__name__)
CORS(app)

# Configure Flask for large file uploads (200MB for substantial documents).
# This caps the size of the whole request body, not just the file part.
app.config['MAX_CONTENT_LENGTH'] = 200 * 1024 * 1024  # 200MB max file size

# Initialize Gemini client with the API key read above
client = genai.Client(api_key=GOOGLE_API_KEY)

# In-memory storage for demo (in production, use a database).
# document_caches maps an app-generated UUID string to a dict describing the
# Gemini cache created for an uploaded document. Contents are lost on restart.
document_caches = {}
user_sessions = {}

# HTML template for the web interface.
# Single-page UI (markup, styles and script in one string): the left panel
# uploads a PDF by file or URL (POST /upload, POST /upload-url), the right
# panel is a chat that sends questions to POST /ask with the returned cache id.
# The string is rendered through render_template_string, so it must not
# contain Jinja delimiters (double curly braces, curly-brace-percent).
HTML_TEMPLATE = """
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Smart Document Analysis Platform</title>
    <style>
        * {
            margin: 0;
            padding: 0;
            box-sizing: border-box;
        }

        body {
            font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            min-height: 100vh;
            color: #333;
        }

        .container {
            max-width: 1400px;
            margin: 0 auto;
            padding: 20px;
            min-height: 100vh;
        }

        .header {
            text-align: center;
            margin-bottom: 30px;
            color: white;
        }

        .header h1 {
            font-size: 2.8em;
            font-weight: 700;
            margin-bottom: 10px;
            text-shadow: 0 2px 4px rgba(0,0,0,0.3);
        }

        .header p {
            font-size: 1.2em;
            opacity: 0.9;
            font-weight: 300;
        }

        .main-content {
            display: grid;
            grid-template-columns: 1fr 1fr;
            gap: 30px;
            height: calc(100vh - 200px);
        }

        .left-panel {
            background: white;
            border-radius: 20px;
            padding: 30px;
            box-shadow: 0 20px 40px rgba(0,0,0,0.1);
            overflow-y: auto;
        }

        .right-panel {
            background: white;
            border-radius: 20px;
            padding: 30px;
            box-shadow: 0 20px 40px rgba(0,0,0,0.1);
            display: flex;
            flex-direction: column;
        }

        .panel-title {
            font-size: 1.5em;
            font-weight: 600;
            margin-bottom: 20px;
            color: #2d3748;
            display: flex;
            align-items: center;
            gap: 10px;
        }

        .upload-section {
            margin-bottom: 30px;
        }

        .upload-area {
            border: 2px dashed #667eea;
            border-radius: 15px;
            padding: 40px;
            text-align: center;
            background: #f8fafc;
            transition: all 0.3s ease;
            margin-bottom: 20px;
        }

        .upload-area:hover {
            border-color: #764ba2;
            background: #f0f2ff;
            transform: translateY(-2px);
        }

        .upload-area.dragover {
            border-color: #764ba2;
            background: #e8f0ff;
            transform: scale(1.02);
        }

        .upload-icon {
            font-size: 3em;
            color: #667eea;
            margin-bottom: 15px;
        }

        .file-input {
            display: none;
        }

        .upload-btn {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            border: none;
            padding: 15px 30px;
            border-radius: 25px;
            cursor: pointer;
            font-size: 1.1em;
            font-weight: 500;
            transition: all 0.3s ease;
            margin: 10px;
        }

        .upload-btn:hover {
            transform: translateY(-2px);
            box-shadow: 0 10px 20px rgba(102, 126, 234, 0.3);
        }

        .url-input {
            width: 100%;
            padding: 15px;
            border: 2px solid #e2e8f0;
            border-radius: 10px;
            font-size: 1em;
            margin-bottom: 15px;
            transition: border-color 0.3s ease;
        }

        .url-input:focus {
            outline: none;
            border-color: #667eea;
        }

        .btn {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            border: none;
            padding: 12px 25px;
            border-radius: 20px;
            cursor: pointer;
            font-size: 1em;
            font-weight: 500;
            transition: all 0.3s ease;
        }

        .btn:hover {
            transform: translateY(-1px);
            box-shadow: 0 5px 15px rgba(102, 126, 234, 0.3);
        }

        .btn:disabled {
            opacity: 0.6;
            cursor: not-allowed;
            transform: none;
        }

        .chat-container {
            flex: 1;
            border: 1px solid #e2e8f0;
            border-radius: 15px;
            overflow-y: auto;
            padding: 20px;
            background: #f8fafc;
            margin-bottom: 20px;
        }

        .message {
            margin-bottom: 15px;
            padding: 15px;
            border-radius: 12px;
            max-width: 85%;
            animation: fadeIn 0.3s ease;
        }

        @keyframes fadeIn {
            from { opacity: 0; transform: translateY(10px); }
            to { opacity: 1; transform: translateY(0); }
        }

        .user-message {
            background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
            color: white;
            margin-left: auto;
            box-shadow: 0 4px 12px rgba(102, 126, 234, 0.3);
        }

        .ai-message {
            background: white;
            color: #333;
            border: 1px solid #e2e8f0;
            box-shadow: 0 2px 8px rgba(0,0,0,0.1);
        }

        .input-group {
            display: flex;
            gap: 10px;
        }

        .question-input {
            flex: 1;
            padding: 15px;
            border: 2px solid #e2e8f0;
            border-radius: 12px;
            font-size: 1em;
            transition: border-color 0.3s ease;
        }

        .question-input:focus {
            outline: none;
            border-color: #667eea;
        }

        .cache-info {
            background: linear-gradient(135deg, #48bb78 0%, #38a169 100%);
            border-radius: 12px;
            padding: 20px;
            margin-bottom: 20px;
            color: white;
            box-shadow: 0 4px 12px rgba(72, 187, 120, 0.3);
        }

        .cache-info h3 {
            margin-bottom: 10px;
            font-weight: 600;
        }

        .loading {
            text-align: center;
            padding: 40px;
            color: #666;
        }

        .loading-spinner {
            border: 3px solid #f3f3f3;
            border-top: 3px solid #667eea;
            border-radius: 50%;
            width: 40px;
            height: 40px;
            animation: spin 1s linear infinite;
            margin: 0 auto 20px;
        }

        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }

        .error {
            background: linear-gradient(135deg, #f56565 0%, #e53e3e 100%);
            border-radius: 12px;
            padding: 15px;
            color: white;
            margin-bottom: 20px;
            box-shadow: 0 4px 12px rgba(245, 101, 101, 0.3);
        }

        .success {
            background: linear-gradient(135deg, #48bb78 0%, #38a169 100%);
            border-radius: 12px;
            padding: 15px;
            color: white;
            margin-bottom: 20px;
            box-shadow: 0 4px 12px rgba(72, 187, 120, 0.3);
        }

        @media (max-width: 768px) {
            .main-content {
                grid-template-columns: 1fr;
                gap: 20px;
            }

            .header h1 {
                font-size: 2em;
            }
        }
    </style>
</head>
<body>
    <div class="container">
        <div class="header">
            <h1>📚 Smart Document Analysis Platform</h1>
            <p>Upload substantial PDF documents (5MB+ recommended) for efficient context caching with Gemini API</p>
            <p style="font-size: 0.9em; opacity: 0.8; margin-top: 5px;">💡 Context caching requires minimum token thresholds - larger documents work better</p>
        </div>

        <div class="main-content">
            <!-- Left Panel - Upload Section -->
            <div class="left-panel">
                <div class="panel-title">
                    📤 Upload PDF Document
                </div>

                <div class="upload-section">
                    <div class="upload-area" id="uploadArea">
                        <div class="upload-icon">📄</div>
                        <p>Drag and drop your PDF file here, or click to select</p>
                        <p style="font-size: 0.9em; color: #666; margin-top: 5px;">For context caching to work: Upload substantial documents (5MB+ recommended)</p>
                        <p style="font-size: 0.8em; color: #888; margin-top: 5px;">Maximum file size: 200MB</p>
                        <input type="file" id="fileInput" class="file-input" accept=".pdf">
                        <button class="upload-btn" onclick="document.getElementById('fileInput').click()">
                            Choose PDF File
                        </button>
                    </div>

                    <div style="margin-top: 20px;">
                        <h3>Or provide a URL:</h3>
                        <input type="url" id="urlInput" class="url-input" placeholder="https://example.com/document.pdf">
                        <button class="btn" onclick="uploadFromUrl()">Upload from URL</button>
                    </div>
                </div>

                <div id="loading" class="loading" style="display: none;">
                    <div class="loading-spinner"></div>
                    <p id="loadingText">Processing your PDF... This may take a moment.</p>
                </div>

                <div id="error" class="error" style="display: none;"></div>
                <div id="success" class="success" style="display: none;"></div>
            </div>

            <!-- Right Panel - Chat Section -->
            <div class="right-panel">
                <div class="panel-title">
                    💬 Ask Questions
                </div>

                <div id="cacheInfo" class="cache-info" style="display: none;">
                    <h3>✅ Document Cached Successfully!</h3>
                    <p>Your PDF has been cached using Gemini API context caching. You can now ask multiple questions efficiently without re-uploading.</p>
                    <p><strong>Document:</strong> <span id="documentName"></span></p>
                    <p><strong>Cache ID:</strong> <span id="cacheId"></span></p>
                    <p><strong>Tokens Cached:</strong> <span id="tokenCount"></span></p>
                    <p><strong>Model:</strong> <span id="modelUsed"></span></p>
                    <p style="font-size: 0.9em; margin-top: 10px; opacity: 0.8;">💡 Cache valid for 1 hour. Subsequent questions will use cached content for faster responses.</p>
                </div>

                <div class="chat-container" id="chatContainer">
                    <div class="message ai-message">
                        👋 Hello! I'm ready to analyze your PDF documents. Upload a document to get started!
                    </div>
                </div>

                <div class="input-group">
                    <input type="text" id="questionInput" class="question-input" placeholder="Ask a question about your document...">
                    <button class="btn" onclick="askQuestion()" id="askBtn">Ask</button>
                </div>
            </div>
        </div>
    </div>

    <script>
        let currentCacheId = null;

        // Handles of the pending auto-hide timers for the two banners
        let errorTimer = null;
        let successTimer = null;

        // File upload handling
        const uploadArea = document.getElementById('uploadArea');
        const fileInput = document.getElementById('fileInput');

        uploadArea.addEventListener('dragover', (e) => {
            e.preventDefault();
            uploadArea.classList.add('dragover');
        });

        uploadArea.addEventListener('dragleave', () => {
            uploadArea.classList.remove('dragover');
        });

        uploadArea.addEventListener('drop', (e) => {
            e.preventDefault();
            uploadArea.classList.remove('dragover');
            const files = e.dataTransfer.files;
            if (files.length > 0) {
                uploadFile(files[0]);
            }
        });

        fileInput.addEventListener('change', (e) => {
            if (e.target.files.length > 0) {
                uploadFile(e.target.files[0]);
            }
        });

        // Shared handling of the JSON returned by /upload and /upload-url
        function handleUploadResult(result) {
            if (result.success) {
                currentCacheId = result.cache_id;
                document.getElementById('cacheId').textContent = result.cache_id;
                document.getElementById('tokenCount').textContent = result.token_count;
                document.getElementById('documentName').textContent = result.document_name;
                document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.5-flash-001';
                document.getElementById('cacheInfo').style.display = 'block';
                showSuccess('PDF uploaded and cached successfully!');

                // Add initial message
                addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
                return;
            }

            // Show the error and the suggestion in one banner; showing them
            // one after the other would replace the error with the suggestion.
            const parts = [result.error || 'Upload failed.'];
            if (result.suggestion) {
                parts.push(result.suggestion);
            }
            showError(parts.join(' '));
        }

        async function uploadFile(file) {
            if (!file.type.includes('pdf')) {
                showError('Please select a PDF file.');
                return;
            }

            // Check file size on client side (200MB limit)
            const fileSizeMB = file.size / (1024 * 1024);
            if (file.size > 200 * 1024 * 1024) {
                showError(`File too large (${fileSizeMB.toFixed(1)}MB). Maximum size is 200MB.`);
                return;
            }

            // Warn about small files that might not cache
            if (file.size < 1024 * 1024) {
                showError(`File might be too small (${fileSizeMB.toFixed(1)}MB) for context caching. For best results, upload documents with substantial text content (>5MB recommended).`);
                return;
            }

            showLoading(`Uploading PDF (${fileSizeMB.toFixed(1)}MB)...`);

            const formData = new FormData();
            formData.append('file', file);

            try {
                const response = await fetch('/upload', {
                    method: 'POST',
                    body: formData
                });

                handleUploadResult(await response.json());
            } catch (error) {
                showError('Error uploading file: ' + error.message);
            } finally {
                hideLoading();
            }
        }

        async function uploadFromUrl() {
            const url = document.getElementById('urlInput').value;
            if (!url) {
                showError('Please enter a valid URL.');
                return;
            }

            showLoading('Uploading PDF from URL...');

            try {
                const response = await fetch('/upload-url', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json'
                    },
                    body: JSON.stringify({ url: url })
                });

                handleUploadResult(await response.json());
            } catch (error) {
                showError('Error uploading from URL: ' + error.message);
            } finally {
                hideLoading();
            }
        }

        async function askQuestion() {
            const question = document.getElementById('questionInput').value;
            if (!question.trim()) return;

            if (!currentCacheId) {
                showError('Please upload a PDF document first.');
                return;
            }

            // Add user message to chat
            addMessage(question, 'user');
            document.getElementById('questionInput').value = '';

            // Show loading state
            const askBtn = document.getElementById('askBtn');
            const originalText = askBtn.textContent;
            askBtn.textContent = 'Generating...';
            askBtn.disabled = true;

            try {
                const response = await fetch('/ask', {
                    method: 'POST',
                    headers: {
                        'Content-Type': 'application/json'
                    },
                    body: JSON.stringify({
                        question: question,
                        cache_id: currentCacheId
                    })
                });

                const result = await response.json();

                if (result.success) {
                    addMessage(result.answer, 'ai');
                } else {
                    addMessage('Error: ' + result.error, 'ai');
                }
            } catch (error) {
                addMessage('Error: ' + error.message, 'ai');
            } finally {
                askBtn.textContent = originalText;
                askBtn.disabled = false;
            }
        }

        function addMessage(text, sender) {
            const chatContainer = document.getElementById('chatContainer');
            const messageDiv = document.createElement('div');
            messageDiv.className = `message ${sender}-message`;
            messageDiv.textContent = text;
            chatContainer.appendChild(messageDiv);
            chatContainer.scrollTop = chatContainer.scrollHeight;
        }

        function showLoading(text = 'Processing...') {
            document.getElementById('loadingText').textContent = text;
            document.getElementById('loading').style.display = 'block';
        }

        function hideLoading() {
            document.getElementById('loading').style.display = 'none';
        }

        function showError(message) {
            const errorDiv = document.getElementById('error');
            errorDiv.textContent = message;
            errorDiv.style.display = 'block';
            // Cancel the previous timer so it cannot hide this newer message early
            clearTimeout(errorTimer);
            errorTimer = setTimeout(() => {
                errorDiv.style.display = 'none';
            }, 5000);
        }

        function showSuccess(message) {
            const successDiv = document.getElementById('success');
            successDiv.textContent = message;
            successDiv.style.display = 'block';
            // Cancel the previous timer so it cannot hide this newer message early
            clearTimeout(successTimer);
            successTimer = setTimeout(() => {
                successDiv.style.display = 'none';
            }, 5000);
        }

        // Enter key to ask question
        document.getElementById('questionInput').addEventListener('keypress', (e) => {
            if (e.key === 'Enter') {
                askQuestion();
            }
        });
    </script>
</body>
</html>
"""

@app.route('/')
def index():
    """Serve the single-page web interface rendered from HTML_TEMPLATE."""
    page = render_template_string(HTML_TEMPLATE)
    return page

@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept a PDF via multipart form upload and cache it with Gemini.

    Expects the document in the ``file`` form field. The bytes are uploaded to
    the Gemini File API, a cached-content entry (1 hour TTL) is created from
    the uploaded file, and the result is registered in ``document_caches``
    under a freshly generated UUID.

    Cache creation is attempted with ``gemini-2.5-flash-001`` first; when that
    fails with an error mentioning "invalid" or "model", it is retried once
    with ``gemini-2.0-flash-001``. The model that actually produced the cache
    is stored with the cache entry and returned as ``model_used``.

    Returns:
        A JSON response. On success: ``success``, ``cache_id``,
        ``token_count``, ``document_name`` and ``model_used``. On failure:
        ``success: False`` with ``error`` (plus ``suggestion`` and
        ``fallback`` when the document is too small to cache).
    """
    primary_model = 'gemini-2.5-flash-001'
    fallback_model = 'gemini-2.0-flash-001'
    system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."

    try:
        if 'file' not in request.files:
            return jsonify({'success': False, 'error': 'No file provided'})

        file = request.files['file']

        # Covers both an empty and a missing filename
        if not file.filename:
            return jsonify({'success': False, 'error': 'No file selected'})

        # Check file size (limit to 200MB for large documents needed for caching):
        # seek to the end to measure, then rewind for the read below
        file.seek(0, 2)
        file_size = file.tell()
        file.seek(0)

        # Convert to MB for display
        file_size_mb = file_size / (1024 * 1024)

        if file_size > 200 * 1024 * 1024:  # 200MB limit
            return jsonify({'success': False, 'error': f'File too large ({file_size_mb:.1f}MB). Maximum size is 200MB.'})

        # Warn about small files that might not cache
        if file_size < 1024 * 1024:  # Less than 1MB
            print(f"Warning: Small file uploaded ({file_size_mb:.1f}MB). May not meet minimum token requirements for caching.")

        # Read file content
        file_content = file.read()
        if not file_content:
            return jsonify({'success': False, 'error': 'File is empty'})

        file_io = io.BytesIO(file_content)

        # Upload to Gemini File API
        try:
            # An in-memory stream carries no filename for the SDK to infer a
            # MIME type from, so the type is stated explicitly.
            document = client.files.upload(
                file=file_io,
                config=types.UploadFileConfig(mime_type='application/pdf'),
            )
            print(f"Document uploaded successfully: {document.name}")
        except Exception as upload_error:
            print(f"Upload error: {upload_error}")
            return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})

        def create_cache(model_name):
            """Create the cached-content entry for ``document`` on one model."""
            return client.caches.create(
                model=model_name,
                config=types.CreateCachedContentConfig(
                    display_name=f'PDF document cache - {file.filename}',
                    system_instruction=system_instruction,
                    contents=[document],
                    ttl="3600s",  # 1 hour TTL
                )
            )

        # Create cache with system instruction, falling back to 2.0 Flash
        model_used = primary_model
        try:
            cache = create_cache(primary_model)
            print(f"Cache created successfully: {cache.name}")
        except Exception as cache_error:
            print(f"Cache error: {cache_error}")
            # Provide more specific error handling for token requirements
            error_msg = str(cache_error).lower()
            if "too small" in error_msg or "minimum" in error_msg:
                return jsonify({
                    'success': False,
                    'error': f'Document content is insufficient for caching. Gemini 2.5 Flash requires minimum 1,024 tokens (~2-3 pages of text). Your document: {file.filename} ({file_size_mb:.1f}MB)',
                    'suggestion': 'Upload a longer document with more text content (recommended: 5MB+ with substantial text).',
                    'fallback': 'You can still use the document without caching by implementing direct file processing.'
                })
            if "invalid" not in error_msg and "model" not in error_msg:
                return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})

            # Try fallback to 2.0 Flash
            try:
                cache = create_cache(fallback_model)
            except Exception as fallback_error:
                print(f"Fallback cache error: {fallback_error}")
                return jsonify({'success': False, 'error': f'Failed to create cache with both models: {str(fallback_error)}'})
            print(f"Fallback cache created with 2.0 Flash: {cache.name}")
            model_used = fallback_model

        # Store cache info, including the model so later questions can be
        # sent to the same model that owns the cache
        cache_id = str(uuid.uuid4())
        document_caches[cache_id] = {
            'cache_name': cache.name,
            'document_name': file.filename,
            'document_file_name': document.name,
            'model': model_used,
            'created_at': datetime.now().isoformat()
        }

        # Get token count safely: first populated counter wins
        token_count = 'Unknown'
        usage = getattr(cache, 'usage_metadata', None)
        if usage:
            for attr in ('total_token_count', 'cached_token_count'):
                value = getattr(usage, attr, None)
                if value is not None:
                    token_count = value
                    break

        return jsonify({
            'success': True,
            'cache_id': cache_id,
            'token_count': token_count,
            'document_name': file.filename,
            'model_used': model_used
        })

    except Exception as e:
        print(f"General error: {e}")
        return jsonify({'success': False, 'error': f'Server error: {str(e)}'})

@app.route('/upload-url', methods=['POST'])
def upload_from_url():
    """Download a PDF from a user-supplied URL and cache it with Gemini.

    Expects a JSON body with a ``url`` key. The response body is streamed and
    the download is abandoned as soon as it exceeds 200MB, so an oversized
    file is never held in memory in full. The downloaded bytes are uploaded to
    the Gemini File API, a cached-content entry (1 hour TTL) is created, and
    the result is registered in ``document_caches`` under a fresh UUID.

    Cache creation is attempted with ``gemini-2.5-flash-001`` first; when that
    fails with an error mentioning "invalid" or "model", it is retried once
    with ``gemini-2.0-flash-001``.

    Returns:
        A JSON response. On success: ``success``, ``cache_id``,
        ``token_count``, ``document_name`` and ``model_used``. On failure:
        ``success: False`` with ``error`` (plus ``suggestion`` when the
        document is too small to cache).
    """
    from urllib.parse import unquote, urlparse

    max_bytes = 200 * 1024 * 1024  # 200MB limit
    primary_model = 'gemini-2.5-flash-001'
    fallback_model = 'gemini-2.0-flash-001'
    system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."

    try:
        # silent=True: a missing or malformed JSON body becomes "No URL provided"
        # instead of surfacing as a generic server error
        payload = request.get_json(silent=True)
        url = payload.get('url') if isinstance(payload, dict) else None

        if not url or not isinstance(url, str):
            return jsonify({'success': False, 'error': 'No URL provided'})

        # Path component only, so query strings and fragments do not confuse
        # the extension check or the derived filename
        url_path = urlparse(url).path

        # NOTE(review): the server fetches an arbitrary caller-supplied URL
        # (SSRF risk). Consider restricting schemes/hosts before exposing this
        # endpoint publicly.
        # Download file from URL with timeout and size limits
        try:
            with httpx.Client(timeout=30.0) as client_http:
                with client_http.stream('GET', url) as response:
                    response.raise_for_status()

                    # Check content type
                    content_type = response.headers.get('content-type', '').lower()
                    if 'pdf' not in content_type and not url_path.lower().endswith('.pdf'):
                        return jsonify({'success': False, 'error': 'URL does not point to a PDF file'})

                    # Reject early when the server announces an oversized body
                    declared_length = response.headers.get('content-length', '')
                    if declared_length.isdigit() and int(declared_length) > max_bytes:
                        declared_mb = int(declared_length) / (1024 * 1024)
                        return jsonify({'success': False, 'error': f'File too large ({declared_mb:.1f}MB). Maximum size is 200MB.'})

                    # Enforce the limit while streaming; the header may be
                    # absent or wrong
                    chunks = []
                    received = 0
                    for chunk in response.iter_bytes():
                        received += len(chunk)
                        if received > max_bytes:
                            return jsonify({'success': False, 'error': 'File too large (more than 200MB). Maximum size is 200MB.'})
                        chunks.append(chunk)

        except httpx.TimeoutException:
            return jsonify({'success': False, 'error': 'Request timeout. Please try a different URL.'})
        except (httpx.HTTPError, httpx.InvalidURL) as e:
            return jsonify({'success': False, 'error': f'Failed to download file: {str(e)}'})

        content = b''.join(chunks)
        if not content:
            return jsonify({'success': False, 'error': 'Downloaded file is empty'})

        content_length = len(content)
        content_length_mb = content_length / (1024 * 1024)

        # Warn about small files
        if content_length < 1024 * 1024:  # Less than 1MB
            print(f"Warning: Small file from URL ({content_length_mb:.1f}MB). May not meet minimum token requirements for caching.")

        file_io = io.BytesIO(content)

        # Extract filename from the URL path (percent-decoded); fall back to a
        # generic name when the path ends in "/"
        filename = unquote(url_path.rsplit('/', 1)[-1]) or 'document'
        if not filename.lower().endswith('.pdf'):
            filename += '.pdf'

        # Upload to Gemini File API
        try:
            # An in-memory stream carries no filename for the SDK to infer a
            # MIME type from, so the type is stated explicitly.
            document = client.files.upload(
                file=file_io,
                config=types.UploadFileConfig(mime_type='application/pdf'),
            )
            print(f"Document uploaded successfully: {document.name}")
        except Exception as upload_error:
            print(f"Upload error: {upload_error}")
            return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})

        def create_cache(model_name):
            """Create the cached-content entry for ``document`` on one model."""
            return client.caches.create(
                model=model_name,
                config=types.CreateCachedContentConfig(
                    display_name=f'PDF document cache - {filename}',
                    system_instruction=system_instruction,
                    contents=[document],
                    ttl="3600s",  # 1 hour TTL
                )
            )

        # Create cache with system instruction, falling back to 2.0 Flash
        model_used = primary_model
        try:
            cache = create_cache(primary_model)
            print(f"Cache created successfully: {cache.name}")
        except Exception as cache_error:
            print(f"Cache error: {cache_error}")
            error_msg = str(cache_error).lower()
            # If caching fails due to small content, provide alternative approach
            if "too small" in error_msg or "minimum" in error_msg:
                return jsonify({
                    'success': False,
                    'error': 'PDF content is too small for caching. Please upload a larger document with more text content.',
                    'suggestion': 'Try uploading a longer document or combine multiple documents.'
                })
            if "invalid" not in error_msg and "model" not in error_msg:
                return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})

            # Try fallback to 2.0 Flash
            try:
                cache = create_cache(fallback_model)
            except Exception as fallback_error:
                print(f"Fallback cache error: {fallback_error}")
                return jsonify({'success': False, 'error': f'Failed to create cache with both models: {str(fallback_error)}'})
            print(f"Fallback cache created with 2.0 Flash: {cache.name}")
            model_used = fallback_model

        # Store cache info, including the model so later questions can be
        # sent to the same model that owns the cache
        cache_id = str(uuid.uuid4())
        document_caches[cache_id] = {
            'cache_name': cache.name,
            'document_name': filename,
            'document_file_name': document.name,
            'source_url': url,
            'model': model_used,
            'created_at': datetime.now().isoformat()
        }

        # Get token count safely: first populated counter wins
        token_count = 'Unknown'
        usage = getattr(cache, 'usage_metadata', None)
        if usage:
            for attr in ('total_token_count', 'cached_token_count'):
                value = getattr(usage, attr, None)
                if value is not None:
                    token_count = value
                    break

        return jsonify({
            'success': True,
            'cache_id': cache_id,
            'token_count': token_count,
            'document_name': filename,
            'model_used': model_used
        })

    except Exception as e:
        print(f"General error: {e}")
        return jsonify({'success': False, 'error': f'Server error: {str(e)}'})

@app.route('/ask', methods=['POST'])
def ask_question():
    """Answer a question against a previously cached document.

    Reads ``question`` and ``cache_id`` from the JSON request body, looks the
    cache up in ``document_caches`` and asks the Gemini model to answer using
    the cached content.

    Returns:
        A JSON response. On success: ``{'success': True, 'answer': <text>}``.
        On any failure: ``{'success': False, 'error': <message>}``.
    """
    try:
        # silent=True returns None instead of raising when the body is missing
        # or is not valid JSON, so malformed requests get the validation
        # message below rather than a generic "Server error".
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            # Valid JSON that is not an object (null, list, ...) has no fields.
            data = {}

        question = data.get('question')
        cache_id = data.get('cache_id')

        if not question or not cache_id:
            return jsonify({'success': False, 'error': 'Missing question or cache_id'})

        # Cache ids are generated as UUID strings; rejecting other types up
        # front also avoids a TypeError when an unhashable value is looked up.
        cache_info = document_caches.get(cache_id) if isinstance(cache_id, str) else None
        if cache_info is None:
            return jsonify({'success': False, 'error': 'Cache not found. Please upload a document first.'})

        # Generate response using cached content with correct model format
        # NOTE(review): presumably this model must match the one used when the
        # cache was created - confirm against the cache-creation code.
        try:
            response = client.models.generate_content(
                model='gemini-2.5-flash-001',  # Use 2.5 Flash for consistency
                contents=question,
                config=types.GenerateContentConfig(
                    cached_content=cache_info['cache_name']
                )
            )

            if response and response.text:
                return jsonify({
                    'success': True,
                    'answer': response.text
                })

            return jsonify({
                'success': False,
                'error': 'No response generated from the model'
            })

        except Exception as gen_error:
            print(f"Generation error: {gen_error}")
            return jsonify({'success': False, 'error': f'Failed to generate response: {str(gen_error)}'})

    except Exception as e:
        print(f"General error in ask_question: {e}")
        return jsonify({'success': False, 'error': f'Server error: {str(e)}'})

@app.route('/caches', methods=['GET'])
def list_caches():
    """List every cache tracked in ``document_caches``.

    Returns:
        A JSON response ``{'success': True, 'caches': [...]}`` where each item
        carries ``cache_id``, ``document_name`` and ``created_at``; or
        ``{'success': False, 'error': <message>}`` if building the list fails.
    """
    try:
        summaries = [
            {
                'cache_id': key,
                'document_name': entry['document_name'],
                'created_at': entry['created_at'],
            }
            for key, entry in document_caches.items()
        ]
        return jsonify({'success': True, 'caches': summaries})
    except Exception as exc:
        return jsonify({'success': False, 'error': str(exc)})

@app.route('/cache/<cache_id>', methods=['DELETE'])
def delete_cache(cache_id):
    """Delete a cache from the Gemini API and from local storage.

    Args:
        cache_id: Local identifier of the cache, taken from the URL path.

    Returns:
        A JSON response: ``{'success': True, 'message': ...}`` once the local
        entry is removed, or ``{'success': False, 'error': <message>}`` when
        the id is unknown or an unexpected error occurs.
    """
    try:
        cache_info = document_caches.get(cache_id)
        if cache_info is None:
            return jsonify({'success': False, 'error': 'Cache not found'})

        # Delete from Gemini API
        try:
            # The SDK takes `name` as a keyword-only argument; a positional
            # call raises TypeError, which the handler below would swallow,
            # leaving the remote cache alive.
            client.caches.delete(name=cache_info['cache_name'])
        except Exception as delete_error:
            print(f"Error deleting cache from Gemini API: {delete_error}")
            # Continue to remove from local storage even if API deletion fails

        # Remove from local storage; pop() tolerates the entry having been
        # removed by another request in the meantime.
        document_caches.pop(cache_id, None)

        return jsonify({'success': True, 'message': 'Cache deleted successfully'})

    except Exception as e:
        return jsonify({'success': False, 'error': str(e)})

# Health check endpoint
@app.route('/health', methods=['GET'])
def health_check():
    """Return a static JSON payload reporting the service as healthy."""
    payload = {
        'status': 'healthy',
        'service': 'Smart Document Analysis Platform',
    }
    return jsonify(payload)

# Error handlers
@app.errorhandler(413)
def too_large(e):
    """Answer HTTP 413 errors with a JSON body instead of the default page."""
    message = 'File too large. Maximum size is 200MB for substantial documents needed for context caching.'
    body = jsonify({'success': False, 'error': message})
    return body, 413

@app.errorhandler(500)
def internal_error(e):
    """Answer HTTP 500 errors with a generic JSON body."""
    body = jsonify({'success': False, 'error': 'Internal server error'})
    return body, 500

if __name__ == '__main__':
    # Script entry point: the port comes from the PORT environment variable
    # and falls back to 7860. `os` is already imported at the top of the file.
    port = int(os.environ.get("PORT", 7860))
    key_status = 'Yes' if GOOGLE_API_KEY else 'No'
    print(f"Starting server on port {port}")
    print(f"Google API Key configured: {key_status}")
    # Listen on all interfaces with the debugger disabled.
    app.run(debug=False, host='0.0.0.0', port=port)