Omartificial-Intelligence-Space committed
Commit 114ad9c · verified · 1 Parent(s): a819b46

Update app.py

Files changed (1):
  1. app.py +64 -220
app.py CHANGED
@@ -6,7 +6,6 @@ import os
 import io
 import httpx
 import uuid
-import tempfile
 from datetime import datetime, timezone, timedelta
 from dotenv import load_dotenv
 import json
@@ -14,19 +13,11 @@ import json
 # Load environment variables
 load_dotenv()
 
-# Get Google API key from environment
-GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
-if GOOGLE_API_KEY is None:
-    raise ValueError("GOOGLE_API_KEY environment variable is not set. Please set it before running the script.")
-
 app = Flask(__name__)
 CORS(app)
 
-# Configure Flask for large file uploads (200MB for substantial documents)
-app.config['MAX_CONTENT_LENGTH'] = 200 * 1024 * 1024  # 200MB max file size
-
-# Initialize Gemini client with correct API key
-client = genai.Client(api_key=GOOGLE_API_KEY)
+# Initialize Gemini client
+client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY'))
 
 # In-memory storage for demo (in production, use a database)
 document_caches = {}
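
The hunk above starts at line 6 of app.py, so the opening imports are not shown in this diff. Given the names the file uses (Flask, CORS, jsonify, genai.Client, types.CreateCachedContentConfig), they presumably resemble the following sketch; this is an assumption for context, not part of the commit:

# Hypothetical reconstruction of the unshown header of app.py
from flask import Flask, request, jsonify, render_template_string  # presumed Flask imports
from flask_cors import CORS
from google import genai        # google-genai SDK
from google.genai import types  # config types used for caching below
import os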
@@ -331,8 +322,7 @@ HTML_TEMPLATE = """
         <div class="container">
             <div class="header">
                 <h1>📚 Smart Document Analysis Platform</h1>
-                <p>Upload substantial PDF documents for efficient context caching with Gemini API</p>
-                <p style="font-size: 0.9em; opacity: 0.8; margin-top: 5px;">💡 Context caching requires minimum token thresholds - larger documents work better</p>
+                <p>Upload PDF documents once, ask questions forever with Gemini API caching</p>
             </div>
 
             <div class="main-content">
@@ -346,8 +336,6 @@ HTML_TEMPLATE = """
                 <div class="upload-area" id="uploadArea">
                     <div class="upload-icon">📄</div>
                     <p>Drag and drop your PDF file here, or click to select</p>
-                    <p style="font-size: 0.9em; color: #666; margin-top: 5px;">For context caching to work: Upload substantial documents (5MB+ recommended)</p>
-                    <p style="font-size: 0.8em; color: #888; margin-top: 5px;">Maximum file size: 200MB</p>
                     <input type="file" id="fileInput" class="file-input" accept=".pdf">
                     <button class="upload-btn" onclick="document.getElementById('fileInput').click()">
                         Choose PDF File
@@ -378,12 +366,9 @@ HTML_TEMPLATE = """
 
                 <div id="cacheInfo" class="cache-info" style="display: none;">
                     <h3>✅ Document Cached Successfully!</h3>
-                    <p>Your PDF has been cached using Gemini API context caching. You can now ask multiple questions efficiently without re-uploading.</p>
-                    <p><strong>Document:</strong> <span id="documentName"></span></p>
+                    <p>Your PDF has been cached using Gemini API. You can now ask multiple questions without re-uploading.</p>
                     <p><strong>Cache ID:</strong> <span id="cacheId"></span></p>
                     <p><strong>Tokens Cached:</strong> <span id="tokenCount"></span></p>
-                    <p><strong>Model:</strong> <span id="modelUsed"></span></p>
-                    <p style="font-size: 0.9em; margin-top: 10px; opacity: 0.8;">💡 Cache valid for 1 hour. Subsequent questions will use cached content for faster responses.</p>
                 </div>
 
                 <div class="chat-container" id="chatContainer">
@@ -437,20 +422,7 @@ HTML_TEMPLATE = """
                 return;
             }
 
-            // Check file size on client side (200MB limit)
-            const fileSizeMB = file.size / (1024 * 1024);
-            if (file.size > 200 * 1024 * 1024) {
-                showError(`File too large (${fileSizeMB.toFixed(1)}MB). Maximum size is 200MB.`);
-                return;
-            }
-
-            // Warn about small files that might not cache
-            if (file.size < 1024 * 1024) {
-                showError(`File might be too small (${fileSizeMB.toFixed(1)}MB) for context caching. For best results, upload documents with substantial text content (>5MB recommended).`);
-                return;
-            }
-
-            showLoading(`Uploading PDF (${fileSizeMB.toFixed(1)}MB)...`);
+            showLoading('Uploading PDF...');
 
             const formData = new FormData();
             formData.append('file', file);
@@ -467,8 +439,6 @@ HTML_TEMPLATE = """
                     currentCacheId = result.cache_id;
                     document.getElementById('cacheId').textContent = result.cache_id;
                     document.getElementById('tokenCount').textContent = result.token_count;
-                    document.getElementById('documentName').textContent = result.document_name;
-                    document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.0-flash-001';
                     document.getElementById('cacheInfo').style.display = 'block';
                     showSuccess('PDF uploaded and cached successfully!');
 
@@ -476,9 +446,6 @@ HTML_TEMPLATE = """
                     addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
                 } else {
                     showError(result.error);
-                    if (result.suggestion) {
-                        showError(result.suggestion);
-                    }
                 }
             } catch (error) {
                 showError('Error uploading file: ' + error.message);
@@ -511,8 +478,6 @@ HTML_TEMPLATE = """
                     currentCacheId = result.cache_id;
                     document.getElementById('cacheId').textContent = result.cache_id;
                     document.getElementById('tokenCount').textContent = result.token_count;
-                    document.getElementById('documentName').textContent = result.document_name;
-                    document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.0-flash-001';
                     document.getElementById('cacheInfo').style.display = 'block';
                     showSuccess('PDF uploaded and cached successfully!');
 
@@ -520,9 +485,6 @@ HTML_TEMPLATE = """
                     addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
                 } else {
                     showError(result.error);
-                    if (result.suggestion) {
-                        showError(result.suggestion);
-                    }
                 }
             } catch (error) {
                 showError('Error uploading from URL: ' + error.message);
@@ -639,98 +601,60 @@ def upload_file():
         if file.filename == '':
             return jsonify({'success': False, 'error': 'No file selected'})
 
-        # Check file size (limit to 200MB for large documents needed for caching)
-        file.seek(0, 2)  # Seek to end
-        file_size = file.tell()
-        file.seek(0)  # Reset to beginning
-
-        # Convert to MB for display
-        file_size_mb = file_size / (1024 * 1024)
-
-        if file_size > 200 * 1024 * 1024:  # 200MB limit
-            return jsonify({'success': False, 'error': f'File too large ({file_size_mb:.1f}MB). Maximum size is 200MB.'})
-
-        # Warn about small files that might not cache
-        if file_size < 1024 * 1024:  # Less than 1MB
-            print(f"Warning: Small file uploaded ({file_size_mb:.1f}MB). May not meet minimum token requirements for caching.")
-
         # Read file content
         file_content = file.read()
-        if not file_content:
-            return jsonify({'success': False, 'error': 'File is empty'})
+        file_io = io.BytesIO(file_content)
 
-        # Create BytesIO from content as shown in documentation
-        doc_io = io.BytesIO(file_content)
-
-        # Upload to Gemini File API using the exact pattern from documentation
-        try:
-            document = client.files.upload(
-                file=doc_io,
-                config=dict(mime_type='application/pdf')
-            )
-            print(f"Document uploaded successfully: {document.name}")
-        except Exception as upload_error:
-            print(f"Upload error: {upload_error}")
-            return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})
-
-        # Create cache with system instruction using exact pattern from documentation
+        # Upload to Gemini File API
+        document = client.files.upload(
+            file=file_io,
+            config=dict(mime_type='application/pdf')
+        )
+
+        # Create cache with system instruction
         try:
             system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
 
-            # Use the model name format from documentation
-            model_name = "gemini-2.0-flash-001"
+            # Use the correct model format as per documentation
+            model = 'models/gemini-2.0-flash-001'
 
-            # Create cached content object exactly as shown in documentation
             cache = client.caches.create(
-                model=model_name,
+                model=model,
                 config=types.CreateCachedContentConfig(
+                    display_name='pdf document cache',
                     system_instruction=system_instruction,
-                    contents=[document],  # Direct document reference as in docs
+                    contents=[document],
+                    ttl="3600s",  # 1 hour TTL
                 )
             )
 
-            print(f"Cache created successfully: {cache.name}")
-
             # Store cache info
             cache_id = str(uuid.uuid4())
             document_caches[cache_id] = {
                 'cache_name': cache.name,
                 'document_name': file.filename,
-                'document_file_name': document.name,
                 'created_at': datetime.now().isoformat()
             }
 
-            # Get token count safely
-            token_count = 'Unknown'
-            if hasattr(cache, 'usage_metadata') and cache.usage_metadata:
-                if hasattr(cache.usage_metadata, 'total_token_count'):
-                    token_count = cache.usage_metadata.total_token_count
-                elif hasattr(cache.usage_metadata, 'cached_token_count'):
-                    token_count = cache.usage_metadata.cached_token_count
-
             return jsonify({
                 'success': True,
                 'cache_id': cache_id,
-                'token_count': token_count,
-                'document_name': file.filename
+                'token_count': getattr(cache.usage_metadata, 'cached_token_count', 'Unknown')
             })
 
         except Exception as cache_error:
-            print(f"Cache error: {cache_error}")
-            # Provide more specific error handling for token requirements
-            error_msg = str(cache_error).lower()
-            if "too small" in error_msg or "minimum" in error_msg:
+            # If caching fails due to small content, provide alternative approach
+            if "Cached content is too small" in str(cache_error):
                 return jsonify({
                     'success': False,
-                    'error': f'Document content is insufficient for caching. Gemini requires minimum token thresholds. Your document: {file.filename} ({file_size_mb:.1f}MB)',
-                    'suggestion': 'Upload a longer document with more text content (recommended: 5MB+ with substantial text).'
+                    'error': 'PDF is too small for caching. Please upload a larger document (minimum 4,096 tokens required).',
+                    'suggestion': 'Try uploading a longer document or combine multiple documents.'
                 })
             else:
-                return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
+                raise cache_error
 
     except Exception as e:
-        print(f"General error: {e}")
-        return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
+        return jsonify({'success': False, 'error': str(e)})
 
 @app.route('/upload-url', methods=['POST'])
 def upload_from_url():
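
Stripped of the Flask plumbing, the new upload path is a two-step pattern: push the PDF bytes to the File API, then create an explicit cache that pins the uploaded document behind a system instruction. Below is a minimal standalone sketch of that pattern, reusing the model name, config fields, and TTL from the diff; the file path and instruction text are placeholders:

import io
from google import genai
from google.genai import types

client = genai.Client()  # picks up GOOGLE_API_KEY from the environment

# Step 1: upload the raw PDF bytes to the Gemini File API
with open('sample.pdf', 'rb') as f:  # placeholder path
    document = client.files.upload(
        file=io.BytesIO(f.read()),
        config=dict(mime_type='application/pdf')
    )

# Step 2: create a cache bundling the document with a system instruction
cache = client.caches.create(
    model='models/gemini-2.0-flash-001',
    config=types.CreateCachedContentConfig(
        display_name='pdf document cache',
        system_instruction='You are an expert document analyzer.',
        contents=[document],
        ttl='3600s',  # entries expire after one hour, matching the diff
    )
)
print(cache.name)  # server-side cache name; the app stores it per cache_id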
@@ -741,110 +665,62 @@ def upload_from_url():
         if not url:
             return jsonify({'success': False, 'error': 'No URL provided'})
 
-        # Download file from URL with timeout and size limits
-        try:
-            with httpx.Client(timeout=30.0) as client_http:
-                response = client_http.get(url)
-                response.raise_for_status()
-
-                # Check content type
-                content_type = response.headers.get('content-type', '').lower()
-                if 'pdf' not in content_type and not url.lower().endswith('.pdf'):
-                    return jsonify({'success': False, 'error': 'URL does not point to a PDF file'})
-
-                # Check file size
-                content_length = len(response.content)
-                content_length_mb = content_length / (1024 * 1024)
-
-                if content_length > 200 * 1024 * 1024:  # 200MB limit
-                    return jsonify({'success': False, 'error': f'File too large ({content_length_mb:.1f}MB). Maximum size is 200MB.'})
-
-                # Warn about small files
-                if content_length < 1024 * 1024:  # Less than 1MB
-                    print(f"Warning: Small file from URL ({content_length_mb:.1f}MB). May not meet minimum token requirements for caching.")
-
-        except httpx.TimeoutException:
-            return jsonify({'success': False, 'error': 'Request timeout. Please try a different URL.'})
-        except httpx.HTTPError as e:
-            return jsonify({'success': False, 'error': f'Failed to download file: {str(e)}'})
-
-        # Extract filename from URL
-        filename = url.split('/')[-1]
-        if not filename.endswith('.pdf'):
-            filename += '.pdf'
+        # Download file from URL
+        response = httpx.get(url)
+        response.raise_for_status()
 
-        # Create BytesIO from content as shown in documentation
-        doc_io = io.BytesIO(response.content)
+        file_io = io.BytesIO(response.content)
 
-        # Upload to Gemini File API using the exact pattern from documentation
-        try:
-            document = client.files.upload(
-                file=doc_io,
-                config=dict(mime_type='application/pdf')
-            )
-            print(f"Document uploaded successfully: {document.name}")
-        except Exception as upload_error:
-            print(f"Upload error: {upload_error}")
-            return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})
+        # Upload to Gemini File API
+        document = client.files.upload(
+            file=file_io,
+            config=dict(mime_type='application/pdf')
+        )
 
-        # Create cache with system instruction using exact pattern from documentation
+        # Create cache with system instruction
        try:
             system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
 
-            # Use the model name format from documentation
-            model_name = "gemini-2.0-flash-001"
+            # Use the correct model format as per documentation
+            model = 'models/gemini-2.0-flash-001'
 
-            # Create cached content object exactly as shown in documentation
             cache = client.caches.create(
-                model=model_name,
+                model=model,
                 config=types.CreateCachedContentConfig(
+                    display_name='pdf document cache',
                     system_instruction=system_instruction,
-                    contents=[document],  # Direct document reference as in docs
+                    contents=[document],
+                    ttl="3600s",  # 1 hour TTL
                 )
             )
 
-            print(f"Cache created successfully: {cache.name}")
-
             # Store cache info
             cache_id = str(uuid.uuid4())
             document_caches[cache_id] = {
                 'cache_name': cache.name,
-                'document_name': filename,
-                'source_url': url,
+                'document_name': url,
                 'created_at': datetime.now().isoformat()
             }
 
-            # Get token count safely
-            token_count = 'Unknown'
-            if hasattr(cache, 'usage_metadata') and cache.usage_metadata:
-                if hasattr(cache.usage_metadata, 'total_token_count'):
-                    token_count = cache.usage_metadata.total_token_count
-                elif hasattr(cache.usage_metadata, 'cached_token_count'):
-                    token_count = cache.usage_metadata.cached_token_count
-
             return jsonify({
                 'success': True,
                 'cache_id': cache_id,
-                'token_count': token_count,
-                'document_name': filename
+                'token_count': getattr(cache.usage_metadata, 'cached_token_count', 'Unknown')
             })
 
         except Exception as cache_error:
-            print(f"Cache error: {cache_error}")
-            # Provide more specific error handling for token requirements
-            error_msg = str(cache_error).lower()
-            if "too small" in error_msg or "minimum" in error_msg:
+            # If caching fails due to small content, provide alternative approach
+            if "Cached content is too small" in str(cache_error):
                 return jsonify({
                     'success': False,
-                    'error': f'Document content is insufficient for caching. Gemini requires minimum token thresholds. Document from URL: {filename} ({content_length_mb:.1f}MB)',
-                    'suggestion': 'Try a longer document with more text content (recommended: 5MB+ with substantial text).'
+                    'error': 'PDF is too small for caching. Please upload a larger document (minimum 4,096 tokens required).',
+                    'suggestion': 'Try uploading a longer document or combine multiple documents.'
                 })
             else:
-                return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
+                raise cache_error
 
     except Exception as e:
-        print(f"General error: {e}")
-        return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
+        return jsonify({'success': False, 'error': str(e)})
 
 @app.route('/ask', methods=['POST'])
 def ask_question():
857
  return jsonify({'success': False, 'error': 'Missing question or cache_id'})
858
 
859
  if cache_id not in document_caches:
860
- return jsonify({'success': False, 'error': 'Cache not found. Please upload a document first.'})
861
 
862
  cache_info = document_caches[cache_id]
863
 
864
  # Generate response using cached content with correct model format
865
- try:
866
- response = client.models.generate_content(
867
- model="gemini-2.0-flash-001", # Use model name format from documentation
868
- contents=question,
869
- config=types.GenerateContentConfig(
870
- cached_content=cache_info['cache_name']
871
- )
872
  )
873
-
874
- if response and response.text:
875
- return jsonify({
876
- 'success': True,
877
- 'answer': response.text
878
- })
879
- else:
880
- return jsonify({
881
- 'success': False,
882
- 'error': 'No response generated from the model'
883
- })
884
-
885
- except Exception as gen_error:
886
- print(f"Generation error: {gen_error}")
887
- return jsonify({'success': False, 'error': f'Failed to generate response: {str(gen_error)}'})
888
 
889
  except Exception as e:
890
- print(f"General error in ask_question: {e}")
891
- return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
892
 
893
  @app.route('/caches', methods=['GET'])
894
  def list_caches():
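
With the cache in place, answering a question is a single generate_content call that references the cached content by name, as the rewritten handler above does. Continuing the earlier sketch (the question is a placeholder; the model must match the one the cache was created with):

response = client.models.generate_content(
    model='models/gemini-2.0-flash-001',      # same model as the cache
    contents='What is this document about?',  # placeholder question
    config=types.GenerateContentConfig(
        cached_content=cache.name  # reuse the cached document and system instruction
    )
)
print(response.text)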
@@ -915,11 +779,7 @@ def delete_cache(cache_id):
         cache_info = document_caches[cache_id]
 
         # Delete from Gemini API
-        try:
-            client.caches.delete(cache_info['cache_name'])
-        except Exception as delete_error:
-            print(f"Error deleting cache from Gemini API: {delete_error}")
-            # Continue to remove from local storage even if API deletion fails
+        client.caches.delete(cache_info['cache_name'])
 
         # Remove from local storage
         del document_caches[cache_id]
@@ -929,23 +789,7 @@ def delete_cache(cache_id):
     except Exception as e:
         return jsonify({'success': False, 'error': str(e)})
 
-# Health check endpoint
-@app.route('/health', methods=['GET'])
-def health_check():
-    return jsonify({'status': 'healthy', 'service': 'Smart Document Analysis Platform'})
-
-# Error handlers
-@app.errorhandler(413)
-def too_large(e):
-    return jsonify({'success': False, 'error': 'File too large. Maximum size is 200MB for substantial documents needed for context caching.'}), 413
-
-@app.errorhandler(500)
-def internal_error(e):
-    return jsonify({'success': False, 'error': 'Internal server error'}), 500
-
 if __name__ == '__main__':
     import os
     port = int(os.environ.get("PORT", 7860))
-    print(f"Starting server on port {port}")
-    print(f"Google API Key configured: {'Yes' if GOOGLE_API_KEY else 'No'}")
-    app.run(debug=False, host='0.0.0.0', port=port)
+    app.run(debug=True, host='0.0.0.0', port=port)
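
For reference, an end-to-end exercise of the updated endpoints might look like the sketch below. It assumes the server is running locally on the default port 7860 and that /upload-url and /ask accept JSON bodies carrying the fields the handlers read (url, question, cache_id); the document URL is a placeholder:

import httpx

BASE = 'http://localhost:7860'

# Cache a PDF by URL; the response carries cache_id and token_count
up = httpx.post(f'{BASE}/upload-url',
                json={'url': 'https://example.com/sample.pdf'}).json()
assert up.get('success'), up.get('error')

# Ask a question against the cached document
ans = httpx.post(f'{BASE}/ask', json={
    'question': 'Summarize the document in three sentences.',
    'cache_id': up['cache_id'],
}).json()
print(ans.get('answer') or ans.get('error'))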
 