Omartificial-Intelligence-Space committed on
Commit
a819b46
·
verified ·
1 Parent(s): 1ed7d4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +40 -99
app.py CHANGED
@@ -6,6 +6,7 @@ import os
6
  import io
7
  import httpx
8
  import uuid
 
9
  from datetime import datetime, timezone, timedelta
10
  from dotenv import load_dotenv
11
  import json
@@ -330,7 +331,7 @@ HTML_TEMPLATE = """
330
  <div class="container">
331
  <div class="header">
332
  <h1>📚 Smart Document Analysis Platform</h1>
333
- <p>Upload substantial PDF documents (5MB+ recommended) for efficient context caching with Gemini API</p>
334
  <p style="font-size: 0.9em; opacity: 0.8; margin-top: 5px;">💡 Context caching requires minimum token thresholds - larger documents work better</p>
335
  </div>
336
 
@@ -467,7 +468,7 @@ HTML_TEMPLATE = """
467
  document.getElementById('cacheId').textContent = result.cache_id;
468
  document.getElementById('tokenCount').textContent = result.token_count;
469
  document.getElementById('documentName').textContent = result.document_name;
470
- document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.5-flash-001';
471
  document.getElementById('cacheInfo').style.display = 'block';
472
  showSuccess('PDF uploaded and cached successfully!');
473
 
@@ -511,7 +512,7 @@ HTML_TEMPLATE = """
511
  document.getElementById('cacheId').textContent = result.cache_id;
512
  document.getElementById('tokenCount').textContent = result.token_count;
513
  document.getElementById('documentName').textContent = result.document_name;
514
- document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.5-flash-001';
515
  document.getElementById('cacheInfo').style.display = 'block';
516
  showSuccess('PDF uploaded and cached successfully!');
517
 
@@ -652,48 +653,39 @@ def upload_file():
652
  # Warn about small files that might not cache
653
  if file_size < 1024 * 1024: # Less than 1MB
654
  print(f"Warning: Small file uploaded ({file_size_mb:.1f}MB). May not meet minimum token requirements for caching.")
655
-
656
  # Read file content
657
  file_content = file.read()
658
  if not file_content:
659
  return jsonify({'success': False, 'error': 'File is empty'})
660
 
661
- file_io = io.BytesIO(file_content)
 
662
 
663
- # Upload to Gemini File API
664
  try:
665
- # Save the BytesIO to a temporary file since the API expects a file path
666
- import tempfile
667
- with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
668
- temp_file.write(file_content)
669
- temp_file_path = temp_file.name
670
-
671
- # Upload using file path
672
- document = client.files.upload(path=temp_file_path)
673
  print(f"Document uploaded successfully: {document.name}")
674
-
675
- # Clean up temporary file
676
- import os
677
- os.unlink(temp_file_path)
678
-
679
  except Exception as upload_error:
680
  print(f"Upload error: {upload_error}")
681
  return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})
682
 
683
- # Create cache with system instruction
684
  try:
685
  system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
686
 
687
- # Use the correct model name - try 2.5 Flash first (lower token requirement)
688
- model = 'gemini-2.5-flash-001'
689
 
 
690
  cache = client.caches.create(
691
- model=model,
692
  config=types.CreateCachedContentConfig(
693
- display_name=f'PDF document cache - {file.filename}',
694
  system_instruction=system_instruction,
695
- contents=[document],
696
- ttl="3600s", # 1 hour TTL
697
  )
698
  )
699
 
@@ -730,50 +722,9 @@ def upload_file():
730
  if "too small" in error_msg or "minimum" in error_msg:
731
  return jsonify({
732
  'success': False,
733
- 'error': f'Document content is insufficient for caching. Gemini 2.5 Flash requires minimum 1,024 tokens (~2-3 pages of text). Your document: {file.filename} ({file_size_mb:.1f}MB)',
734
- 'suggestion': 'Upload a longer document with more text content (recommended: 5MB+ with substantial text).',
735
- 'fallback': 'You can still use the document without caching by implementing direct file processing.'
736
  })
737
- elif "invalid" in error_msg or "model" in error_msg:
738
- # Try fallback to 2.0 Flash
739
- try:
740
- cache_fallback = client.caches.create(
741
- model='gemini-2.0-flash-001',
742
- config=types.CreateCachedContentConfig(
743
- display_name=f'PDF document cache - {file.filename}',
744
- system_instruction=system_instruction,
745
- contents=[document],
746
- ttl="3600s",
747
- )
748
- )
749
- print(f"Fallback cache created with 2.0 Flash: {cache_fallback.name}")
750
-
751
- # Store with fallback model info
752
- cache_id = str(uuid.uuid4())
753
- document_caches[cache_id] = {
754
- 'cache_name': cache_fallback.name,
755
- 'document_name': file.filename,
756
- 'document_file_name': document.name,
757
- 'model': 'gemini-2.0-flash-001',
758
- 'created_at': datetime.now().isoformat()
759
- }
760
-
761
- token_count = 'Unknown'
762
- if hasattr(cache_fallback, 'usage_metadata') and cache_fallback.usage_metadata:
763
- if hasattr(cache_fallback.usage_metadata, 'total_token_count'):
764
- token_count = cache_fallback.usage_metadata.total_token_count
765
-
766
- return jsonify({
767
- 'success': True,
768
- 'cache_id': cache_id,
769
- 'token_count': token_count,
770
- 'document_name': file.filename,
771
- 'model_used': 'gemini-2.0-flash-001'
772
- })
773
-
774
- except Exception as fallback_error:
775
- print(f"Fallback cache error: {fallback_error}")
776
- return jsonify({'success': False, 'error': f'Failed to create cache with both models: {str(fallback_error)}'})
777
  else:
778
  return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
779
 
@@ -812,8 +763,6 @@ def upload_from_url():
812
  if content_length < 1024 * 1024: # Less than 1MB
813
  print(f"Warning: Small file from URL ({content_length_mb:.1f}MB). May not meet minimum token requirements for caching.")
814
 
815
- file_io = io.BytesIO(response.content)
816
-
817
  except httpx.TimeoutException:
818
  return jsonify({'success': False, 'error': 'Request timeout. Please try a different URL.'})
819
  except httpx.HTTPError as e:
@@ -824,41 +773,33 @@ def upload_from_url():
824
  if not filename.endswith('.pdf'):
825
  filename += '.pdf'
826
 
827
- # Upload to Gemini File API
 
 
 
828
  try:
829
- # Save the BytesIO to a temporary file since the API expects a file path
830
- import tempfile
831
- import os
832
-
833
- with tempfile.NamedTemporaryFile(delete=False, suffix='.pdf') as temp_file:
834
- temp_file.write(response.content)
835
- temp_file_path = temp_file.name
836
-
837
- # Upload using file path
838
- document = client.files.upload(path=temp_file_path)
839
  print(f"Document uploaded successfully: {document.name}")
840
-
841
- # Clean up temporary file
842
- os.unlink(temp_file_path)
843
-
844
  except Exception as upload_error:
845
  print(f"Upload error: {upload_error}")
846
  return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})
847
 
848
- # Create cache with system instruction
849
  try:
850
  system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
851
 
852
- # Use the correct model name - try 2.5 Flash first (lower token requirement)
853
- model = 'gemini-2.5-flash-001'
854
 
 
855
  cache = client.caches.create(
856
- model=model,
857
  config=types.CreateCachedContentConfig(
858
- display_name=f'PDF document cache - {filename}',
859
  system_instruction=system_instruction,
860
- contents=[document],
861
- ttl="3600s", # 1 hour TTL
862
  )
863
  )
864
 
@@ -869,7 +810,6 @@ def upload_from_url():
869
  document_caches[cache_id] = {
870
  'cache_name': cache.name,
871
  'document_name': filename,
872
- 'document_file_name': document.name,
873
  'source_url': url,
874
  'created_at': datetime.now().isoformat()
875
  }
@@ -891,12 +831,13 @@ def upload_from_url():
891
 
892
  except Exception as cache_error:
893
  print(f"Cache error: {cache_error}")
894
- # If caching fails due to small content, provide alternative approach
895
- if "too small" in str(cache_error).lower():
 
896
  return jsonify({
897
  'success': False,
898
- 'error': 'PDF content is too small for caching. Please upload a larger document with more text content.',
899
- 'suggestion': 'Try uploading a longer document or combine multiple documents.'
900
  })
901
  else:
902
  return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
@@ -923,7 +864,7 @@ def ask_question():
923
  # Generate response using cached content with correct model format
924
  try:
925
  response = client.models.generate_content(
926
- model='gemini-2.5-flash-001', # Use 2.5 Flash for consistency
927
  contents=question,
928
  config=types.GenerateContentConfig(
929
  cached_content=cache_info['cache_name']
 
6
  import io
7
  import httpx
8
  import uuid
9
+ import tempfile
10
  from datetime import datetime, timezone, timedelta
11
  from dotenv import load_dotenv
12
  import json
 
331
  <div class="container">
332
  <div class="header">
333
  <h1>📚 Smart Document Analysis Platform</h1>
334
+ <p>Upload substantial PDF documents for efficient context caching with Gemini API</p>
335
  <p style="font-size: 0.9em; opacity: 0.8; margin-top: 5px;">💡 Context caching requires minimum token thresholds - larger documents work better</p>
336
  </div>
337
 
 
468
  document.getElementById('cacheId').textContent = result.cache_id;
469
  document.getElementById('tokenCount').textContent = result.token_count;
470
  document.getElementById('documentName').textContent = result.document_name;
471
+ document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.0-flash-001';
472
  document.getElementById('cacheInfo').style.display = 'block';
473
  showSuccess('PDF uploaded and cached successfully!');
474
 
 
512
  document.getElementById('cacheId').textContent = result.cache_id;
513
  document.getElementById('tokenCount').textContent = result.token_count;
514
  document.getElementById('documentName').textContent = result.document_name;
515
+ document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.0-flash-001';
516
  document.getElementById('cacheInfo').style.display = 'block';
517
  showSuccess('PDF uploaded and cached successfully!');
518
 
 
653
  # Warn about small files that might not cache
654
  if file_size < 1024 * 1024: # Less than 1MB
655
  print(f"Warning: Small file uploaded ({file_size_mb:.1f}MB). May not meet minimum token requirements for caching.")
656
+
657
  # Read file content
658
  file_content = file.read()
659
  if not file_content:
660
  return jsonify({'success': False, 'error': 'File is empty'})
661
 
662
+ # Create BytesIO from content as shown in documentation
663
+ doc_io = io.BytesIO(file_content)
664
 
665
+ # Upload to Gemini File API using the exact pattern from documentation
666
  try:
667
+ document = client.files.upload(
668
+ file=doc_io,
669
+ config=dict(mime_type='application/pdf')
670
+ )
 
 
 
 
671
  print(f"Document uploaded successfully: {document.name}")
 
 
 
 
 
672
  except Exception as upload_error:
673
  print(f"Upload error: {upload_error}")
674
  return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})
675
 
676
+ # Create cache with system instruction using exact pattern from documentation
677
  try:
678
  system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
679
 
680
+ # Use the model name format from documentation
681
+ model_name = "gemini-2.0-flash-001"
682
 
683
+ # Create cached content object exactly as shown in documentation
684
  cache = client.caches.create(
685
+ model=model_name,
686
  config=types.CreateCachedContentConfig(
 
687
  system_instruction=system_instruction,
688
+ contents=[document], # Direct document reference as in docs
 
689
  )
690
  )
691
 
 
722
  if "too small" in error_msg or "minimum" in error_msg:
723
  return jsonify({
724
  'success': False,
725
+ 'error': f'Document content is insufficient for caching. Gemini requires minimum token thresholds. Your document: {file.filename} ({file_size_mb:.1f}MB)',
726
+ 'suggestion': 'Upload a longer document with more text content (recommended: 5MB+ with substantial text).'
 
727
  })
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
728
  else:
729
  return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
730
 
 
763
  if content_length < 1024 * 1024: # Less than 1MB
764
  print(f"Warning: Small file from URL ({content_length_mb:.1f}MB). May not meet minimum token requirements for caching.")
765
 
 
 
766
  except httpx.TimeoutException:
767
  return jsonify({'success': False, 'error': 'Request timeout. Please try a different URL.'})
768
  except httpx.HTTPError as e:
 
773
  if not filename.endswith('.pdf'):
774
  filename += '.pdf'
775
 
776
+ # Create BytesIO from content as shown in documentation
777
+ doc_io = io.BytesIO(response.content)
778
+
779
+ # Upload to Gemini File API using the exact pattern from documentation
780
  try:
781
+ document = client.files.upload(
782
+ file=doc_io,
783
+ config=dict(mime_type='application/pdf')
784
+ )
 
 
 
 
 
 
785
  print(f"Document uploaded successfully: {document.name}")
 
 
 
 
786
  except Exception as upload_error:
787
  print(f"Upload error: {upload_error}")
788
  return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})
789
 
790
+ # Create cache with system instruction using exact pattern from documentation
791
  try:
792
  system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
793
 
794
+ # Use the model name format from documentation
795
+ model_name = "gemini-2.0-flash-001"
796
 
797
+ # Create cached content object exactly as shown in documentation
798
  cache = client.caches.create(
799
+ model=model_name,
800
  config=types.CreateCachedContentConfig(
 
801
  system_instruction=system_instruction,
802
+ contents=[document], # Direct document reference as in docs
 
803
  )
804
  )
805
 
 
810
  document_caches[cache_id] = {
811
  'cache_name': cache.name,
812
  'document_name': filename,
 
813
  'source_url': url,
814
  'created_at': datetime.now().isoformat()
815
  }
 
831
 
832
  except Exception as cache_error:
833
  print(f"Cache error: {cache_error}")
834
+ # Provide more specific error handling for token requirements
835
+ error_msg = str(cache_error).lower()
836
+ if "too small" in error_msg or "minimum" in error_msg:
837
  return jsonify({
838
  'success': False,
839
+ 'error': f'Document content is insufficient for caching. Gemini requires minimum token thresholds. Document from URL: {filename} ({content_length_mb:.1f}MB)',
840
+ 'suggestion': 'Try a longer document with more text content (recommended: 5MB+ with substantial text).'
841
  })
842
  else:
843
  return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
 
864
  # Generate response using cached content with correct model format
865
  try:
866
  response = client.models.generate_content(
867
+ model="gemini-2.0-flash-001", # Use model name format from documentation
868
  contents=question,
869
  config=types.GenerateContentConfig(
870
  cached_content=cache_info['cache_name']