from flask import Flask, request, jsonify, render_template_string
from flask_cors import CORS
from google import genai
from google.genai import types
import os
import io
import httpx
import uuid
import tempfile
from datetime import datetime, timezone, timedelta
from dotenv import load_dotenv
import json
# Load environment variables
load_dotenv()
# Get Google API key from environment
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
if GOOGLE_API_KEY is None:
raise ValueError("GOOGLE_API_KEY environment variable is not set. Please set it before running the script.")
app = Flask(__name__)
CORS(app)
# Configure Flask for large file uploads (200MB for substantial documents)
app.config['MAX_CONTENT_LENGTH'] = 200 * 1024 * 1024 # 200MB max file size
# Initialize Gemini client with correct API key
client = genai.Client(api_key=GOOGLE_API_KEY)
# In-memory storage for demo (in production, use a database)
document_caches = {}
user_sessions = {}
# HTML template for the web interface
HTML_TEMPLATE = """
Smart Document Analysis Platform
📚 Smart Document Analysis Platform
Upload substantial PDF documents for efficient context caching with Gemini API
Drag and drop your PDF file here, or click to select
For context caching to work: Upload substantial documents (5MB+ recommended)
Maximum file size: 200MB
Or provide a URL:
Processing your PDF... This may take a moment.
💬 Ask Questions
✅ Document Cached Successfully!
Your PDF has been cached using Gemini API context caching. You can now ask multiple questions efficiently without re-uploading.
Document:
Cache ID:
Tokens Cached:
Model:
💡 Cache valid for 1 hour. Subsequent questions will use cached content for faster responses.
👋 Hello! I'm ready to analyze your PDF documents. Upload a document to get started!
"""
@app.route('/')
def index():
return render_template_string(HTML_TEMPLATE)
@app.route('/upload', methods=['POST'])
def upload_file():
try:
if 'file' not in request.files:
return jsonify({'success': False, 'error': 'No file provided'})
file = request.files['file']
if file.filename == '':
return jsonify({'success': False, 'error': 'No file selected'})
# Check file size (limit to 200MB for large documents needed for caching)
file.seek(0, 2) # Seek to end
file_size = file.tell()
file.seek(0) # Reset to beginning
# Convert to MB for display
file_size_mb = file_size / (1024 * 1024)
if file_size > 200 * 1024 * 1024: # 200MB limit
return jsonify({'success': False, 'error': f'File too large ({file_size_mb:.1f}MB). Maximum size is 200MB.'})
# Warn about small files that might not cache
if file_size < 1024 * 1024: # Less than 1MB
print(f"Warning: Small file uploaded ({file_size_mb:.1f}MB). May not meet minimum token requirements for caching.")
# Read file content
file_content = file.read()
if not file_content:
return jsonify({'success': False, 'error': 'File is empty'})
# Create BytesIO from content as shown in documentation
doc_io = io.BytesIO(file_content)
# Upload to Gemini File API using the exact pattern from documentation
try:
document = client.files.upload(
file=doc_io,
config=dict(mime_type='application/pdf')
)
print(f"Document uploaded successfully: {document.name}")
except Exception as upload_error:
print(f"Upload error: {upload_error}")
return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})
# Create cache with system instruction using exact pattern from documentation
try:
system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
# Use the model name format from documentation
model_name = "gemini-2.0-flash-001"
# Create cached content object exactly as shown in documentation
cache = client.caches.create(
model=model_name,
config=types.CreateCachedContentConfig(
system_instruction=system_instruction,
contents=[document], # Direct document reference as in docs
)
)
print(f"Cache created successfully: {cache.name}")
# Store cache info
cache_id = str(uuid.uuid4())
document_caches[cache_id] = {
'cache_name': cache.name,
'document_name': file.filename,
'document_file_name': document.name,
'created_at': datetime.now().isoformat()
}
# Get token count safely
token_count = 'Unknown'
if hasattr(cache, 'usage_metadata') and cache.usage_metadata:
if hasattr(cache.usage_metadata, 'total_token_count'):
token_count = cache.usage_metadata.total_token_count
elif hasattr(cache.usage_metadata, 'cached_token_count'):
token_count = cache.usage_metadata.cached_token_count
return jsonify({
'success': True,
'cache_id': cache_id,
'token_count': token_count,
'document_name': file.filename
})
except Exception as cache_error:
print(f"Cache error: {cache_error}")
# Provide more specific error handling for token requirements
error_msg = str(cache_error).lower()
if "too small" in error_msg or "minimum" in error_msg:
return jsonify({
'success': False,
'error': f'Document content is insufficient for caching. Gemini requires minimum token thresholds. Your document: {file.filename} ({file_size_mb:.1f}MB)',
'suggestion': 'Upload a longer document with more text content (recommended: 5MB+ with substantial text).'
})
else:
return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
except Exception as e:
print(f"General error: {e}")
return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
@app.route('/upload-url', methods=['POST'])
def upload_from_url():
try:
data = request.get_json()
url = data.get('url')
if not url:
return jsonify({'success': False, 'error': 'No URL provided'})
# Download file from URL with timeout and size limits
try:
with httpx.Client(timeout=30.0) as client_http:
response = client_http.get(url)
response.raise_for_status()
# Check content type
content_type = response.headers.get('content-type', '').lower()
if 'pdf' not in content_type and not url.lower().endswith('.pdf'):
return jsonify({'success': False, 'error': 'URL does not point to a PDF file'})
# Check file size
content_length = len(response.content)
content_length_mb = content_length / (1024 * 1024)
if content_length > 200 * 1024 * 1024: # 200MB limit
return jsonify({'success': False, 'error': f'File too large ({content_length_mb:.1f}MB). Maximum size is 200MB.'})
# Warn about small files
if content_length < 1024 * 1024: # Less than 1MB
print(f"Warning: Small file from URL ({content_length_mb:.1f}MB). May not meet minimum token requirements for caching.")
except httpx.TimeoutException:
return jsonify({'success': False, 'error': 'Request timeout. Please try a different URL.'})
except httpx.HTTPError as e:
return jsonify({'success': False, 'error': f'Failed to download file: {str(e)}'})
# Extract filename from URL
filename = url.split('/')[-1]
if not filename.endswith('.pdf'):
filename += '.pdf'
# Create BytesIO from content as shown in documentation
doc_io = io.BytesIO(response.content)
# Upload to Gemini File API using the exact pattern from documentation
try:
document = client.files.upload(
file=doc_io,
config=dict(mime_type='application/pdf')
)
print(f"Document uploaded successfully: {document.name}")
except Exception as upload_error:
print(f"Upload error: {upload_error}")
return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})
# Create cache with system instruction using exact pattern from documentation
try:
system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
# Use the model name format from documentation
model_name = "gemini-2.0-flash-001"
# Create cached content object exactly as shown in documentation
cache = client.caches.create(
model=model_name,
config=types.CreateCachedContentConfig(
system_instruction=system_instruction,
contents=[document], # Direct document reference as in docs
)
)
print(f"Cache created successfully: {cache.name}")
# Store cache info
cache_id = str(uuid.uuid4())
document_caches[cache_id] = {
'cache_name': cache.name,
'document_name': filename,
'source_url': url,
'created_at': datetime.now().isoformat()
}
# Get token count safely
token_count = 'Unknown'
if hasattr(cache, 'usage_metadata') and cache.usage_metadata:
if hasattr(cache.usage_metadata, 'total_token_count'):
token_count = cache.usage_metadata.total_token_count
elif hasattr(cache.usage_metadata, 'cached_token_count'):
token_count = cache.usage_metadata.cached_token_count
return jsonify({
'success': True,
'cache_id': cache_id,
'token_count': token_count,
'document_name': filename
})
except Exception as cache_error:
print(f"Cache error: {cache_error}")
# Provide more specific error handling for token requirements
error_msg = str(cache_error).lower()
if "too small" in error_msg or "minimum" in error_msg:
return jsonify({
'success': False,
'error': f'Document content is insufficient for caching. Gemini requires minimum token thresholds. Document from URL: {filename} ({content_length_mb:.1f}MB)',
'suggestion': 'Try a longer document with more text content (recommended: 5MB+ with substantial text).'
})
else:
return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
except Exception as e:
print(f"General error: {e}")
return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
@app.route('/ask', methods=['POST'])
def ask_question():
try:
data = request.get_json()
question = data.get('question')
cache_id = data.get('cache_id')
if not question or not cache_id:
return jsonify({'success': False, 'error': 'Missing question or cache_id'})
if cache_id not in document_caches:
return jsonify({'success': False, 'error': 'Cache not found. Please upload a document first.'})
cache_info = document_caches[cache_id]
# Generate response using cached content with correct model format
try:
response = client.models.generate_content(
model="gemini-2.0-flash-001", # Use model name format from documentation
contents=question,
config=types.GenerateContentConfig(
cached_content=cache_info['cache_name']
)
)
if response and response.text:
return jsonify({
'success': True,
'answer': response.text
})
else:
return jsonify({
'success': False,
'error': 'No response generated from the model'
})
except Exception as gen_error:
print(f"Generation error: {gen_error}")
return jsonify({'success': False, 'error': f'Failed to generate response: {str(gen_error)}'})
except Exception as e:
print(f"General error in ask_question: {e}")
return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
@app.route('/caches', methods=['GET'])
def list_caches():
try:
caches = []
for cache_id, cache_info in document_caches.items():
caches.append({
'cache_id': cache_id,
'document_name': cache_info['document_name'],
'created_at': cache_info['created_at']
})
return jsonify({'success': True, 'caches': caches})
except Exception as e:
return jsonify({'success': False, 'error': str(e)})
@app.route('/cache/', methods=['DELETE'])
def delete_cache(cache_id):
try:
if cache_id not in document_caches:
return jsonify({'success': False, 'error': 'Cache not found'})
cache_info = document_caches[cache_id]
# Delete from Gemini API
try:
client.caches.delete(cache_info['cache_name'])
except Exception as delete_error:
print(f"Error deleting cache from Gemini API: {delete_error}")
# Continue to remove from local storage even if API deletion fails
# Remove from local storage
del document_caches[cache_id]
return jsonify({'success': True, 'message': 'Cache deleted successfully'})
except Exception as e:
return jsonify({'success': False, 'error': str(e)})
# Health check endpoint
@app.route('/health', methods=['GET'])
def health_check():
return jsonify({'status': 'healthy', 'service': 'Smart Document Analysis Platform'})
# Error handlers
@app.errorhandler(413)
def too_large(e):
return jsonify({'success': False, 'error': 'File too large. Maximum size is 200MB for substantial documents needed for context caching.'}), 413
@app.errorhandler(500)
def internal_error(e):
return jsonify({'success': False, 'error': 'Internal server error'}), 500
if __name__ == '__main__':
import os
port = int(os.environ.get("PORT", 7860))
print(f"Starting server on port {port}")
print(f"Google API Key configured: {'Yes' if GOOGLE_API_KEY else 'No'}")
app.run(debug=False, host='0.0.0.0', port=port)