Smart Document Analysis Platform

# Flask service that uploads PDF documents to the Gemini File API, creates a
# context cache for each one, and answers questions against the cached content.

import io
import json
import logging
import os
import uuid
from datetime import datetime, timezone, timedelta
from urllib.parse import urlparse

import httpx
from dotenv import load_dotenv
from flask import Flask, request, jsonify, render_template_string
from flask_cors import CORS
from google import genai
from google.genai import types

# Load environment variables
load_dotenv()

logger = logging.getLogger(__name__)

app = Flask(__name__)
CORS(app)

# Initialize Gemini client
client = genai.Client(api_key=os.getenv('GOOGLE_API_KEY'))

# Model used both for creating caches and for answering questions; the two
# must match because a cache is bound to the model it was created with.
MODEL_NAME = 'models/gemini-2.0-flash-001'

# Lifetime of a created cache (1 hour).
CACHE_TTL = "3600s"

SYSTEM_INSTRUCTION = (
    "You are an expert document analyzer. Provide detailed, accurate answers "
    "based on the uploaded document content. Always be helpful and thorough "
    "in your responses."
)

# Upper bound (seconds) for downloading a PDF from a user-supplied URL.
DOWNLOAD_TIMEOUT_SECONDS = 30.0

# In-memory storage for demo (in production, use a database).
# document_caches maps our own cache_id (uuid4 string) to a dict with the keys
# 'cache_name', 'document_name' and 'created_at'.
document_caches = {}
user_sessions = {}

# HTML template for the web interface.
# NOTE(review): this template contains only text and no markup/script; it looks
# like the HTML tags were lost somewhere upstream -- confirm against the
# original file before relying on the web UI.
HTML_TEMPLATE = """ Smart Document Analysis Platform

📤 Upload PDF Document

📄

Drag and drop your PDF file here, or click to select

Or provide a URL:

💬 Ask Questions

👋 Hello! I'm ready to analyze your PDF documents. Upload a document to get started!

"""


def _json_body():
    """Return the request's JSON payload as a dict, or {} if absent/invalid."""
    payload = request.get_json(silent=True)
    return payload if isinstance(payload, dict) else {}


def _token_count(cache):
    """Return the token count reported for *cache*, or 'Unknown'."""
    usage = getattr(cache, 'usage_metadata', None)
    # 'cached_token_count' is kept as a fallback for backward compatibility
    # with the attribute name this code originally looked up.
    for attr in ('total_token_count', 'cached_token_count'):
        value = getattr(usage, attr, None)
        if value is not None:
            return value
    return 'Unknown'


def _cache_document(file_io, document_name):
    """Upload a PDF to Gemini, create a context cache and build the response.

    Args:
        file_io: Binary file-like object holding the PDF bytes.
        document_name: Label stored with the cache (file name or source URL).

    Returns:
        A Flask JSON response. On success it carries 'cache_id' and
        'token_count'; if the API rejects the document as too small it carries
        'success': False with an explanatory 'error' and 'suggestion'.

    Raises:
        Exception: Any other error from the Gemini client is propagated to the
            calling route, which converts it into an error response.
    """
    # Upload to Gemini File API
    document = client.files.upload(
        file=file_io,
        config=dict(mime_type='application/pdf'),
    )

    # Create cache with system instruction
    try:
        cache = client.caches.create(
            model=MODEL_NAME,
            config=types.CreateCachedContentConfig(
                display_name='pdf document cache',
                system_instruction=SYSTEM_INSTRUCTION,
                contents=[document],
                ttl=CACHE_TTL,
            ),
        )
    except Exception as cache_error:
        # The API refuses to cache very small documents; report that case
        # with a friendlier message instead of the raw API error.
        if "Cached content is too small" in str(cache_error):
            return jsonify({
                'success': False,
                'error': 'PDF is too small for caching. Please upload a larger document (minimum 4,096 tokens required).',
                'suggestion': 'Try uploading a longer document or combine multiple documents.',
            })
        raise

    # Store cache info
    cache_id = str(uuid.uuid4())
    document_caches[cache_id] = {
        'cache_name': cache.name,
        'document_name': document_name,
        'created_at': datetime.now(timezone.utc).isoformat(),
    }

    return jsonify({
        'success': True,
        'cache_id': cache_id,
        'token_count': _token_count(cache),
    })


@app.route('/')
def index():
    """Serve the web interface."""
    return render_template_string(HTML_TEMPLATE)


@app.route('/upload', methods=['POST'])
def upload_file():
    """Cache a PDF sent as the multipart form field 'file'."""
    try:
        if 'file' not in request.files:
            return jsonify({'success': False, 'error': 'No file provided'})

        file = request.files['file']
        if file.filename == '':
            return jsonify({'success': False, 'error': 'No file selected'})

        # Read file content into memory so it can be handed to the client.
        file_io = io.BytesIO(file.read())
        return _cache_document(file_io, file.filename)
    except Exception as e:
        logger.exception("Upload failed")
        return jsonify({'success': False, 'error': str(e)})


@app.route('/upload-url', methods=['POST'])
def upload_from_url():
    """Download a PDF from the JSON field 'url' and cache it."""
    try:
        url = _json_body().get('url')
        if not url:
            return jsonify({'success': False, 'error': 'No URL provided'})

        if not isinstance(url, str) or urlparse(url).scheme not in ('http', 'https'):
            return jsonify({'success': False, 'error': 'URL must start with http:// or https://'})

        # Download file from URL.
        # NOTE(review): the server fetches an arbitrary user-supplied URL; if
        # this is deployed publicly, consider blocking private/internal hosts.
        response = httpx.get(url, timeout=DOWNLOAD_TIMEOUT_SECONDS)
        response.raise_for_status()

        return _cache_document(io.BytesIO(response.content), url)
    except Exception as e:
        logger.exception("Upload from URL failed")
        return jsonify({'success': False, 'error': str(e)})


@app.route('/ask', methods=['POST'])
def ask_question():
    """Answer the JSON field 'question' using the cache named by 'cache_id'."""
    try:
        data = _json_body()
        question = data.get('question')
        cache_id = data.get('cache_id')

        if not question or not cache_id:
            return jsonify({'success': False, 'error': 'Missing question or cache_id'})

        cache_info = document_caches.get(cache_id)
        if cache_info is None:
            return jsonify({'success': False, 'error': 'Cache not found'})

        # Generate response using cached content
        response = client.models.generate_content(
            model=MODEL_NAME,
            contents=question,
            config=types.GenerateContentConfig(
                cached_content=cache_info['cache_name'],
            ),
        )

        return jsonify({'success': True, 'answer': response.text})
    except Exception as e:
        logger.exception("Question answering failed")
        return jsonify({'success': False, 'error': str(e)})


@app.route('/caches', methods=['GET'])
def list_caches():
    """List the caches known to this process."""
    try:
        caches = [
            {
                'cache_id': cache_id,
                'document_name': cache_info['document_name'],
                'created_at': cache_info['created_at'],
            }
            for cache_id, cache_info in document_caches.items()
        ]
        return jsonify({'success': True, 'caches': caches})
    except Exception as e:
        logger.exception("Listing caches failed")
        return jsonify({'success': False, 'error': str(e)})


# The URL must carry the cache id: the view function requires it as an argument.
@app.route('/cache/<cache_id>', methods=['DELETE'])
def delete_cache(cache_id):
    """Delete a cache from the Gemini API and from local storage."""
    try:
        cache_info = document_caches.get(cache_id)
        if cache_info is None:
            return jsonify({'success': False, 'error': 'Cache not found'})

        # Delete from Gemini API (the SDK takes the cache name by keyword).
        client.caches.delete(name=cache_info['cache_name'])

        # Remove from local storage only after the remote delete succeeded.
        del document_caches[cache_id]

        return jsonify({'success': True, 'message': 'Cache deleted successfully'})
    except Exception as e:
        logger.exception("Deleting cache failed")
        return jsonify({'success': False, 'error': str(e)})


if __name__ == '__main__':
    port = int(os.environ.get("PORT", 7860))
    # The interactive debugger allows arbitrary code execution, and this server
    # binds to all interfaces, so debug mode must be enabled explicitly.
    debug = os.environ.get("FLASK_DEBUG", "").lower() in ("1", "true", "yes")
    app.run(debug=debug, host='0.0.0.0', port=port)

📚 Smart Document Analysis Platform

Or provide a URL:

✅ Document Cached Successfully!