Omartificial-Intelligence-Space committed
Commit 548d3b3 · verified · 1 Parent(s): 0a6b1c6

Update app.py

Files changed (1)
app.py +101 -24
app.py CHANGED
@@ -21,8 +21,8 @@ if GOOGLE_API_KEY is None:
 app = Flask(__name__)
 CORS(app)
 
-# Configure Flask for larger file uploads
-app.config['MAX_CONTENT_LENGTH'] = 50 * 1024 * 1024  # 50MB max file size
+# Configure Flask for large file uploads (200MB for substantial documents)
+app.config['MAX_CONTENT_LENGTH'] = 200 * 1024 * 1024  # 200MB max file size
 
 # Initialize Gemini client with correct API key
 client = genai.Client(api_key=GOOGLE_API_KEY)
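A note on the config change above: once MAX_CONTENT_LENGTH is set, Flask rejects any request body over the limit before the upload view ever runs, raising a 413 that the errorhandler updated at the bottom of this diff converts to JSON. A minimal standalone sketch of that interaction, with illustrative names rather than the app's own:

# Minimal sketch: Flask enforces MAX_CONTENT_LENGTH itself and routes
# oversized requests to the registered 413 handler.
from flask import Flask, jsonify

demo = Flask(__name__)
demo.config['MAX_CONTENT_LENGTH'] = 200 * 1024 * 1024  # reject bodies over 200MB

@demo.errorhandler(413)
def too_large(e):
    # Same JSON shape as app.py's error handlers
    return jsonify({'success': False, 'error': 'File too large.'}), 413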
@@ -330,7 +330,8 @@ HTML_TEMPLATE = """
     <div class="container">
         <div class="header">
             <h1>📚 Smart Document Analysis Platform</h1>
-            <p>Upload PDF documents once, ask questions forever with Gemini API caching</p>
+            <p>Upload substantial PDF documents (5MB+ recommended) for efficient context caching with Gemini API</p>
+            <p style="font-size: 0.9em; opacity: 0.8; margin-top: 5px;">💡 Context caching requires minimum token thresholds - larger documents work better</p>
         </div>
 
         <div class="main-content">
@@ -344,7 +345,8 @@ HTML_TEMPLATE = """
             <div class="upload-area" id="uploadArea">
                 <div class="upload-icon">📄</div>
                 <p>Drag and drop your PDF file here, or click to select</p>
-                <p style="font-size: 0.9em; color: #666; margin-top: 10px;">Maximum file size: 50MB</p>
+                <p style="font-size: 0.9em; color: #666; margin-top: 5px;">For context caching to work: Upload substantial documents (5MB+ recommended)</p>
+                <p style="font-size: 0.8em; color: #888; margin-top: 5px;">Maximum file size: 200MB</p>
                 <input type="file" id="fileInput" class="file-input" accept=".pdf">
                 <button class="upload-btn" onclick="document.getElementById('fileInput').click()">
                     Choose PDF File
@@ -375,9 +377,12 @@ HTML_TEMPLATE = """
 
             <div id="cacheInfo" class="cache-info" style="display: none;">
                 <h3>✅ Document Cached Successfully!</h3>
-                <p>Your PDF has been cached using Gemini API. You can now ask multiple questions without re-uploading.</p>
+                <p>Your PDF has been cached using Gemini API context caching. You can now ask multiple questions efficiently without re-uploading.</p>
+                <p><strong>Document:</strong> <span id="documentName"></span></p>
                 <p><strong>Cache ID:</strong> <span id="cacheId"></span></p>
                 <p><strong>Tokens Cached:</strong> <span id="tokenCount"></span></p>
+                <p><strong>Model:</strong> <span id="modelUsed"></span></p>
+                <p style="font-size: 0.9em; margin-top: 10px; opacity: 0.8;">💡 Cache valid for 1 hour. Subsequent questions will use cached content for faster responses.</p>
             </div>
 
             <div class="chat-container" id="chatContainer">
@@ -431,13 +436,20 @@ HTML_TEMPLATE = """
                 return;
             }
 
-            // Check file size on client side (50MB limit)
-            if (file.size > 50 * 1024 * 1024) {
-                showError('File too large. Maximum size is 50MB.');
+            // Check file size on client side (200MB limit)
+            const fileSizeMB = file.size / (1024 * 1024);
+            if (file.size > 200 * 1024 * 1024) {
+                showError(`File too large (${fileSizeMB.toFixed(1)}MB). Maximum size is 200MB.`);
                 return;
             }
 
-            showLoading('Uploading PDF...');
+            // Warn about small files that might not cache
+            if (file.size < 1024 * 1024) {
+                showError(`File might be too small (${fileSizeMB.toFixed(1)}MB) for context caching. For best results, upload documents with substantial text content (>5MB recommended).`);
+                return;
+            }
+
+            showLoading(`Uploading PDF (${fileSizeMB.toFixed(1)}MB)...`);
 
             const formData = new FormData();
             formData.append('file', file);
@@ -454,6 +466,8 @@ HTML_TEMPLATE = """
                     currentCacheId = result.cache_id;
                     document.getElementById('cacheId').textContent = result.cache_id;
                     document.getElementById('tokenCount').textContent = result.token_count;
+                    document.getElementById('documentName').textContent = result.document_name;
+                    document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.5-flash-001';
                     document.getElementById('cacheInfo').style.display = 'block';
                     showSuccess('PDF uploaded and cached successfully!');
 
@@ -461,6 +475,9 @@ HTML_TEMPLATE = """
                     addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
                 } else {
                     showError(result.error);
+                    if (result.suggestion) {
+                        showError(result.suggestion);
+                    }
                 }
             } catch (error) {
                 showError('Error uploading file: ' + error.message);
@@ -493,6 +510,8 @@ HTML_TEMPLATE = """
                     currentCacheId = result.cache_id;
                     document.getElementById('cacheId').textContent = result.cache_id;
                     document.getElementById('tokenCount').textContent = result.token_count;
+                    document.getElementById('documentName').textContent = result.document_name;
+                    document.getElementById('modelUsed').textContent = result.model_used || 'gemini-2.5-flash-001';
                     document.getElementById('cacheInfo').style.display = 'block';
                     showSuccess('PDF uploaded and cached successfully!');
 
@@ -500,6 +519,9 @@ HTML_TEMPLATE = """
                     addMessage("I've analyzed your PDF document. What would you like to know about it?", 'ai');
                 } else {
                     showError(result.error);
+                    if (result.suggestion) {
+                        showError(result.suggestion);
+                    }
                 }
             } catch (error) {
                 showError('Error uploading from URL: ' + error.message);
@@ -616,13 +638,20 @@ def upload_file():
         if file.filename == '':
             return jsonify({'success': False, 'error': 'No file selected'})
 
-        # Check file size (limit to 50MB for PDFs)
+        # Check file size (limit to 200MB for large documents needed for caching)
         file.seek(0, 2)  # Seek to end
         file_size = file.tell()
         file.seek(0)  # Reset to beginning
 
-        if file_size > 50 * 1024 * 1024:  # 50MB limit
-            return jsonify({'success': False, 'error': 'File too large. Maximum size is 50MB.'})
+        # Convert to MB for display
+        file_size_mb = file_size / (1024 * 1024)
+
+        if file_size > 200 * 1024 * 1024:  # 200MB limit
+            return jsonify({'success': False, 'error': f'File too large ({file_size_mb:.1f}MB). Maximum size is 200MB.'})
+
+        # Warn about small files that might not cache
+        if file_size < 1024 * 1024:  # Less than 1MB
+            print(f"Warning: Small file uploaded ({file_size_mb:.1f}MB). May not meet minimum token requirements for caching.")
 
         # Read file content
         file_content = file.read()
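The sub-1MB warning above uses byte size as a rough proxy for token count, which can mislead in both directions: a scanned PDF can be tens of megabytes with almost no extractable text. A hedged sketch of measuring the actual token count with the SDK's count_tokens call before attempting to cache (the helper name and threshold are illustrative, not from app.py):

# Sketch: check the real token count instead of file size before caching.
# `client` is the genai.Client from app.py; `document` is a file handle
# returned by the Files API upload, as used elsewhere in this commit.
MIN_CACHE_TOKENS = 1024  # assumed minimum, taken from the error text later in this diff

def meets_cache_minimum(client, document, model='gemini-2.0-flash-001'):
    result = client.models.count_tokens(model=model, contents=[document])
    print(f"Document contains {result.total_tokens} tokens")
    return result.total_tokens >= MIN_CACHE_TOKENS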
@@ -649,8 +678,8 @@ def upload_file():
        try:
            system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
 
-            # Use the correct model name (without 'models/' prefix)
-            model = 'gemini-2.0-flash-001'
+            # Use the correct model name - try 2.5 Flash first (lower token requirement)
+            model = 'gemini-2.5-flash-001'
 
            cache = client.caches.create(
                model=model,
@@ -690,13 +719,55 @@ def upload_file():
 
        except Exception as cache_error:
            print(f"Cache error: {cache_error}")
-            # If caching fails due to small content, provide alternative approach
-            if "too small" in str(cache_error).lower():
+            # Provide more specific error handling for token requirements
+            error_msg = str(cache_error).lower()
+            if "too small" in error_msg or "minimum" in error_msg:
                return jsonify({
                    'success': False,
-                    'error': 'PDF content is too small for caching. Please upload a larger document with more text content.',
-                    'suggestion': 'Try uploading a longer document or combine multiple documents.'
+                    'error': f'Document content is insufficient for caching. Gemini 2.5 Flash requires minimum 1,024 tokens (~2-3 pages of text). Your document: {file.filename} ({file_size_mb:.1f}MB)',
+                    'suggestion': 'Upload a longer document with more text content (recommended: 5MB+ with substantial text).',
+                    'fallback': 'You can still use the document without caching by implementing direct file processing.'
                })
+            elif "invalid" in error_msg or "model" in error_msg:
+                # Try fallback to 2.0 Flash
+                try:
+                    cache_fallback = client.caches.create(
+                        model='gemini-2.0-flash-001',
+                        config=types.CreateCachedContentConfig(
+                            display_name=f'PDF document cache - {file.filename}',
+                            system_instruction=system_instruction,
+                            contents=[document],
+                            ttl="3600s",
+                        )
+                    )
+                    print(f"Fallback cache created with 2.0 Flash: {cache_fallback.name}")
+
+                    # Store with fallback model info
+                    cache_id = str(uuid.uuid4())
+                    document_caches[cache_id] = {
+                        'cache_name': cache_fallback.name,
+                        'document_name': file.filename,
+                        'document_file_name': document.name,
+                        'model': 'gemini-2.0-flash-001',
+                        'created_at': datetime.now().isoformat()
+                    }
+
+                    token_count = 'Unknown'
+                    if hasattr(cache_fallback, 'usage_metadata') and cache_fallback.usage_metadata:
+                        if hasattr(cache_fallback.usage_metadata, 'total_token_count'):
+                            token_count = cache_fallback.usage_metadata.total_token_count
+
+                    return jsonify({
+                        'success': True,
+                        'cache_id': cache_id,
+                        'token_count': token_count,
+                        'document_name': file.filename,
+                        'model_used': 'gemini-2.0-flash-001'
+                    })
+
+                except Exception as fallback_error:
+                    print(f"Fallback cache error: {fallback_error}")
+                    return jsonify({'success': False, 'error': f'Failed to create cache with both models: {str(fallback_error)}'})
            else:
                return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
 
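For reference, the fallback above repeats the same three-step flow the handler already uses. A condensed, self-contained sketch of that flow (the file name and question are placeholders, and the upload parameter name may vary across google-genai versions):

# Condensed sketch of the upload -> cache -> query cycle behind this handler.
import os
from google import genai
from google.genai import types

client = genai.Client(api_key=os.environ['GOOGLE_API_KEY'])

# 1. Upload the PDF through the Files API ('report.pdf' is a placeholder).
document = client.files.upload(file='report.pdf')

# 2. Create an explicit cache; the cache is tied to this exact model.
cache = client.caches.create(
    model='gemini-2.0-flash-001',
    config=types.CreateCachedContentConfig(
        display_name='PDF document cache - report.pdf',
        system_instruction='You are an expert document analyzer.',
        contents=[document],
        ttl='3600s',  # the same 1-hour TTL this commit uses
    ),
)
print(getattr(cache.usage_metadata, 'total_token_count', 'Unknown'))  # tokens actually cached

# 3. Ask questions against the cache instead of re-sending the PDF.
response = client.models.generate_content(
    model='gemini-2.0-flash-001',  # must match the model the cache was created for
    contents='Summarize the key findings.',
    config=types.GenerateContentConfig(cached_content=cache.name),
)
print(response.text)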
@@ -726,8 +797,14 @@ def upload_from_url():
 
        # Check file size
        content_length = len(response.content)
-        if content_length > 50 * 1024 * 1024:  # 50MB limit
-            return jsonify({'success': False, 'error': 'File too large. Maximum size is 50MB.'})
+        content_length_mb = content_length / (1024 * 1024)
+
+        if content_length > 200 * 1024 * 1024:  # 200MB limit
+            return jsonify({'success': False, 'error': f'File too large ({content_length_mb:.1f}MB). Maximum size is 200MB.'})
+
+        # Warn about small files
+        if content_length < 1024 * 1024:  # Less than 1MB
+            print(f"Warning: Small file from URL ({content_length_mb:.1f}MB). May not meet minimum token requirements for caching.")
 
        file_io = io.BytesIO(response.content)
 
@@ -759,8 +836,8 @@ def upload_from_url():
        try:
            system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
 
-            # Use the correct model name (without 'models/' prefix)
-            model = 'gemini-2.0-flash-001'
+            # Use the correct model name - try 2.5 Flash first (lower token requirement)
+            model = 'gemini-2.5-flash-001'
 
            cache = client.caches.create(
                model=model,
@@ -833,7 +910,7 @@ def ask_question():
        # Generate response using cached content with correct model format
        try:
            response = client.models.generate_content(
-                model='gemini-2.0-flash-001',  # No 'models/' prefix here
+                model='gemini-2.5-flash-001',  # Use 2.5 Flash for consistency
                contents=question,
                config=types.GenerateContentConfig(
                    cached_content=cache_info['cache_name']
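One caveat, offered tentatively since parts of ask_question sit outside this diff's context: the fallback path stores 'model': 'gemini-2.0-flash-001' in document_caches, while the call above hard-codes 'gemini-2.5-flash-001'. A cache can only be queried with the model it was created for, so a sketch of resolving the model from the stored entry instead:

# Sketch: query with the model recorded alongside the cache so a fallback
# cache (created with 2.0 Flash) is not queried with 2.5 Flash.
model_name = cache_info.get('model', 'gemini-2.5-flash-001')  # key written by the fallback path

response = client.models.generate_content(
    model=model_name,
    contents=question,
    config=types.GenerateContentConfig(
        cached_content=cache_info['cache_name']
    ),
)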
@@ -906,7 +983,7 @@ def health_check():
 # Error handlers
 @app.errorhandler(413)
 def too_large(e):
-    return jsonify({'success': False, 'error': 'File too large. Maximum size is 50MB.'}), 413
+    return jsonify({'success': False, 'error': 'File too large. Maximum size is 200MB for substantial documents needed for context caching.'}), 413
 
 @app.errorhandler(500)
 def internal_error(e):
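To exercise the updated limits end to end, a sketch using the requests library; the /upload and /ask paths and the ask payload's field names are assumptions, since the route decorators fall outside the lines shown in this diff:

# Sketch (assumed routes): upload a PDF, then query it through its cache.
import requests

BASE = 'http://localhost:5000'  # assumed host and port

with open('report.pdf', 'rb') as f:  # placeholder file
    upload = requests.post(f'{BASE}/upload', files={'file': f})  # 'file' matches the FormData key
result = upload.json()
print(result.get('cache_id'), result.get('token_count'), result.get('model_used'))

answer = requests.post(f'{BASE}/ask', json={
    'cache_id': result['cache_id'],  # assumed field names
    'question': 'What is this document about?',
})
print(answer.json())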
 