Omartificial-Intelligence-Space committed
Commit 7478cb5 · verified · 1 Parent(s): 7bb7149

Update app.py

Files changed (1):
  1. app.py +160 -53
app.py CHANGED
@@ -9,23 +9,20 @@ import uuid
 from datetime import datetime, timezone, timedelta
 from dotenv import load_dotenv
 import json
-from huggingface_hub import HfApi
 
-# Load Hugging Face token from the environment variable
+# Load environment variables
+load_dotenv()
+
+# Get Google API key from environment
 GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 if GOOGLE_API_KEY is None:
     raise ValueError("GOOGLE_API_KEY environment variable is not set. Please set it before running the script.")
 
-
-hf_api = HfApi(
-    token= GOOGLE_API_KEY, # Token is not persisted on the machine.
-)
-
 app = Flask(__name__)
 CORS(app)
 
-# Initialize Gemini client
-client = genai.Client(api_key=hf_api)
+# Initialize Gemini client with correct API key
+client = genai.Client(api_key=GOOGLE_API_KEY)
 
 # In-memory storage for demo (in production, use a database)
 document_caches = {}
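The removed block passed an HfApi handle (itself built from GOOGLE_API_KEY) into genai.Client as the api_key, so the Gemini client could never authenticate; the new code calls load_dotenv() and wires the key in directly. A minimal sanity check for the new initialization path — a sketch, assuming python-dotenv and google-genai are installed and .env defines GOOGLE_API_KEY:

import os
from dotenv import load_dotenv
from google import genai

load_dotenv()  # pull GOOGLE_API_KEY from .env into the environment
client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])

# Listing models fails fast if the key is missing or invalid.
for model in client.models.list():
    print(model.name)
    break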
@@ -609,60 +606,93 @@ def upload_file():
         if file.filename == '':
             return jsonify({'success': False, 'error': 'No file selected'})
 
+        # Check file size (limit to 10MB for example)
+        file.seek(0, 2) # Seek to end
+        file_size = file.tell()
+        file.seek(0) # Reset to beginning
+
+        if file_size > 10 * 1024 * 1024: # 10MB limit
+            return jsonify({'success': False, 'error': 'File too large. Maximum size is 10MB.'})
+
         # Read file content
         file_content = file.read()
+        if not file_content:
+            return jsonify({'success': False, 'error': 'File is empty'})
+
         file_io = io.BytesIO(file_content)
 
         # Upload to Gemini File API
-        document = client.files.upload(
-            file=file_io,
-            config=dict(mime_type='application/pdf')
-        )
+        try:
+            document = client.files.upload(
+                file=file_io,
+                config=types.FileUploadConfig(
+                    mime_type='application/pdf',
+                    display_name=file.filename
+                )
+            )
+            print(f"Document uploaded successfully: {document.name}")
+        except Exception as upload_error:
+            print(f"Upload error: {upload_error}")
+            return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})
 
         # Create cache with system instruction
         try:
             system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
 
-            # Use the correct model format as per documentation
-            model = 'models/gemini-2.0-flash-001'
+            # Use the correct model name (without 'models/' prefix)
+            model = 'gemini-2.0-flash-001'
 
             cache = client.caches.create(
                 model=model,
                 config=types.CreateCachedContentConfig(
-                    display_name='pdf document cache',
+                    display_name=f'PDF document cache - {file.filename}',
                     system_instruction=system_instruction,
                     contents=[document],
                     ttl="3600s", # 1 hour TTL
                 )
             )
 
+            print(f"Cache created successfully: {cache.name}")
+
             # Store cache info
             cache_id = str(uuid.uuid4())
             document_caches[cache_id] = {
                 'cache_name': cache.name,
                 'document_name': file.filename,
+                'document_file_name': document.name,
                 'created_at': datetime.now().isoformat()
             }
 
+            # Get token count safely
+            token_count = 'Unknown'
+            if hasattr(cache, 'usage_metadata') and cache.usage_metadata:
+                if hasattr(cache.usage_metadata, 'total_token_count'):
+                    token_count = cache.usage_metadata.total_token_count
+                elif hasattr(cache.usage_metadata, 'cached_token_count'):
+                    token_count = cache.usage_metadata.cached_token_count
+
             return jsonify({
                 'success': True,
                 'cache_id': cache_id,
-                'token_count': getattr(cache.usage_metadata, 'cached_token_count', 'Unknown')
+                'token_count': token_count,
+                'document_name': file.filename
             })
 
         except Exception as cache_error:
+            print(f"Cache error: {cache_error}")
             # If caching fails due to small content, provide alternative approach
-            if "Cached content is too small" in str(cache_error):
+            if "too small" in str(cache_error).lower():
                 return jsonify({
                     'success': False,
-                    'error': 'PDF is too small for caching. Please upload a larger document (minimum 4,096 tokens required).',
+                    'error': 'PDF content is too small for caching. Please upload a larger document with more text content.',
                     'suggestion': 'Try uploading a longer document or combine multiple documents.'
                 })
             else:
-                raise cache_error
+                return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
 
     except Exception as e:
-        return jsonify({'success': False, 'error': str(e)})
+        print(f"General error: {e}")
+        return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
 
 @app.route('/upload-url', methods=['POST'])
 def upload_from_url():
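One caveat on the new upload call: current google-genai releases name the upload options type types.UploadFileConfig, so the types.FileUploadConfig spelling above is worth verifying against the installed SDK version. To exercise the endpoint end to end, a sketch like the following works once the app is running; the /upload route path, the "file" form field name, and the sample filename are all assumptions, since the route decorator sits outside this hunk:

import httpx

with open("sample.pdf", "rb") as f:
    resp = httpx.post(
        "http://localhost:7860/upload",  # route path assumed
        files={"file": ("sample.pdf", f, "application/pdf")},
        timeout=120.0,  # upload plus cache creation can take a while
    )
print(resp.json())  # e.g. {'success': True, 'cache_id': '...', 'token_count': ..., 'document_name': 'sample.pdf'}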
@@ -673,62 +703,107 @@ def upload_from_url():
         if not url:
             return jsonify({'success': False, 'error': 'No URL provided'})
 
-        # Download file from URL
-        response = httpx.get(url)
-        response.raise_for_status()
+        # Download file from URL with timeout and size limits
+        try:
+            with httpx.Client(timeout=30.0) as client_http:
+                response = client_http.get(url)
+                response.raise_for_status()
+
+                # Check content type
+                content_type = response.headers.get('content-type', '').lower()
+                if 'pdf' not in content_type and not url.lower().endswith('.pdf'):
+                    return jsonify({'success': False, 'error': 'URL does not point to a PDF file'})
+
+                # Check file size
+                content_length = len(response.content)
+                if content_length > 10 * 1024 * 1024: # 10MB limit
+                    return jsonify({'success': False, 'error': 'File too large. Maximum size is 10MB.'})
+
+                file_io = io.BytesIO(response.content)
+
+        except httpx.TimeoutException:
+            return jsonify({'success': False, 'error': 'Request timeout. Please try a different URL.'})
+        except httpx.HTTPError as e:
+            return jsonify({'success': False, 'error': f'Failed to download file: {str(e)}'})
 
-        file_io = io.BytesIO(response.content)
+        # Extract filename from URL
+        filename = url.split('/')[-1]
+        if not filename.endswith('.pdf'):
+            filename += '.pdf'
 
         # Upload to Gemini File API
-        document = client.files.upload(
-            file=file_io,
-            config=dict(mime_type='application/pdf')
-        )
+        try:
+            document = client.files.upload(
+                file=file_io,
+                config=types.FileUploadConfig(
+                    mime_type='application/pdf',
+                    display_name=filename
+                )
+            )
+            print(f"Document uploaded successfully: {document.name}")
+        except Exception as upload_error:
+            print(f"Upload error: {upload_error}")
+            return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})
 
         # Create cache with system instruction
        try:
             system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."
 
-            # Use the correct model format as per documentation
-            model = 'models/gemini-2.0-flash-001'
+            # Use the correct model name (without 'models/' prefix)
+            model = 'gemini-2.0-flash-001'
 
             cache = client.caches.create(
                 model=model,
                 config=types.CreateCachedContentConfig(
-                    display_name='pdf document cache',
+                    display_name=f'PDF document cache - {filename}',
                     system_instruction=system_instruction,
                     contents=[document],
                     ttl="3600s", # 1 hour TTL
                 )
             )
 
+            print(f"Cache created successfully: {cache.name}")
+
             # Store cache info
             cache_id = str(uuid.uuid4())
             document_caches[cache_id] = {
                 'cache_name': cache.name,
-                'document_name': url,
+                'document_name': filename,
+                'document_file_name': document.name,
+                'source_url': url,
                 'created_at': datetime.now().isoformat()
             }
 
+            # Get token count safely
+            token_count = 'Unknown'
+            if hasattr(cache, 'usage_metadata') and cache.usage_metadata:
+                if hasattr(cache.usage_metadata, 'total_token_count'):
+                    token_count = cache.usage_metadata.total_token_count
+                elif hasattr(cache.usage_metadata, 'cached_token_count'):
+                    token_count = cache.usage_metadata.cached_token_count
+
             return jsonify({
                 'success': True,
                 'cache_id': cache_id,
-                'token_count': getattr(cache.usage_metadata, 'cached_token_count', 'Unknown')
+                'token_count': token_count,
+                'document_name': filename
             })
 
         except Exception as cache_error:
+            print(f"Cache error: {cache_error}")
             # If caching fails due to small content, provide alternative approach
-            if "Cached content is too small" in str(cache_error):
+            if "too small" in str(cache_error).lower():
                 return jsonify({
                     'success': False,
-                    'error': 'PDF is too small for caching. Please upload a larger document (minimum 4,096 tokens required).',
+                    'error': 'PDF content is too small for caching. Please upload a larger document with more text content.',
                     'suggestion': 'Try uploading a longer document or combine multiple documents.'
                 })
             else:
-                raise cache_error
+                return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})
 
     except Exception as e:
-        return jsonify({'success': False, 'error': str(e)})
+        print(f"General error: {e}")
+        return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
 
 @app.route('/ask', methods=['POST'])
 def ask_question():
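The URL flow can be driven the same way; a sketch, assuming the handler reads a JSON body with a "url" field (the field name is not visible in this hunk) and with a placeholder document URL:

import httpx

resp = httpx.post(
    "http://localhost:7860/upload-url",
    json={"url": "https://example.com/sample.pdf"},  # field name assumed
    timeout=120.0,
)
print(resp.json())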
@@ -741,26 +816,38 @@ def ask_question():
             return jsonify({'success': False, 'error': 'Missing question or cache_id'})
 
         if cache_id not in document_caches:
-            return jsonify({'success': False, 'error': 'Cache not found'})
+            return jsonify({'success': False, 'error': 'Cache not found. Please upload a document first.'})
 
         cache_info = document_caches[cache_id]
 
         # Generate response using cached content with correct model format
-        response = client.models.generate_content(
-            model='models/gemini-2.0-flash-001',
-            contents=question,
-            config=types.GenerateContentConfig(
-                cached_content=cache_info['cache_name']
+        try:
+            response = client.models.generate_content(
+                model='gemini-2.0-flash-001', # No 'models/' prefix here
+                contents=question,
+                config=types.GenerateContentConfig(
+                    cached_content=cache_info['cache_name']
+                )
             )
-        )
-
-        return jsonify({
-            'success': True,
-            'answer': response.text
-        })
+
+            if response and response.text:
+                return jsonify({
+                    'success': True,
+                    'answer': response.text
+                })
+            else:
+                return jsonify({
+                    'success': False,
+                    'error': 'No response generated from the model'
+                })
+
+        except Exception as gen_error:
+            print(f"Generation error: {gen_error}")
+            return jsonify({'success': False, 'error': f'Failed to generate response: {str(gen_error)}'})
 
     except Exception as e:
-        return jsonify({'success': False, 'error': str(e)})
+        print(f"General error in ask_question: {e}")
+        return jsonify({'success': False, 'error': f'Server error: {str(e)}'})
 
 @app.route('/caches', methods=['GET'])
 def list_caches():
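With a cache_id from either upload route, asking questions is a plain JSON POST; the question and cache_id field names come straight from the validation check in this hunk. A sketch:

import httpx

cache_id = "..."  # value returned by /upload or /upload-url
resp = httpx.post(
    "http://localhost:7860/ask",
    json={"question": "Summarize the key findings.", "cache_id": cache_id},
)
payload = resp.json()
print(payload["answer"] if payload.get("success") else payload["error"])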
@@ -787,7 +874,11 @@ def delete_cache(cache_id):
         cache_info = document_caches[cache_id]
 
         # Delete from Gemini API
-        client.caches.delete(cache_info['cache_name'])
+        try:
+            client.caches.delete(cache_info['cache_name'])
+        except Exception as delete_error:
+            print(f"Error deleting cache from Gemini API: {delete_error}")
+            # Continue to remove from local storage even if API deletion fails
 
         # Remove from local storage
         del document_caches[cache_id]
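Remote deletion is now best-effort: if the Gemini API call fails (for example because the 3600s TTL already expired the cache server-side), the handler logs the error and still drops the local entry instead of surfacing a failure. A matching client call, as a sketch in which the /caches/<cache_id> path and the DELETE method are assumptions based on the handler signature:

import httpx

cache_id = "..."  # id previously returned by an upload route
resp = httpx.delete(f"http://localhost:7860/caches/{cache_id}")  # route assumed
print(resp.json())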
@@ -797,7 +888,23 @@ def delete_cache(cache_id):
     except Exception as e:
         return jsonify({'success': False, 'error': str(e)})
 
+# Health check endpoint
+@app.route('/health', methods=['GET'])
+def health_check():
+    return jsonify({'status': 'healthy', 'service': 'Smart Document Analysis Platform'})
+
+# Error handlers
+@app.errorhandler(413)
+def too_large(e):
+    return jsonify({'success': False, 'error': 'File too large'}), 413
+
+@app.errorhandler(500)
+def internal_error(e):
+    return jsonify({'success': False, 'error': 'Internal server error'}), 500
+
 if __name__ == '__main__':
     import os
     port = int(os.environ.get("PORT", 7860))
-    app.run(debug=True, host='0.0.0.0', port=port)
+    print(f"Starting server on port {port}")
+    print(f"Google API Key configured: {'Yes' if GOOGLE_API_KEY else 'No'}")
+    app.run(debug=False, host='0.0.0.0', port=port)
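The new /health route gives the Spaces runtime (or any load balancer) something cheap to poll, and the switch to debug=False is the safer default for a publicly reachable app. A readiness-probe sketch:

import httpx

resp = httpx.get("http://localhost:7860/health", timeout=5.0)
resp.raise_for_status()
assert resp.json().get("status") == "healthy"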
 
 
 