Update app.py
app.py CHANGED
```diff
@@ -9,23 +9,20 @@ import uuid
 from datetime import datetime, timezone, timedelta
 from dotenv import load_dotenv
 import json
-from huggingface_hub import HfApi

-# Load
+# Load environment variables
+load_dotenv()
+
+# Get Google API key from environment
 GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
 if GOOGLE_API_KEY is None:
     raise ValueError("GOOGLE_API_KEY environment variable is not set. Please set it before running the script.")

-
-hf_api = HfApi(
-    token= GOOGLE_API_KEY, # Token is not persisted on the machine.
-)
-
 app = Flask(__name__)
 CORS(app)

-# Initialize Gemini client
-client = genai.Client(api_key=
+# Initialize Gemini client with correct API key
+client = genai.Client(api_key=GOOGLE_API_KEY)

 # In-memory storage for demo (in production, use a database)
 document_caches = {}
```
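This first hunk drops the leftover `HfApi` client, which was being constructed with the Google key, and instead calls `load_dotenv()` before reading `GOOGLE_API_KEY`, then initializes the `genai` client from that key. A minimal sketch of the startup wiring the hunk converges on; the import line is an assumption (the conventional import for the `google-genai` package), while the `.env` variable name and the error message come from the diff:

```python
# Sketch of the startup path, assuming the standard google-genai import.
import os
from dotenv import load_dotenv
from google import genai

load_dotenv()  # pulls GOOGLE_API_KEY from a local .env file, if present
api_key = os.getenv("GOOGLE_API_KEY")
if api_key is None:
    raise ValueError("GOOGLE_API_KEY environment variable is not set. Please set it before running the script.")

client = genai.Client(api_key=api_key)  # key stays in memory; nothing is persisted
```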
```diff
@@ -609,60 +606,93 @@ def upload_file():
         if file.filename == '':
             return jsonify({'success': False, 'error': 'No file selected'})

+        # Check file size (limit to 10MB for example)
+        file.seek(0, 2) # Seek to end
+        file_size = file.tell()
+        file.seek(0) # Reset to beginning
+
+        if file_size > 10 * 1024 * 1024: # 10MB limit
+            return jsonify({'success': False, 'error': 'File too large. Maximum size is 10MB.'})
+
         # Read file content
         file_content = file.read()
+        if not file_content:
+            return jsonify({'success': False, 'error': 'File is empty'})
+
         file_io = io.BytesIO(file_content)

         # Upload to Gemini File API
-
-
-
-
+        try:
+            document = client.files.upload(
+                file=file_io,
+                config=types.FileUploadConfig(
+                    mime_type='application/pdf',
+                    display_name=file.filename
+                )
+            )
+            print(f"Document uploaded successfully: {document.name}")
+        except Exception as upload_error:
+            print(f"Upload error: {upload_error}")
+            return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})

         # Create cache with system instruction
         try:
             system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."

-            # Use the correct model
-            model = '
+            # Use the correct model name (without 'models/' prefix)
+            model = 'gemini-2.0-flash-001'

             cache = client.caches.create(
                 model=model,
                 config=types.CreateCachedContentConfig(
-                    display_name='
+                    display_name=f'PDF document cache - {file.filename}',
                     system_instruction=system_instruction,
                     contents=[document],
                     ttl="3600s", # 1 hour TTL
                 )
             )

+            print(f"Cache created successfully: {cache.name}")
+
             # Store cache info
             cache_id = str(uuid.uuid4())
             document_caches[cache_id] = {
                 'cache_name': cache.name,
                 'document_name': file.filename,
+                'document_file_name': document.name,
                 'created_at': datetime.now().isoformat()
             }

+            # Get token count safely
+            token_count = 'Unknown'
+            if hasattr(cache, 'usage_metadata') and cache.usage_metadata:
+                if hasattr(cache.usage_metadata, 'total_token_count'):
+                    token_count = cache.usage_metadata.total_token_count
+                elif hasattr(cache.usage_metadata, 'cached_token_count'):
+                    token_count = cache.usage_metadata.cached_token_count
+
             return jsonify({
                 'success': True,
                 'cache_id': cache_id,
-                'token_count':
+                'token_count': token_count,
+                'document_name': file.filename
             })

         except Exception as cache_error:
+            print(f"Cache error: {cache_error}")
             # If caching fails due to small content, provide alternative approach
-            if "
+            if "too small" in str(cache_error).lower():
                 return jsonify({
                     'success': False,
-                    'error': 'PDF is too small for caching. Please upload a larger document
+                    'error': 'PDF content is too small for caching. Please upload a larger document with more text content.',
                     'suggestion': 'Try uploading a longer document or combine multiple documents.'
                 })
             else:
-
+                return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})

     except Exception as e:
-
+        print(f"General error: {e}")
+        return jsonify({'success': False, 'error': f'Server error: {str(e)}'})

 @app.route('/upload-url', methods=['POST'])
 def upload_from_url():
```
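For reference, a hypothetical client call against the reworked upload handler. The `/upload` path and the `file` multipart field name are assumptions (the route decorator sits outside this hunk); the response keys mirror the handler's `jsonify` payload, and port 7860 is the default from the `__main__` block.

```python
# Hypothetical usage sketch; endpoint path and field name are assumed.
import httpx

with open("sample.pdf", "rb") as f:
    resp = httpx.post(
        "http://localhost:7860/upload",
        files={"file": ("sample.pdf", f, "application/pdf")},
        timeout=120.0,  # file upload plus cache creation can take a while
    )
print(resp.json())  # {'success': True, 'cache_id': ..., 'token_count': ..., 'document_name': ...}
```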
```diff
@@ -673,62 +703,107 @@ def upload_from_url():
         if not url:
             return jsonify({'success': False, 'error': 'No URL provided'})

-        # Download file from URL
-
-
-
+        # Download file from URL with timeout and size limits
+        try:
+            with httpx.Client(timeout=30.0) as client_http:
+                response = client_http.get(url)
+                response.raise_for_status()
+
+                # Check content type
+                content_type = response.headers.get('content-type', '').lower()
+                if 'pdf' not in content_type and not url.lower().endswith('.pdf'):
+                    return jsonify({'success': False, 'error': 'URL does not point to a PDF file'})
+
+                # Check file size
+                content_length = len(response.content)
+                if content_length > 10 * 1024 * 1024: # 10MB limit
+                    return jsonify({'success': False, 'error': 'File too large. Maximum size is 10MB.'})
+
+                file_io = io.BytesIO(response.content)
+
+        except httpx.TimeoutException:
+            return jsonify({'success': False, 'error': 'Request timeout. Please try a different URL.'})
+        except httpx.HTTPError as e:
+            return jsonify({'success': False, 'error': f'Failed to download file: {str(e)}'})

+        # Extract filename from URL
+        filename = url.split('/')[-1]
+        if not filename.endswith('.pdf'):
+            filename += '.pdf'

         # Upload to Gemini File API
-
-
-
-
+        try:
+            document = client.files.upload(
+                file=file_io,
+                config=types.FileUploadConfig(
+                    mime_type='application/pdf',
+                    display_name=filename
+                )
+            )
+            print(f"Document uploaded successfully: {document.name}")
+        except Exception as upload_error:
+            print(f"Upload error: {upload_error}")
+            return jsonify({'success': False, 'error': f'Failed to upload file to Gemini: {str(upload_error)}'})

         # Create cache with system instruction
         try:
             system_instruction = "You are an expert document analyzer. Provide detailed, accurate answers based on the uploaded document content. Always be helpful and thorough in your responses."

-            # Use the correct model
-            model = '
+            # Use the correct model name (without 'models/' prefix)
+            model = 'gemini-2.0-flash-001'

             cache = client.caches.create(
                 model=model,
                 config=types.CreateCachedContentConfig(
-                    display_name='
+                    display_name=f'PDF document cache - {filename}',
                     system_instruction=system_instruction,
                     contents=[document],
                     ttl="3600s", # 1 hour TTL
                 )
             )

+            print(f"Cache created successfully: {cache.name}")
+
             # Store cache info
             cache_id = str(uuid.uuid4())
             document_caches[cache_id] = {
                 'cache_name': cache.name,
-                'document_name':
+                'document_name': filename,
+                'document_file_name': document.name,
+                'source_url': url,
                 'created_at': datetime.now().isoformat()
             }

+            # Get token count safely
+            token_count = 'Unknown'
+            if hasattr(cache, 'usage_metadata') and cache.usage_metadata:
+                if hasattr(cache.usage_metadata, 'total_token_count'):
+                    token_count = cache.usage_metadata.total_token_count
+                elif hasattr(cache.usage_metadata, 'cached_token_count'):
+                    token_count = cache.usage_metadata.cached_token_count
+
             return jsonify({
                 'success': True,
                 'cache_id': cache_id,
-                'token_count':
+                'token_count': token_count,
+                'document_name': filename
             })

         except Exception as cache_error:
+            print(f"Cache error: {cache_error}")
             # If caching fails due to small content, provide alternative approach
-            if "
+            if "too small" in str(cache_error).lower():
                 return jsonify({
                     'success': False,
-                    'error': 'PDF is too small for caching. Please upload a larger document
+                    'error': 'PDF content is too small for caching. Please upload a larger document with more text content.',
                     'suggestion': 'Try uploading a longer document or combine multiple documents.'
                 })
             else:
-
+                return jsonify({'success': False, 'error': f'Failed to create cache: {str(cache_error)}'})

     except Exception as e:
-
+        print(f"General error: {e}")
+        return jsonify({'success': False, 'error': f'Server error: {str(e)}'})

 @app.route('/ask', methods=['POST'])
 def ask_question():
```
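The `/upload-url` route itself is visible in the previous hunk; the `{"url": ...}` body shape below is an assumption inferred from the `url` variable the handler validates. A hypothetical call for the URL-based flow:

```python
# Hypothetical usage sketch; the JSON body shape is assumed.
import httpx

resp = httpx.post(
    "http://localhost:7860/upload-url",
    json={"url": "https://example.com/sample.pdf"},
    timeout=120.0,
)
print(resp.json())  # on success, includes cache_id, token_count, document_name
```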
```diff
@@ -741,26 +816,38 @@ def ask_question():
             return jsonify({'success': False, 'error': 'Missing question or cache_id'})

         if cache_id not in document_caches:
-            return jsonify({'success': False, 'error': 'Cache not found'})
+            return jsonify({'success': False, 'error': 'Cache not found. Please upload a document first.'})

         cache_info = document_caches[cache_id]

         # Generate response using cached content with correct model format
-
-
-
-
-
+        try:
+            response = client.models.generate_content(
+                model='gemini-2.0-flash-001', # No 'models/' prefix here
+                contents=question,
+                config=types.GenerateContentConfig(
+                    cached_content=cache_info['cache_name']
+                )
             )
-
-
-
-
-
-
+
+            if response and response.text:
+                return jsonify({
+                    'success': True,
+                    'answer': response.text
+                })
+            else:
+                return jsonify({
+                    'success': False,
+                    'error': 'No response generated from the model'
+                })
+
+        except Exception as gen_error:
+            print(f"Generation error: {gen_error}")
+            return jsonify({'success': False, 'error': f'Failed to generate response: {str(gen_error)}'})

     except Exception as e:
-
+        print(f"General error in ask_question: {e}")
+        return jsonify({'success': False, 'error': f'Server error: {str(e)}'})

 @app.route('/caches', methods=['GET'])
 def list_caches():
```
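A hypothetical follow-up question against a cached document. The `/ask` route is taken from the diff, and the body keys mirror the handler's `'Missing question or cache_id'` check; the placeholder `cache_id` value is illustrative.

```python
# Hypothetical usage sketch for the question endpoint.
import httpx

resp = httpx.post(
    "http://localhost:7860/ask",
    json={"cache_id": "<cache_id from an upload>", "question": "Summarize the key findings."},
    timeout=60.0,
)
data = resp.json()
print(data.get("answer") if data.get("success") else data.get("error"))
```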
```diff
@@ -787,7 +874,11 @@ def delete_cache(cache_id):
         cache_info = document_caches[cache_id]

         # Delete from Gemini API
-
+        try:
+            client.caches.delete(cache_info['cache_name'])
+        except Exception as delete_error:
+            print(f"Error deleting cache from Gemini API: {delete_error}")
+            # Continue to remove from local storage even if API deletion fails

         # Remove from local storage
         del document_caches[cache_id]
```
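Deletion is now best-effort: a failure against the Gemini API is logged and the local entry is removed anyway, so the in-memory store never keeps a cache the server can no longer use. A hypothetical call, assuming the route is `DELETE /caches/<cache_id>` (only `def delete_cache(cache_id)` is visible in the hunk):

```python
# Hypothetical deletion call; the DELETE /caches/<cache_id> route is inferred,
# not shown in the diff.
import httpx

cache_id = "<cache_id from an upload>"
resp = httpx.delete(f"http://localhost:7860/caches/{cache_id}")
print(resp.json())
```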
```diff
@@ -797,7 +888,23 @@ def delete_cache(cache_id):
     except Exception as e:
         return jsonify({'success': False, 'error': str(e)})

+# Health check endpoint
+@app.route('/health', methods=['GET'])
+def health_check():
+    return jsonify({'status': 'healthy', 'service': 'Smart Document Analysis Platform'})
+
+# Error handlers
+@app.errorhandler(413)
+def too_large(e):
+    return jsonify({'success': False, 'error': 'File too large'}), 413
+
+@app.errorhandler(500)
+def internal_error(e):
+    return jsonify({'success': False, 'error': 'Internal server error'}), 500
+
 if __name__ == '__main__':
     import os
     port = int(os.environ.get("PORT", 7860))
-
+    print(f"Starting server on port {port}")
+    print(f"Google API Key configured: {'Yes' if GOOGLE_API_KEY else 'No'}")
+    app.run(debug=False, host='0.0.0.0', port=port)
```
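The last hunk adds a `/health` probe, JSON error handlers for 413 and 500, and restores the `app.run` call with debug disabled. A quick smoke test, assuming the default port; the path and payload are taken directly from the diff:

```python
# Smoke test for the new health endpoint.
import httpx

print(httpx.get("http://localhost:7860/health").json())
# -> {'service': 'Smart Document Analysis Platform', 'status': 'healthy'}
```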