Spaces:

Agents-MCP-Hackathon
/

KnowledgeBridge

Sleeping

fazeel007 committed on Jun 10

Commit

907a3ac

1 Parent(s): f70a5f4

Fix Modal app: Move imports inside functions

- Moved all package imports inside Modal functions to avoid local import errors
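- Fixed task ID generation: replaced `hash(...)[:8]` (the built-in `hash()` returns an int, which cannot be sliced) with `hashlib.md5(...).hexdigest()[:8]`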
- Fixed timestamp generation for health check: replaced `modal.functions.current_timestamp()` with `datetime.datetime.utcnow().isoformat()`
- Ready for deployment

Files changed (1) hide show

modal_app/main.py +34 -19

modal_app/main.py CHANGED Viewed

@@ -3,18 +3,8 @@ KnowledgeBridge Modal App
 Provides distributed computing capabilities for document processing and vector search
 """
 import modal
-import json
-import numpy as np
 from typing import List, Dict, Any, Optional
 import os
-import requests
-from io import BytesIO
-import PyPDF2
-import pytesseract
-from PIL import Image
-import faiss
-import pickle
-import hashlib
 # Create Modal app
 app = modal.App("knowledgebridge-main")
@@ -50,6 +40,13 @@ def extract_text_from_documents(documents: List[Dict[str, Any]]) -> Dict[str, An
     """
     Extract text from documents using OCR and PDF parsing
     """
     results = []
     for doc in documents:
@@ -64,7 +61,6 @@ def extract_text_from_documents(documents: List[Dict[str, Any]]) -> Dict[str, An
                 # Handle PDF content
                 try:
                     # Assume content is base64 encoded PDF
-                    import base64
                     pdf_data = base64.b64decode(content)
                     pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_data))
@@ -78,7 +74,6 @@ def extract_text_from_documents(documents: List[Dict[str, Any]]) -> Dict[str, An
             elif content_type.startswith('image/'):
                 # Handle image content with OCR
                 try:
-                    import base64
                     image_data = base64.b64decode(content)
                     image = Image.open(BytesIO(image_data))
                     extracted_text = pytesseract.image_to_string(image)
@@ -105,8 +100,11 @@ def extract_text_from_documents(documents: List[Dict[str, Any]]) -> Dict[str, An
                 'error': str(e)
             })
     return {
-        'task_id': f"extract_{hash(str(documents))[:8]}",
         'status': 'completed',
         'results': results,
         'processed_count': len(results)
@@ -123,6 +121,11 @@ def build_vector_index(documents: List[Dict[str, Any]], index_name: str = "main_
     """
     Build FAISS vector index from documents
     """
     try:
         from sentence_transformers import SentenceTransformer
@@ -145,8 +148,9 @@ def build_vector_index(documents: List[Dict[str, Any]], index_name: str = "main_
                 })
         if not texts:
             return {
-                'task_id': f"index_{index_name}_{hash(str(documents))[:8]}",
                 'status': 'failed',
                 'error': 'No valid texts to index'
             }
@@ -174,8 +178,9 @@ def build_vector_index(documents: List[Dict[str, Any]], index_name: str = "main_
         volume.commit()
         return {
-            'task_id': f"index_{index_name}_{hash(str(documents))[:8]}",
             'status': 'completed',
             'index_name': index_name,
             'document_count': len(doc_metadata),
@@ -184,8 +189,9 @@ def build_vector_index(documents: List[Dict[str, Any]], index_name: str = "main_
         }
     except Exception as e:
         return {
-            'task_id': f"index_{index_name}_{hash(str(documents))[:8]}",
             'status': 'failed',
             'error': str(e)
         }
@@ -200,6 +206,10 @@ def vector_search(query: str, index_name: str = "main_index", max_results: int =
     """
     Perform vector search using FAISS index
     """
     try:
         from sentence_transformers import SentenceTransformer
@@ -270,12 +280,15 @@ def batch_process_documents(request: Dict[str, Any]) -> Dict[str, Any]:
     """
     Process multiple documents in batch
     """
     try:
         documents = request.get('documents', [])
         operations = request.get('operations', ['extract_text'])
         results = {
-            'task_id': f"batch_{hash(str(request))[:8]}",
             'status': 'completed',
             'operations_completed': [],
             'document_count': len(documents)
@@ -297,8 +310,9 @@ def batch_process_documents(request: Dict[str, Any]) -> Dict[str, Any]:
         return results
     except Exception as e:
         return {
-            'task_id': f"batch_{hash(str(request))[:8]}",
             'status': 'failed',
             'error': str(e)
         }
@@ -362,11 +376,12 @@ def web_task_status(task_id: str) -> Dict[str, Any]:
 @modal.web_endpoint(method="GET", label="health")
 def health_check() -> Dict[str, Any]:
     """Health check endpoint"""
     return {
         'status': 'healthy',
         'service': 'KnowledgeBridge Modal App',
         'version': '1.0.0',
-        'timestamp': str(modal.functions.current_timestamp())
     }
 if __name__ == "__main__":

 Provides distributed computing capabilities for document processing and vector search
 """
 import modal
 from typing import List, Dict, Any, Optional
 import os
 # Create Modal app
 app = modal.App("knowledgebridge-main")
     """
     Extract text from documents using OCR and PDF parsing
     """
+    import json
+    import base64
+    from io import BytesIO
+    import PyPDF2
+    import pytesseract
+    from PIL import Image
     results = []
     for doc in documents:
                 # Handle PDF content
                 try:
                     # Assume content is base64 encoded PDF
                     pdf_data = base64.b64decode(content)
                     pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_data))
             elif content_type.startswith('image/'):
                 # Handle image content with OCR
                 try:
                     image_data = base64.b64decode(content)
                     image = Image.open(BytesIO(image_data))
                     extracted_text = pytesseract.image_to_string(image)
                 'error': str(e)
             })
+    import hashlib
+    task_id = f"extract_{hashlib.md5(str(documents).encode()).hexdigest()[:8]}"
     return {
+        'task_id': task_id,
         'status': 'completed',
         'results': results,
         'processed_count': len(results)
     """
     Build FAISS vector index from documents
     """
+    import numpy as np
+    import faiss
+    import pickle
+    import hashlib
     try:
         from sentence_transformers import SentenceTransformer
                 })
         if not texts:
+            task_id = f"index_{index_name}_{hashlib.md5(str(documents).encode()).hexdigest()[:8]}"
             return {
+                'task_id': task_id,
                 'status': 'failed',
                 'error': 'No valid texts to index'
             }
         volume.commit()
+        task_id = f"index_{index_name}_{hashlib.md5(str(documents).encode()).hexdigest()[:8]}"
         return {
+            'task_id': task_id,
             'status': 'completed',
             'index_name': index_name,
             'document_count': len(doc_metadata),
         }
     except Exception as e:
+        task_id = f"index_{index_name}_{hashlib.md5(str(documents).encode()).hexdigest()[:8]}"
         return {
+            'task_id': task_id,
             'status': 'failed',
             'error': str(e)
         }
     """
     Perform vector search using FAISS index
     """
+    import numpy as np
+    import faiss
+    import pickle
     try:
         from sentence_transformers import SentenceTransformer
     """
     Process multiple documents in batch
     """
+    import hashlib
     try:
         documents = request.get('documents', [])
         operations = request.get('operations', ['extract_text'])
+        task_id = f"batch_{hashlib.md5(str(request).encode()).hexdigest()[:8]}"
         results = {
+            'task_id': task_id,
             'status': 'completed',
             'operations_completed': [],
             'document_count': len(documents)
         return results
     except Exception as e:
+        task_id = f"batch_{hashlib.md5(str(request).encode()).hexdigest()[:8]}"
         return {
+            'task_id': task_id,
             'status': 'failed',
             'error': str(e)
         }
 @modal.web_endpoint(method="GET", label="health")
 def health_check() -> Dict[str, Any]:
     """Health check endpoint"""
+    import datetime
     return {
         'status': 'healthy',
         'service': 'KnowledgeBridge Modal App',
         'version': '1.0.0',
+        'timestamp': datetime.datetime.utcnow().isoformat()
     }
 if __name__ == "__main__":