fazeel007 committed
Commit 907a3ac
Parent: f70a5f4

Fix Modal app: Move imports inside functions


- Moved all package imports inside Modal functions so the module can be imported locally without the container-only dependencies installed (see the sketch after this list)
- Fixed the task-ID hashing: hash() returns an int that cannot be sliced, so it is replaced with hashlib.md5
- Fixed timestamp generation for the health check endpoint
- Ready for deployment
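
For context, a minimal sketch of the pattern the fixes apply. The app name, image contents, and function bodies below are illustrative stand-ins, not the real app's: heavy packages are imported inside the Modal function body so they only load in the container, task IDs are derived with hashlib.md5 (the old f"extract_{hash(str(documents))[:8]}" raised TypeError because hash() returns an int, and ints are not subscriptable), and the health check uses the standard library for its timestamp.

import modal

app = modal.App("example-app")

# Container-only dependencies live in the image, not on the deploying machine.
image = modal.Image.debian_slim().pip_install("PyPDF2", "pytesseract", "Pillow")

@app.function(image=image)
def extract_text(documents: list) -> dict:
    # Deferred imports: these run inside the container, where the packages
    # above are installed, so deployment works from a machine without them.
    import hashlib
    import PyPDF2  # noqa: F401 -- stands in for the real PDF-parsing branch

    # hash() returns an int, which cannot be sliced; md5 yields a sliceable
    # hex string and is also stable across interpreter runs.
    task_id = f"extract_{hashlib.md5(str(documents).encode()).hexdigest()[:8]}"
    return {"task_id": task_id, "status": "completed"}

@app.function()
def health_check() -> dict:
    # Standard-library timestamp; no Modal helper is needed for this.
    import datetime
    return {"status": "healthy", "timestamp": datetime.datetime.utcnow().isoformat()}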

Files changed (1)
  1. modal_app/main.py +34 -19
modal_app/main.py CHANGED
@@ -3,18 +3,8 @@ KnowledgeBridge Modal App
 Provides distributed computing capabilities for document processing and vector search
 """
 import modal
-import json
-import numpy as np
 from typing import List, Dict, Any, Optional
 import os
-import requests
-from io import BytesIO
-import PyPDF2
-import pytesseract
-from PIL import Image
-import faiss
-import pickle
-import hashlib
 
 # Create Modal app
 app = modal.App("knowledgebridge-main")
@@ -50,6 +40,13 @@ def extract_text_from_documents(documents: List[Dict[str, Any]]) -> Dict[str, An
     """
     Extract text from documents using OCR and PDF parsing
     """
+    import json
+    import base64
+    from io import BytesIO
+    import PyPDF2
+    import pytesseract
+    from PIL import Image
+
     results = []
 
     for doc in documents:
@@ -64,7 +61,6 @@ def extract_text_from_documents(documents: List[Dict[str, Any]]) -> Dict[str, An
             # Handle PDF content
             try:
                 # Assume content is base64 encoded PDF
-                import base64
                 pdf_data = base64.b64decode(content)
                 pdf_reader = PyPDF2.PdfReader(BytesIO(pdf_data))
 
@@ -78,7 +74,6 @@ def extract_text_from_documents(documents: List[Dict[str, Any]]) -> Dict[str, An
         elif content_type.startswith('image/'):
             # Handle image content with OCR
             try:
-                import base64
                 image_data = base64.b64decode(content)
                 image = Image.open(BytesIO(image_data))
                 extracted_text = pytesseract.image_to_string(image)
@@ -105,8 +100,11 @@ def extract_text_from_documents(documents: List[Dict[str, Any]]) -> Dict[str, An
                 'error': str(e)
             })
 
+    import hashlib
+    task_id = f"extract_{hashlib.md5(str(documents).encode()).hexdigest()[:8]}"
+
     return {
-        'task_id': f"extract_{hash(str(documents))[:8]}",
+        'task_id': task_id,
         'status': 'completed',
         'results': results,
         'processed_count': len(results)
@@ -123,6 +121,11 @@ def build_vector_index(documents: List[Dict[str, Any]], index_name: str = "main_
     """
     Build FAISS vector index from documents
     """
+    import numpy as np
+    import faiss
+    import pickle
+    import hashlib
+
     try:
         from sentence_transformers import SentenceTransformer
 
@@ -145,8 +148,9 @@ def build_vector_index(documents: List[Dict[str, Any]], index_name: str = "main_
             })
 
         if not texts:
+            task_id = f"index_{index_name}_{hashlib.md5(str(documents).encode()).hexdigest()[:8]}"
             return {
-                'task_id': f"index_{index_name}_{hash(str(documents))[:8]}",
+                'task_id': task_id,
                 'status': 'failed',
                 'error': 'No valid texts to index'
             }
@@ -174,8 +178,9 @@ def build_vector_index(documents: List[Dict[str, Any]], index_name: str = "main_
 
         volume.commit()
 
+        task_id = f"index_{index_name}_{hashlib.md5(str(documents).encode()).hexdigest()[:8]}"
         return {
-            'task_id': f"index_{index_name}_{hash(str(documents))[:8]}",
+            'task_id': task_id,
             'status': 'completed',
             'index_name': index_name,
             'document_count': len(doc_metadata),
@@ -184,8 +189,9 @@ def build_vector_index(documents: List[Dict[str, Any]], index_name: str = "main_
         }
 
     except Exception as e:
+        task_id = f"index_{index_name}_{hashlib.md5(str(documents).encode()).hexdigest()[:8]}"
        return {
-            'task_id': f"index_{index_name}_{hash(str(documents))[:8]}",
+            'task_id': task_id,
            'status': 'failed',
            'error': str(e)
        }
@@ -200,6 +206,10 @@ def vector_search(query: str, index_name: str = "main_index", max_results: int =
     """
     Perform vector search using FAISS index
     """
+    import numpy as np
+    import faiss
+    import pickle
+
     try:
         from sentence_transformers import SentenceTransformer
 
@@ -270,12 +280,15 @@ def batch_process_documents(request: Dict[str, Any]) -> Dict[str, Any]:
     """
     Process multiple documents in batch
     """
+    import hashlib
+
     try:
         documents = request.get('documents', [])
         operations = request.get('operations', ['extract_text'])
 
+        task_id = f"batch_{hashlib.md5(str(request).encode()).hexdigest()[:8]}"
        results = {
-            'task_id': f"batch_{hash(str(request))[:8]}",
+            'task_id': task_id,
            'status': 'completed',
            'operations_completed': [],
            'document_count': len(documents)
@@ -297,8 +310,9 @@ def batch_process_documents(request: Dict[str, Any]) -> Dict[str, Any]:
         return results
 
     except Exception as e:
+        task_id = f"batch_{hashlib.md5(str(request).encode()).hexdigest()[:8]}"
        return {
-            'task_id': f"batch_{hash(str(request))[:8]}",
+            'task_id': task_id,
            'status': 'failed',
            'error': str(e)
        }
@@ -362,11 +376,12 @@ def web_task_status(task_id: str) -> Dict[str, Any]:
 @modal.web_endpoint(method="GET", label="health")
 def health_check() -> Dict[str, Any]:
     """Health check endpoint"""
+    import datetime
     return {
         'status': 'healthy',
         'service': 'KnowledgeBridge Modal App',
         'version': '1.0.0',
-        'timestamp': str(modal.functions.current_timestamp())
+        'timestamp': datetime.datetime.utcnow().isoformat()
     }
 
 if __name__ == "__main__":
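
With the imports deferred and the task-ID hashing fixed, the module can be imported on the deploying machine, so a standard Modal deployment should go through (assuming the stock Modal CLI):

modal deploy modal_app/main.py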