rajsecrets0 committed on
Commit
bbaef50
·
verified ·
1 Parent(s): 649e3d3

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -58
app.py CHANGED
@@ -16,12 +16,6 @@ from reportlab.lib.pagesizes import letter
16
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
17
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
18
  import io
19
- import pytesseract # Tesseract OCR
20
- from dotenv import load_dotenv # For .env file
21
-
22
-
23
- # Load environment variables
24
- load_dotenv()
25
 
26
  # Configure logging
27
  logging.basicConfig(
@@ -32,8 +26,8 @@ logger = logging.getLogger(__name__)
32
 
33
  # Configuration and Constants
34
  class Config:
35
- GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent"
36
- GEMINI_API_KEY = os.getenv("GEMINI_API_KEY") # Load from .env
37
  MAX_RETRIES = 3
38
  TIMEOUT = 30
39
  MAX_IMAGE_SIZE = (1600, 1600)
@@ -218,20 +212,11 @@ class DocumentProcessor:
218
  def process_document(self, image: Image.Image) -> Dict[str, Any]:
219
  try:
220
  processed_image = self.image_processor.preprocess_image(image)
221
-
222
- # Extract text using Tesseract OCR
223
- tesseract_text = pytesseract.image_to_string(processed_image)
224
-
225
- # Extract text using Gemini API
226
  image_base64 = self.encode_image(processed_image)
227
- gemini_text = self.extract_text_with_gemini(image_base64)
228
-
229
- # Combine results from Tesseract and Gemini
230
- combined_text = self.combine_text_results(tesseract_text, gemini_text)
231
 
232
  results = {
233
- "document_type": self.classify_document(combined_text),
234
- "extracted_text": combined_text,
235
  "structured_data": None
236
  }
237
 
@@ -251,29 +236,8 @@ class DocumentProcessor:
251
  image.save(buffered, format="JPEG", quality=95)
252
  return base64.b64encode(buffered.getvalue()).decode('utf-8')
253
 
254
- @staticmethod
255
- def extract_text_with_gemini(image_base64: str) -> str:
256
  prompt = """
257
- Extract all visible text from this medical document.
258
- Include:
259
- - Headers and titles
260
- - Patient information
261
- - Medical data and values
262
- - Notes and annotations
263
- - Dates and timestamps
264
- Format the output in a clear, structured manner.
265
- """
266
- response = GeminiAPI.call_api(prompt, image_base64)
267
- return response["candidates"][0]["content"]["parts"][0]["text"].strip()
268
-
269
- @staticmethod
270
- def combine_text_results(tesseract_text: str, gemini_text: str) -> str:
271
- # Combine results, prioritizing Gemini's output but adding Tesseract's output for completeness
272
- combined_text = f"Gemini Extracted Text:\n{gemini_text}\n\nTesseract Extracted Text:\n{tesseract_text}"
273
- return combined_text
274
-
275
- def classify_document(self, text: str) -> str:
276
- prompt = f"""
277
  Analyze this medical document and classify it into one of the following categories:
278
  - Lab Report
279
  - Patient Chart
@@ -282,11 +246,22 @@ class DocumentProcessor:
282
  - Medical Certificate
283
  - Other (specify)
284
  Provide only the category name.
 
 
 
285
 
286
- Document Text:
287
- {text}
 
 
 
 
 
 
 
 
288
  """
289
- response = GeminiAPI.call_api(prompt)
290
  return response["candidates"][0]["content"]["parts"][0]["text"].strip()
291
 
292
  def extract_structured_data(self, text: str) -> Dict[str, Any]:
@@ -338,9 +313,6 @@ class DocumentProcessor:
338
  self.correct_medicine_name(med) for med in structured_data.get('medications', [])
339
  ]
340
 
341
- # Improve symptoms extraction
342
- structured_data['symptoms'] = self.extract_symptoms(text)
343
-
344
  return structured_data
345
 
346
  @staticmethod
@@ -364,17 +336,6 @@ class DocumentProcessor:
364
  medication['name'] = response["candidates"][0]["content"]["parts"][0]["text"].strip()
365
  return medication
366
 
367
- @staticmethod
368
- def extract_symptoms(text: str) -> list[str]:
369
- """Extract symptoms from the text."""
370
- prompt = f"""
371
- Extract all symptoms mentioned in the following medical text. Return only a list of symptoms:
372
- {text}
373
- """
374
- response = GeminiAPI.call_api(prompt)
375
- symptoms = response["candidates"][0]["content"]["parts"][0]["text"].strip().split("\n")
376
- return [symptom.strip() for symptom in symptoms if symptom.strip()]
377
-
378
  @staticmethod
379
  def parse_json_response(response: Dict[str, Any]) -> Dict[str, Any]:
380
  try:
 
16
  from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
17
  from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
18
  import io
 
 
 
 
 
 
19
 
20
  # Configure logging
21
  logging.basicConfig(
 
26
 
27
  # Configuration and Constants
28
  class Config:
29
+ GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro-vision:generateContent"
30
+ GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "YOUR_API_KEY")
31
  MAX_RETRIES = 3
32
  TIMEOUT = 30
33
  MAX_IMAGE_SIZE = (1600, 1600)
 
212
  def process_document(self, image: Image.Image) -> Dict[str, Any]:
213
  try:
214
  processed_image = self.image_processor.preprocess_image(image)
 
 
 
 
 
215
  image_base64 = self.encode_image(processed_image)
 
 
 
 
216
 
217
  results = {
218
+ "document_type": self.classify_document(image_base64),
219
+ "extracted_text": self.extract_text(image_base64),
220
  "structured_data": None
221
  }
222
 
 
236
  image.save(buffered, format="JPEG", quality=95)
237
  return base64.b64encode(buffered.getvalue()).decode('utf-8')
238
 
239
+ def classify_document(self, image_base64: str) -> str:
 
240
  prompt = """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  Analyze this medical document and classify it into one of the following categories:
242
  - Lab Report
243
  - Patient Chart
 
246
  - Medical Certificate
247
  - Other (specify)
248
  Provide only the category name.
249
+ """
250
+ response = GeminiAPI.call_api(prompt, image_base64)
251
+ return response["candidates"][0]["content"]["parts"][0]["text"].strip()
252
 
253
+ def extract_text(self, image_base64: str) -> str:
254
+ prompt = """
255
+ Extract all visible text from this medical document.
256
+ Include:
257
+ - Headers and titles
258
+ - Patient information
259
+ - Medical data and values
260
+ - Notes and annotations
261
+ - Dates and timestamps
262
+ Format the output in a clear, structured manner.
263
  """
264
+ response = GeminiAPI.call_api(prompt, image_base64)
265
  return response["candidates"][0]["content"]["parts"][0]["text"].strip()
266
 
267
  def extract_structured_data(self, text: str) -> Dict[str, Any]:
 
313
  self.correct_medicine_name(med) for med in structured_data.get('medications', [])
314
  ]
315
 
 
 
 
316
  return structured_data
317
 
318
  @staticmethod
 
336
  medication['name'] = response["candidates"][0]["content"]["parts"][0]["text"].strip()
337
  return medication
338
 
 
 
 
 
 
 
 
 
 
 
 
339
  @staticmethod
340
  def parse_json_response(response: Dict[str, Any]) -> Dict[str, Any]:
341
  try: