Spaces:

rajsecrets0
/

MedDocDigitizer

Sleeping

App Files Files Community

rajsecrets0 committed on Jan 11

Commit

bbaef50

verified ·

1 Parent(s): 649e3d3

Update app.py

Browse files

Files changed (1) hide show

app.py +19 -58

app.py CHANGED Viewed

@@ -16,12 +16,6 @@ from reportlab.lib.pagesizes import letter
 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 import io
-import pytesseract  # Tesseract OCR
-from dotenv import load_dotenv  # For .env file
-# Load environment variables
-load_dotenv()
 # Configure logging
 logging.basicConfig(
@@ -32,8 +26,8 @@ logger = logging.getLogger(__name__)
 # Configuration and Constants
 class Config:
-    GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash-exp:generateContent"
-    GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")  # Load from .env
     MAX_RETRIES = 3
     TIMEOUT = 30
     MAX_IMAGE_SIZE = (1600, 1600)
@@ -218,20 +212,11 @@ class DocumentProcessor:
     def process_document(self, image: Image.Image) -> Dict[str, Any]:
         try:
             processed_image = self.image_processor.preprocess_image(image)
-            # Extract text using Tesseract OCR
-            tesseract_text = pytesseract.image_to_string(processed_image)
-            # Extract text using Gemini API
             image_base64 = self.encode_image(processed_image)
-            gemini_text = self.extract_text_with_gemini(image_base64)
-            # Combine results from Tesseract and Gemini
-            combined_text = self.combine_text_results(tesseract_text, gemini_text)
             results = {
-                "document_type": self.classify_document(combined_text),
-                "extracted_text": combined_text,
                 "structured_data": None
             }
@@ -251,29 +236,8 @@ class DocumentProcessor:
         image.save(buffered, format="JPEG", quality=95)
         return base64.b64encode(buffered.getvalue()).decode('utf-8')
-    @staticmethod
-    def extract_text_with_gemini(image_base64: str) -> str:
         prompt = """
-        Extract all visible text from this medical document.
-        Include:
-        - Headers and titles
-        - Patient information
-        - Medical data and values
-        - Notes and annotations
-        - Dates and timestamps
-        Format the output in a clear, structured manner.
-        """
-        response = GeminiAPI.call_api(prompt, image_base64)
-        return response["candidates"][0]["content"]["parts"][0]["text"].strip()
-    @staticmethod
-    def combine_text_results(tesseract_text: str, gemini_text: str) -> str:
-        # Combine results, prioritizing Gemini's output but adding Tesseract's output for completeness
-        combined_text = f"Gemini Extracted Text:\n{gemini_text}\n\nTesseract Extracted Text:\n{tesseract_text}"
-        return combined_text
-    def classify_document(self, text: str) -> str:
-        prompt = f"""
         Analyze this medical document and classify it into one of the following categories:
         - Lab Report
         - Patient Chart
@@ -282,11 +246,22 @@ class DocumentProcessor:
         - Medical Certificate
         - Other (specify)
         Provide only the category name.
-        Document Text:
-        {text}
         """
-        response = GeminiAPI.call_api(prompt)
         return response["candidates"][0]["content"]["parts"][0]["text"].strip()
     def extract_structured_data(self, text: str) -> Dict[str, Any]:
@@ -338,9 +313,6 @@ class DocumentProcessor:
             self.correct_medicine_name(med) for med in structured_data.get('medications', [])
         ]
-        # Improve symptoms extraction
-        structured_data['symptoms'] = self.extract_symptoms(text)
         return structured_data
     @staticmethod
@@ -364,17 +336,6 @@ class DocumentProcessor:
         medication['name'] = response["candidates"][0]["content"]["parts"][0]["text"].strip()
         return medication
-    @staticmethod
-    def extract_symptoms(text: str) -> list[str]:
-        """Extract symptoms from the text."""
-        prompt = f"""
-        Extract all symptoms mentioned in the following medical text. Return only a list of symptoms:
-        {text}
-        """
-        response = GeminiAPI.call_api(prompt)
-        symptoms = response["candidates"][0]["content"]["parts"][0]["text"].strip().split("\n")
-        return [symptom.strip() for symptom in symptoms if symptom.strip()]
     @staticmethod
     def parse_json_response(response: Dict[str, Any]) -> Dict[str, Any]:
         try:

 from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
 from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
 import io
 # Configure logging
 logging.basicConfig(
 # Configuration and Constants
 class Config:
+    GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro-vision:generateContent"
+    GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "YOUR_API_KEY")
     MAX_RETRIES = 3
     TIMEOUT = 30
     MAX_IMAGE_SIZE = (1600, 1600)
     def process_document(self, image: Image.Image) -> Dict[str, Any]:
         try:
             processed_image = self.image_processor.preprocess_image(image)
             image_base64 = self.encode_image(processed_image)
             results = {
+                "document_type": self.classify_document(image_base64),
+                "extracted_text": self.extract_text(image_base64),
                 "structured_data": None
             }
         image.save(buffered, format="JPEG", quality=95)
         return base64.b64encode(buffered.getvalue()).decode('utf-8')
+    def classify_document(self, image_base64: str) -> str:
         prompt = """
         Analyze this medical document and classify it into one of the following categories:
         - Lab Report
         - Patient Chart
         - Medical Certificate
         - Other (specify)
         Provide only the category name.
+        """
+        response = GeminiAPI.call_api(prompt, image_base64)
+        return response["candidates"][0]["content"]["parts"][0]["text"].strip()
+    def extract_text(self, image_base64: str) -> str:
+        prompt = """
+        Extract all visible text from this medical document.
+        Include:
+        - Headers and titles
+        - Patient information
+        - Medical data and values
+        - Notes and annotations
+        - Dates and timestamps
+        Format the output in a clear, structured manner.
         """
+        response = GeminiAPI.call_api(prompt, image_base64)
         return response["candidates"][0]["content"]["parts"][0]["text"].strip()
     def extract_structured_data(self, text: str) -> Dict[str, Any]:
             self.correct_medicine_name(med) for med in structured_data.get('medications', [])
         ]
         return structured_data
     @staticmethod
         medication['name'] = response["candidates"][0]["content"]["parts"][0]["text"].strip()
         return medication
     @staticmethod
     def parse_json_response(response: Dict[str, Any]) -> Dict[str, Any]:
         try: