Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -16,12 +16,6 @@ from reportlab.lib.pagesizes import letter
|
|
16 |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
|
17 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
18 |
import io
|
19 |
-
import pytesseract # Tesseract OCR
|
20 |
-
from dotenv import load_dotenv # For .env file
|
21 |
-
|
22 |
-
|
23 |
-
# Load environment variables
|
24 |
-
load_dotenv()
|
25 |
|
26 |
# Configure logging
|
27 |
logging.basicConfig(
|
@@ -32,8 +26,8 @@ logger = logging.getLogger(__name__)
|
|
32 |
|
33 |
# Configuration and Constants
|
34 |
class Config:
|
35 |
-
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-
|
36 |
-
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
|
37 |
MAX_RETRIES = 3
|
38 |
TIMEOUT = 30
|
39 |
MAX_IMAGE_SIZE = (1600, 1600)
|
@@ -218,20 +212,11 @@ class DocumentProcessor:
|
|
218 |
def process_document(self, image: Image.Image) -> Dict[str, Any]:
|
219 |
try:
|
220 |
processed_image = self.image_processor.preprocess_image(image)
|
221 |
-
|
222 |
-
# Extract text using Tesseract OCR
|
223 |
-
tesseract_text = pytesseract.image_to_string(processed_image)
|
224 |
-
|
225 |
-
# Extract text using Gemini API
|
226 |
image_base64 = self.encode_image(processed_image)
|
227 |
-
gemini_text = self.extract_text_with_gemini(image_base64)
|
228 |
-
|
229 |
-
# Combine results from Tesseract and Gemini
|
230 |
-
combined_text = self.combine_text_results(tesseract_text, gemini_text)
|
231 |
|
232 |
results = {
|
233 |
-
"document_type": self.classify_document(
|
234 |
-
"extracted_text":
|
235 |
"structured_data": None
|
236 |
}
|
237 |
|
@@ -251,29 +236,8 @@ class DocumentProcessor:
|
|
251 |
image.save(buffered, format="JPEG", quality=95)
|
252 |
return base64.b64encode(buffered.getvalue()).decode('utf-8')
|
253 |
|
254 |
-
|
255 |
-
def extract_text_with_gemini(image_base64: str) -> str:
|
256 |
prompt = """
|
257 |
-
Extract all visible text from this medical document.
|
258 |
-
Include:
|
259 |
-
- Headers and titles
|
260 |
-
- Patient information
|
261 |
-
- Medical data and values
|
262 |
-
- Notes and annotations
|
263 |
-
- Dates and timestamps
|
264 |
-
Format the output in a clear, structured manner.
|
265 |
-
"""
|
266 |
-
response = GeminiAPI.call_api(prompt, image_base64)
|
267 |
-
return response["candidates"][0]["content"]["parts"][0]["text"].strip()
|
268 |
-
|
269 |
-
@staticmethod
|
270 |
-
def combine_text_results(tesseract_text: str, gemini_text: str) -> str:
|
271 |
-
# Combine results, prioritizing Gemini's output but adding Tesseract's output for completeness
|
272 |
-
combined_text = f"Gemini Extracted Text:\n{gemini_text}\n\nTesseract Extracted Text:\n{tesseract_text}"
|
273 |
-
return combined_text
|
274 |
-
|
275 |
-
def classify_document(self, text: str) -> str:
|
276 |
-
prompt = f"""
|
277 |
Analyze this medical document and classify it into one of the following categories:
|
278 |
- Lab Report
|
279 |
- Patient Chart
|
@@ -282,11 +246,22 @@ class DocumentProcessor:
|
|
282 |
- Medical Certificate
|
283 |
- Other (specify)
|
284 |
Provide only the category name.
|
|
|
|
|
|
|
285 |
|
286 |
-
|
287 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
288 |
"""
|
289 |
-
response = GeminiAPI.call_api(prompt)
|
290 |
return response["candidates"][0]["content"]["parts"][0]["text"].strip()
|
291 |
|
292 |
def extract_structured_data(self, text: str) -> Dict[str, Any]:
|
@@ -338,9 +313,6 @@ class DocumentProcessor:
|
|
338 |
self.correct_medicine_name(med) for med in structured_data.get('medications', [])
|
339 |
]
|
340 |
|
341 |
-
# Improve symptoms extraction
|
342 |
-
structured_data['symptoms'] = self.extract_symptoms(text)
|
343 |
-
|
344 |
return structured_data
|
345 |
|
346 |
@staticmethod
|
@@ -364,17 +336,6 @@ class DocumentProcessor:
|
|
364 |
medication['name'] = response["candidates"][0]["content"]["parts"][0]["text"].strip()
|
365 |
return medication
|
366 |
|
367 |
-
@staticmethod
|
368 |
-
def extract_symptoms(text: str) -> list[str]:
|
369 |
-
"""Extract symptoms from the text."""
|
370 |
-
prompt = f"""
|
371 |
-
Extract all symptoms mentioned in the following medical text. Return only a list of symptoms:
|
372 |
-
{text}
|
373 |
-
"""
|
374 |
-
response = GeminiAPI.call_api(prompt)
|
375 |
-
symptoms = response["candidates"][0]["content"]["parts"][0]["text"].strip().split("\n")
|
376 |
-
return [symptom.strip() for symptom in symptoms if symptom.strip()]
|
377 |
-
|
378 |
@staticmethod
|
379 |
def parse_json_response(response: Dict[str, Any]) -> Dict[str, Any]:
|
380 |
try:
|
|
|
16 |
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
|
17 |
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
|
18 |
import io
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
# Configure logging
|
21 |
logging.basicConfig(
|
|
|
26 |
|
27 |
# Configuration and Constants
|
28 |
class Config:
|
29 |
+
GEMINI_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-pro-vision:generateContent"
|
30 |
+
GEMINI_API_KEY = os.getenv("GEMINI_API_KEY", "YOUR_API_KEY")
|
31 |
MAX_RETRIES = 3
|
32 |
TIMEOUT = 30
|
33 |
MAX_IMAGE_SIZE = (1600, 1600)
|
|
|
212 |
def process_document(self, image: Image.Image) -> Dict[str, Any]:
|
213 |
try:
|
214 |
processed_image = self.image_processor.preprocess_image(image)
|
|
|
|
|
|
|
|
|
|
|
215 |
image_base64 = self.encode_image(processed_image)
|
|
|
|
|
|
|
|
|
216 |
|
217 |
results = {
|
218 |
+
"document_type": self.classify_document(image_base64),
|
219 |
+
"extracted_text": self.extract_text(image_base64),
|
220 |
"structured_data": None
|
221 |
}
|
222 |
|
|
|
236 |
image.save(buffered, format="JPEG", quality=95)
|
237 |
return base64.b64encode(buffered.getvalue()).decode('utf-8')
|
238 |
|
239 |
+
def classify_document(self, image_base64: str) -> str:
|
|
|
240 |
prompt = """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
Analyze this medical document and classify it into one of the following categories:
|
242 |
- Lab Report
|
243 |
- Patient Chart
|
|
|
246 |
- Medical Certificate
|
247 |
- Other (specify)
|
248 |
Provide only the category name.
|
249 |
+
"""
|
250 |
+
response = GeminiAPI.call_api(prompt, image_base64)
|
251 |
+
return response["candidates"][0]["content"]["parts"][0]["text"].strip()
|
252 |
|
253 |
+
def extract_text(self, image_base64: str) -> str:
|
254 |
+
prompt = """
|
255 |
+
Extract all visible text from this medical document.
|
256 |
+
Include:
|
257 |
+
- Headers and titles
|
258 |
+
- Patient information
|
259 |
+
- Medical data and values
|
260 |
+
- Notes and annotations
|
261 |
+
- Dates and timestamps
|
262 |
+
Format the output in a clear, structured manner.
|
263 |
"""
|
264 |
+
response = GeminiAPI.call_api(prompt, image_base64)
|
265 |
return response["candidates"][0]["content"]["parts"][0]["text"].strip()
|
266 |
|
267 |
def extract_structured_data(self, text: str) -> Dict[str, Any]:
|
|
|
313 |
self.correct_medicine_name(med) for med in structured_data.get('medications', [])
|
314 |
]
|
315 |
|
|
|
|
|
|
|
316 |
return structured_data
|
317 |
|
318 |
@staticmethod
|
|
|
336 |
medication['name'] = response["candidates"][0]["content"]["parts"][0]["text"].strip()
|
337 |
return medication
|
338 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
339 |
@staticmethod
|
340 |
def parse_json_response(response: Dict[str, Any]) -> Dict[str, Any]:
|
341 |
try:
|