ashhal committed
Commit 75faa01 · verified
1 Parent(s): 4a91321

Create app.py

Files changed (1)
app.py +80 -0
app.py ADDED
@@ -0,0 +1,80 @@
+ # 📦 Installations needed locally before deploying:
+ # Linux: sudo apt install tesseract-ocr poppler-utils
+ # Windows: Install Tesseract from https://github.com/tesseract-ocr/tesseract
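+ # Python packages, inferred from the imports below (sentencepiece is an assumption, for the LLaMA tokenizer):
+ # pip install gradio pdf2image pillow pytesseract torch transformers sentencepiece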
+
+ import gradio as gr
+ from pdf2image import convert_from_path
+ from PIL import Image
+ import pytesseract
+ import torch
+ from transformers import AutoTokenizer, AutoModelForCausalLM
+
+ # Load the MedAlpaca model from Hugging Face
+ model_name = "medalpaca/medalpaca-7b"
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32)
+ device = "cuda" if torch.cuda.is_available() else "cpu"
+ model.to(device)
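+ # Rough estimate (not from this commit): a 7B model needs about 14 GB of memory in float16 (GPU) and about 28 GB in float32 (CPU)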
+
+ # ========== OCR FUNCTIONS ==========
+
+ def extract_text_from_image(image):
+     return pytesseract.image_to_string(image)
+
+ def extract_text_from_pdf(pdf_path):
+     # pdf_path: filesystem path to the uploaded PDF (normalized in analyze_file below)
+     try:
+         images = convert_from_path(pdf_path)
+         text = ""
+         for page in images:
+             text += pytesseract.image_to_string(page) + "\n"
+         return text
+     except Exception as e:
+         return f"Error reading PDF: {e}"
+
+ # ========== MEDALPACA RESPONSE ==========
+
+ def generate_medical_explanation(text):
+     prompt = (
+         "You are a helpful medical assistant. Analyze the following patient's lab report text "
+         "and explain the abnormalities in plain, non-technical language:\n\n" + text +
+         "\n\nAlso, highlight abnormal values with flags."
+     )
+     inputs = tokenizer(prompt, return_tensors="pt").to(device)
+     outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
+     result = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return result.split(prompt)[-1].strip()
+
+ # ========== MAIN APP FUNCTION ==========
+
+ def analyze_file(file):
+     if not file:
+         return "⚠️ No file uploaded.", ""
+
+     # Depending on the Gradio version, gr.File yields either a path string or a tempfile-like object
+     path = file if isinstance(file, str) else file.name
+     if path.lower().endswith(".pdf"):
+         extracted_text = extract_text_from_pdf(path)
+     else:
+         try:
+             img = Image.open(path)
+             extracted_text = extract_text_from_image(img)
+         except Exception as e:
+             return f"❌ Error loading image: {e}", ""
+
+     if not extracted_text.strip():
+         return "❌ No text found. Try uploading a clearer image or PDF.", ""
+
+     ai_response = generate_medical_explanation(extracted_text)
+     return extracted_text, ai_response
+
+ # ========== GRADIO INTERFACE ==========
+
+ gr.Interface(
+     fn=analyze_file,
+     inputs=gr.File(label="📄 Upload Lab Report (Image or PDF)"),
+     outputs=[
+         gr.Textbox(label="📜 Extracted Text", lines=20),
+         gr.Textbox(label="🧠 MedAlpaca Interpretation", lines=20)
+     ],
+     title="🔬 AI Lab Report Analyzer with MedAlpaca",
+     description="Upload your medical report (image or PDF). This app extracts text using OCR and explains lab values using the MedAlpaca model."
+ ).launch()