Spaces:
Runtime error
Runtime error
File size: 2,924 Bytes
7a0cfef 0c0dd0e d37d30b 0c0dd0e d37d30b 7a0cfef d37d30b 75faa01 0c0dd0e 46b466f d37d30b 5aad2ae d37d30b 46b466f d37d30b 5aad2ae d37d30b 5aad2ae d37d30b e75c198 d37d30b 7a0cfef d37d30b 7a0cfef d37d30b 7a0cfef 6b2202e e75c198 d37d30b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
import gradio as gr
import fitz # PyMuPDF
import pdfplumber
import pytesseract
from pdf2image import convert_from_bytes
from transformers import pipeline
from PIL import Image
import io
# Load summarizer from Hugging Face (free model).
# Built once at module import so every call to process_pdf reuses the same
# pipeline object instead of constructing a new one per request.
summarizer = pipeline("summarization", model="Falconsai/text_summarization")
# Optional: Configure Tesseract path for Windows users (uncomment and adjust
# the path if pytesseract cannot locate the tesseract executable).
# pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
def _pymupdf_text(pdf_bytes):
    """Return the concatenated page text of the PDF as read by PyMuPDF."""
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)


def _pdfplumber_text(pdf_bytes):
    """Return the concatenated page text of the PDF as read by pdfplumber."""
    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
        # `or ""` guards against a page whose extract_text() result is falsy
        # (e.g. None), which would otherwise break the join.
        return "".join(page.extract_text() or "" for page in pdf.pages)


def _ocr_text(pdf_bytes):
    """Rasterise the PDF with pdf2image and return the pytesseract OCR text."""
    return "".join(
        pytesseract.image_to_string(img) for img in convert_from_bytes(pdf_bytes)
    )


def extract_text_from_pdf(pdf_file):
    """Extract text from a PDF, falling back from text layers to OCR.

    The extractors are tried in order: PyMuPDF, pdfplumber, then OCR
    (pdf2image + pytesseract). Each one runs independently, so an exception
    raised by one extractor does not prevent the later ones from being tried.

    Args:
        pdf_file: Binary file-like object supporting ``seek`` and ``read``.

    Returns:
        A ``(method, text)`` tuple. ``method`` is ``"text"`` when a text layer
        was found, ``"ocr"`` when the text came from OCR, and ``"error"`` when
        nothing could be extracted; in the error case ``text`` holds a
        human-readable message instead of document text.
    """
    try:
        pdf_file.seek(0)
        pdf_bytes = pdf_file.read()
    except Exception as e:
        return "error", f"β Failed to read PDF. Error: {str(e)}"

    if not pdf_bytes:
        # Nothing to parse; avoid handing an empty buffer to the PDF libraries.
        return "error", "β Failed to read PDF. Error: the uploaded file is empty."

    extractors = (
        ("text", _pymupdf_text),
        ("text", _pdfplumber_text),
        ("ocr", _ocr_text),
    )
    failures = []
    for method, extractor in extractors:
        try:
            text = extractor(pdf_bytes)
        except Exception as e:
            # Remember the failure but keep going: a later extractor may
            # still be able to handle this document.
            failures.append(str(e))
            continue
        if text.strip():
            return method, text

    if failures:
        return "error", f"β Failed to read PDF. Error: {'; '.join(failures)}"
    return "error", "β Could not extract any text from PDF."
def _as_pdf_stream(pdf_file):
    """Return a seekable binary stream for the value passed in by the UI.

    Accepts raw bytes, a filesystem path string, or an already-open binary
    file object (returned unchanged).
    """
    if isinstance(pdf_file, (bytes, bytearray)):
        return io.BytesIO(pdf_file)
    if isinstance(pdf_file, str):
        with open(pdf_file, "rb") as handle:
            return io.BytesIO(handle.read())
    return pdf_file


def process_pdf(pdf_file):
    """Extract, summarise and explain an uploaded medical report.

    Args:
        pdf_file: The uploaded PDF as raw bytes, a path string, or a binary
            file-like object. ``None`` means nothing was uploaded.

    Returns:
        A 3-tuple ``(extracted_text, summary, explanation)`` matching the
        three output textboxes. On extraction failure the first element is
        the error message and the other two are ``"Error"``.
    """
    if pdf_file is None:
        return "No PDF file was uploaded.", "Error", "Error"

    try:
        pdf_stream = _as_pdf_stream(pdf_file)
    except OSError as e:
        return f"β Failed to read PDF. Error: {str(e)}", "Error", "Error"

    method, extracted_text = extract_text_from_pdf(pdf_stream)
    if method == "error":
        return extracted_text, "Error", "Error"

    try:
        # Shorten for model input
        short_text = extracted_text[:1000]
        summary = summarizer(
            short_text, max_length=120, min_length=30, do_sample=False
        )[0]["summary_text"]
        explanation = (
            "This summary simplifies the medical content extracted from your report. "
            "If there are specific medical terms or values (e.g. Hemoglobin, WBC), the app tries to interpret them. "
            "For full interpretation, consult a doctor."
        )
        return extracted_text, summary.strip(), explanation
    except Exception as e:
        return extracted_text, "β Summarization failed.", f"Error: {str(e)}"


# Gradio UI
iface = gr.Interface(
    fn=process_pdf,
    # type="binary" passes the upload to process_pdf as raw bytes. The former
    # type="file" value is not accepted by Gradio 4+ (only "filepath" and
    # "binary" are), which makes the app fail while building the interface.
    inputs=gr.File(label="Upload Medical Report (PDF)", type="binary"),
    outputs=[
        gr.Textbox(label="π Extracted Report Text"),
        gr.Textbox(label="π§ AI-Generated Summary"),
        gr.Textbox(label="π Simplified Explanation"),
    ],
    title="π§ͺ Medical Report Reader (Free)",
    description=(
        "Upload a medical report in PDF (scanned or digital). The app will extract the text, summarize it using AI, "
        "and give a simplified explanation."
    ),
)

if __name__ == "__main__":
    iface.launch()
|