Spaces:

ashhal
/

BioWhisper

Runtime error

App Files Files Community

BioWhisper / app.py

ashhal

Update app.py

d37d30b verified 18 days ago

raw

history blame contribute delete

2.92 kB

	import gradio as gr
	import fitz # PyMuPDF
	import pdfplumber
	import pytesseract
	from pdf2image import convert_from_bytes
	from transformers import pipeline
	from PIL import Image
	import io

	# Summarization pipeline backed by a free Hugging Face model.
	# It is built once at import time and reused by every call to process_pdf.
	summarizer = pipeline(
	    task="summarization",
	    model="Falconsai/text_summarization",
	)

	# Optional: Configure Tesseract path for Windows users
	# pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

	def extract_text_from_pdf(pdf_file):
	    """Extract text from a PDF, falling back from embedded text to OCR.

	    The extractors are tried in order: PyMuPDF, pdfplumber, then OCR via
	    pdf2image + pytesseract. Each one runs in isolation, so a failure in an
	    earlier extractor does not prevent the later ones from being tried.

	    Args:
	        pdf_file: The PDF to read. May be a binary file-like object (anything
	            with ``read()``; ``seek(0)`` is called first when available), a
	            bytes-like object, or a filesystem path.

	    Returns:
	        A ``(method, text)`` tuple. ``method`` is ``"text"`` when embedded
	        text was found, ``"ocr"`` when the text came from OCR, and
	        ``"error"`` when nothing could be extracted; in the error case
	        ``text`` holds a human-readable message instead of report content.
	    """
	    try:
	        pdf_bytes = _read_pdf_bytes(pdf_file)
	    except Exception as e:
	        return "error", f"❌ Failed to read PDF. Error: {str(e)}"

	    extractors = (
	        ("text", _extract_with_pymupdf),
	        ("text", _extract_with_pdfplumber),
	        ("ocr", _extract_with_ocr),
	    )

	    first_error = None
	    for method, extractor in extractors:
	        try:
	            text = extractor(pdf_bytes)
	        except Exception as e:
	            # Remember the earliest failure for the final message, but keep
	            # going: a later extractor may still succeed on this document.
	            if first_error is None:
	                first_error = e
	            continue
	        if text.strip():
	            return method, text

	    if first_error is not None:
	        return "error", f"❌ Failed to read PDF. Error: {str(first_error)}"
	    return "error", "❌ Could not extract any text from PDF."


	def _read_pdf_bytes(pdf_file):
	    """Return the raw bytes of *pdf_file*, raising ValueError if there are none."""
	    if pdf_file is None:
	        raise ValueError("no file was uploaded")

	    if isinstance(pdf_file, (bytes, bytearray, memoryview)):
	        data = bytes(pdf_file)
	    elif isinstance(pdf_file, str) or hasattr(pdf_file, "__fspath__"):
	        with open(pdf_file, "rb") as handle:
	            data = handle.read()
	    else:
	        # File-like object: rewind first in case it was already consumed.
	        if hasattr(pdf_file, "seek"):
	            pdf_file.seek(0)
	        data = pdf_file.read()

	    if not data:
	        raise ValueError("uploaded file is empty")
	    return data


	def _extract_with_pymupdf(pdf_bytes):
	    """Return the embedded text of every page, read with PyMuPDF."""
	    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
	        return "".join(page.get_text() for page in doc)


	def _extract_with_pdfplumber(pdf_bytes):
	    """Return the embedded text of every page, read with pdfplumber."""
	    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
	        # extract_text() may return None for a page without text.
	        return "".join(page.extract_text() or "" for page in pdf.pages)


	def _extract_with_ocr(pdf_bytes):
	    """Render every page to an image and return the text recognised by Tesseract."""
	    page_images = convert_from_bytes(pdf_bytes)
	    return "".join(pytesseract.image_to_string(image) for image in page_images)

	def process_pdf(pdf_file):
	    """Extract the text of an uploaded PDF report and summarize it.

	    Args:
	        pdf_file: The uploaded PDF, passed straight to extract_text_from_pdf.

	    Returns:
	        A 3-tuple of strings ``(report_text, summary, explanation)``.
	        If extraction fails, the first item is the extraction error message
	        and the other two are ``"Error"``. If summarization fails, the
	        report text is still returned, followed by a failure notice and the
	        error detail.
	    """
	    status, report_text = extract_text_from_pdf(pdf_file)

	    # Nothing to summarize when extraction failed; surface its message as-is.
	    if status == "error":
	        return report_text, "Error", "Error"

	    disclaimer = (
	        "This summary simplifies the medical content extracted from your report. "
	        "If there are specific medical terms or values (e.g. Hemoglobin, WBC), the app tries to interpret them. "
	        "For full interpretation, consult a doctor."
	    )

	    try:
	        # Only the first 1000 characters are handed to the model.
	        model_input = report_text[:1000]
	        results = summarizer(
	            model_input,
	            max_length=120,
	            min_length=30,
	            do_sample=False,
	        )
	        summary_text = results[0]["summary_text"].strip()
	    except Exception as exc:
	        return report_text, "❌ Summarization failed.", f"Error: {exc!s}"

	    return report_text, summary_text, disclaimer

	# Gradio UI
	def _process_uploaded_pdf(pdf_bytes):
	    """Adapt an upload delivered as raw bytes to the input process_pdf expects.

	    The bytes are wrapped in an in-memory binary stream, which the extraction
	    step can seek and read. A missing upload (``None``) becomes an empty
	    stream, so the failure is reported through process_pdf's normal error
	    outputs rather than as an unhandled exception.
	    """
	    return process_pdf(io.BytesIO(pdf_bytes or b""))


	iface = gr.Interface(
	    fn=_process_uploaded_pdf,
	    # type="binary" is accepted by both Gradio 3.x and 4.x. The former
	    # type="file" is rejected by Gradio 4.x, which only allows "filepath"
	    # or "binary", so the app could not start on newer releases.
	    inputs=gr.File(label="Upload Medical Report (PDF)", type="binary"),
	    outputs=[
	        gr.Textbox(label="📄 Extracted Report Text"),
	        gr.Textbox(label="🧠 AI-Generated Summary"),
	        gr.Textbox(label="📘 Simplified Explanation"),
	    ],
	    title="🧪 Medical Report Reader (Free)",
	    description=(
	        "Upload a medical report in PDF (scanned or digital). The app will extract the text, summarize it using AI, "
	        "and give a simplified explanation."
	    ),
	)

	# Start the web server only when run as a script, not when imported.
	if __name__ == "__main__":
	    iface.launch()