# BioWhisper / app.py
# Hugging Face Spaces page header captured along with this file:
#   ashhal's picture | Update app.py | 46b466f verified | raw | history blame | 1.88 kB
import logging
import re

import fitz  # PyMuPDF
import gradio as gr
from transformers import pipeline
# Shared summarization pipeline, built once when the module is loaded.
# Per the original author's note, this DistilBART checkpoint was chosen
# because it is a faster summarization model.
summarizer = pipeline(
    "summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
def extract_text_from_pdf(pdf_file):
    """Return the concatenated text of every page of a PDF.

    Args:
        pdf_file: Either a filesystem path, or an upload object that exposes
            the path through a ``name`` attribute. ``None`` (nothing
            uploaded) is tolerated.

    Returns:
        The extracted text (possibly an empty string), or ``None`` when no
        usable path was supplied or the document could not be opened/read.
    """
    # Accept both a plain path and a file wrapper carrying the path in
    # ``.name``; for ``None`` this yields ``None`` and hits the guard below.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    if not pdf_path:
        return None

    try:
        # The context manager closes the document even when a page fails to
        # extract; previously the handle was never released.
        with fitz.open(pdf_path) as doc:
            return "".join(page.get_text() for page in doc)
    except Exception:
        # Deliberately best-effort: callers treat ``None`` as "unreadable
        # PDF". Log the cause instead of discarding it silently.
        logging.getLogger(__name__).exception(
            "Failed to extract text from %r", pdf_path
        )
        return None
def chunk_text(text, max_words=500):
    """Split *text* into consecutive pieces of at most *max_words* words.

    Words are whatever ``str.split()`` yields (whitespace-separated), and
    each piece is re-joined with single spaces. Empty or whitespace-only
    input produces an empty list.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), max_words):
        window = tokens[start:start + max_words]
        pieces.append(" ".join(window))
    return pieces
def simplify_summary(summary):
    """Strip hospital/doctor mentions and prepend the plain-language header.

    On each line, everything from the first case-insensitive match of
    "Mayo Hospital", "Lahore Hospital" or "Dr. <word>" up to the end of
    that line is removed. The result is trimmed of surrounding whitespace
    and prefixed with the "In simple terms" banner.
    """
    identifying_tail = re.compile(
        r"(Mayo Hospital|Lahore Hospital|Dr\.\s+\w+)+.*",
        flags=re.IGNORECASE,
    )
    cleaned = identifying_tail.sub("", summary)
    body = cleaned.strip()
    return "🩺 In simple terms:\n" + body
def process_report(pdf_file):
    """Summarize an uploaded medical-report PDF.

    The PDF text is split into 600-word chunks, each chunk is summarized
    independently, and the partial summaries are joined with spaces.

    Args:
        pdf_file: The uploaded file as handed over by the UI; forwarded
            unchanged to ``extract_text_from_pdf``.

    Returns:
        A 2-tuple ``(summary, simplified)``. When the PDF yields no text, or
        when no chunk could be summarized, the first element is a
        user-facing error message and the second is an empty string.
    """
    log = logging.getLogger(__name__)

    raw_text = extract_text_from_pdf(pdf_file)
    if not raw_text:
        return "❌ Could not read PDF file.", ""

    summaries = []
    for index, chunk in enumerate(chunk_text(raw_text, max_words=600)):
        try:
            # truncation=True: a 600-word chunk can tokenize to more tokens
            # than the model accepts; without truncation such a chunk
            # raises and its content is lost from the summary entirely.
            result = summarizer(
                chunk,
                max_length=150,
                min_length=30,
                do_sample=False,
                truncation=True,
            )
            summaries.append(result[0]["summary_text"])
        except Exception:
            # Best-effort per chunk: skip the failing one, but leave a trace.
            # (A bare ``except:`` would also trap KeyboardInterrupt/SystemExit.)
            log.exception("Summarization failed for chunk %d; skipping it", index)
            continue

    if not summaries:
        return "❌ Summarization failed. Try a smaller or clearer PDF.", ""

    full_summary = " ".join(summaries)
    return full_summary, simplify_summary(full_summary)
# Gradio UI: a single file upload feeds process_report, whose two return
# values fill the two text boxes in order.
demo = gr.Interface(
    fn=process_report,
    inputs=gr.File(label="Upload Medical Report PDF"),
    outputs=[
        gr.Textbox(label="AI-Generated Summary", lines=8),
        gr.Textbox(label="Simplified Explanation", lines=8),
    ],
    # The title emoji had been stored as mojibake (mis-decoded UTF-8 bytes);
    # restored to the hospital emoji it encodes.
    title="🏥 Medical Report Summarizer",
    description="Upload a medical report PDF to get an AI summary and non-medical explanation.",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()