# Hosting-page status captured with this file: "Spaces: Runtime error".
import os
import re

import fitz  # PyMuPDF
import gradio as gr
from transformers import pipeline
# Use a faster and lighter summarization model.
# The pipeline is built once at import time and shared by every request, so
# importing this module pays the model-loading cost up front.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
def extract_text_from_pdf(pdf_file):
    """Return the plain text of every page in a PDF.

    Args:
        pdf_file: The uploaded PDF in any of these forms:
            * ``None`` -- nothing was uploaded;
            * ``str`` / ``os.PathLike`` -- path to the file on disk;
            * ``bytes`` / ``bytearray`` -- the raw PDF content;
            * any object with a ``read()`` method returning the PDF bytes.
            NOTE(review): which form arrives presumably depends on the
            installed Gradio version and the ``type`` of ``gr.File`` --
            confirm against the deployed environment.

    Returns:
        The text of all pages concatenated in page order, each page followed
        by a newline. An empty string when ``pdf_file`` is ``None``.
    """
    if pdf_file is None:
        return ""

    if isinstance(pdf_file, (str, os.PathLike)):
        document = fitz.open(os.fspath(pdf_file), filetype="pdf")
    elif isinstance(pdf_file, (bytes, bytearray)):
        document = fitz.open(stream=pdf_file, filetype="pdf")
    else:
        document = fitz.open(stream=pdf_file.read(), filetype="pdf")

    # Close the document deterministically instead of leaking the handle.
    with document:
        return "".join(page.get_text() + "\n" for page in document)
def chunk_text(text, max_words=500):
    """Split text into consecutive chunks of at most ``max_words`` words.

    Words are whatever ``str.split()`` yields, so any run of whitespace
    (spaces, tabs, newlines) is a separator and is normalised to a single
    space inside each chunk.

    Args:
        text: The text to split.
        max_words: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings in original order. Empty when ``text``
        contains no words.

    Raises:
        ValueError: If ``max_words`` is smaller than 1. A negative step would
            otherwise silently produce no chunks and drop the whole text.
    """
    if max_words < 1:
        raise ValueError(f"max_words must be a positive integer, got {max_words!r}")

    words = text.split()
    return [
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    ]
# Matches a boilerplate phrase plus the remainder of the sentence it starts.
# The match ends at the first ., ! or ? that is followed by whitespace or the
# end of the text (so decimals such as "9.5" do not end it), or at the end of
# the line when no such terminator exists.
_BOILERPLATE_RE = re.compile(
    r"\b(?:Mayo Hospital|Lahore Hospital|submitted by Dr\.)"
    r"[^\n]*?(?:[.!?](?=\s|$)|(?=\n|$))[ \t]*\n?",
    flags=re.IGNORECASE,
)


def simplify_summary(summary):
    """Strip repetitive hospital/doctor boilerplate and add a friendly prefix.

    Removal is limited to the sentence (or line) that starts at the
    boilerplate phrase. The summaries passed in are usually a single line, so
    removing "up to the end of the line" would discard every finding that
    follows the first mention of a hospital.

    Args:
        summary: The machine-generated summary text.

    Returns:
        The cleaned summary, stripped of surrounding whitespace and prefixed
        with an "In simple terms:" heading line.
    """
    cleaned = _BOILERPLATE_RE.sub("", summary)
    return "🩺 In simple terms:\n" + cleaned.strip()
def process_report(pdf_file):
    """Summarize an uploaded medical-report PDF.

    Pipeline: extract the text, drop the leading header paragraph, split the
    remainder into word chunks, summarize each chunk, then join the partial
    summaries and derive a simplified version.

    Args:
        pdf_file: The uploaded file as delivered by the ``gr.File`` input, or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(summary, simplified)`` tuple of strings. On failure the first
        element is an error message and the second is empty.
    """
    if pdf_file is None:
        return "❌ Please upload a PDF file.", ""

    text = extract_text_from_pdf(pdf_file)
    if not text.strip():
        # NOTE(review): the leading symbol was mis-encoded in the captured
        # source; it has been restored as a cross mark -- confirm.
        return "❌ Couldn't extract text from the PDF.", ""

    # Remove irrelevant boilerplate: everything before the first blank line is
    # treated as a header. If nothing meaningful follows that blank line
    # (e.g. the only blank line is at the very end), keep the full text so the
    # report is not summarized as empty.
    # NOTE(review): for multi-page files this presumably discards the whole
    # first page when the extractor emits no blank lines within a page --
    # verify against real reports.
    _, _, body = text.partition("\n\n")
    core_text = body if body.strip() else text

    chunks = chunk_text(core_text, max_words=600)
    summaries = [
        summarizer(
            chunk,
            max_length=150,
            min_length=30,
            do_sample=False,
            # A 600-word chunk can tokenize to more than the model accepts;
            # truncate instead of failing on over-long input.
            truncation=True,
        )[0]["summary_text"]
        for chunk in chunks
    ]

    final_summary = " ".join(summaries)
    simple = simplify_summary(final_summary)
    return final_summary, simple
# Gradio UI: one PDF upload in, two text boxes out (the raw model summary and
# the simplified explanation), both produced by process_report.
demo = gr.Interface(
    fn=process_report,
    inputs=gr.File(label="Upload Medical Report PDF"),
    outputs=[
        gr.Textbox(label="AI-Generated Summary", lines=8),
        gr.Textbox(label="Simplified Explanation", lines=8),
    ],
    title="🏥 Medical Report Summarizer",
    description="Speeds up summarization by chunking text & uses a lighter distil-BART model, focusing on core medical findings.",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()