Spaces:
Runtime error
Runtime error
File size: 1,840 Bytes
7a0cfef 0c0dd0e e75c198 7a0cfef e75c198 75faa01 0c0dd0e e75c198 0c0dd0e 7a0cfef e75c198 0c0dd0e e75c198 7a0cfef 0c0dd0e e75c198 7a0cfef 0c0dd0e e75c198 7a0cfef e75c198 7a0cfef e75c198 7a0cfef 6b2202e e75c198 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
import gradio as gr
import fitz # PyMuPDF
from transformers import pipeline
import re
# Summarization pipeline shared by every request. It is built once, at import
# time, and reused by process_report() for each text chunk. The author picked
# this DistilBART checkpoint as a faster and lighter summarization model.
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
def extract_text_from_pdf(pdf_file):
    """Extract the text of every page of a PDF as a single string.

    Args:
        pdf_file: The PDF to read. Accepted forms are a binary file-like
            object (anything exposing ``read()``), raw ``bytes`` /
            ``bytearray``, or a filesystem path (``str`` or path-like).
            ``None`` (nothing uploaded) is tolerated.

    Returns:
        The text of all pages concatenated, each page followed by a newline.
        An empty string when ``pdf_file`` is ``None``.
    """
    if pdf_file is None:
        return ""

    if hasattr(pdf_file, "read"):
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    elif isinstance(pdf_file, (bytes, bytearray)):
        doc = fitz.open(stream=pdf_file, filetype="pdf")
    else:
        # NOTE(review): depending on the Gradio version/configuration, gr.File
        # passes the upload as a path string instead of an open file object;
        # confirm against the installed Gradio release.
        doc = fitz.open(str(pdf_file), filetype="pdf")

    # The context manager closes the document (and its underlying buffers)
    # even if text extraction raises part-way through.
    with doc:
        return "".join(page.get_text() + "\n" for page in doc)
def chunk_text(text, max_words=500):
    """Split text into consecutive chunks of at most ``max_words`` words.

    Words are whitespace-separated tokens as produced by ``str.split()``, so
    any run of whitespace (including newlines) becomes a single space in the
    output chunks.

    Args:
        text: The text to split.
        max_words: Maximum number of words per chunk; must be positive.

    Returns:
        A list of chunk strings in original order. Empty or whitespace-only
        text yields an empty list.

    Raises:
        ValueError: If ``max_words`` is not positive. Without this check a
            negative value would silently produce no chunks at all.
    """
    if max_words <= 0:
        raise ValueError(f"max_words must be a positive integer, got {max_words!r}")

    words = text.split()
    return [
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    ]
# Matches a boilerplate phrase plus the remainder of the sentence it starts
# (up to and including the closing punctuation and trailing whitespace).
# Bounding the match at the sentence end matters: the summary is usually one
# long line, so an unbounded ".+" would erase everything after the first hit.
_BOILERPLATE_RE = re.compile(
    r"\b(?:Mayo Hospital|Lahore Hospital|submitted by Dr\.)[^.!?\n]*[.!?]?\s*",
    flags=re.IGNORECASE,
)


def simplify_summary(summary):
    """Strip repetitive hospital boilerplate and add a plain-language header.

    Args:
        summary: The generated summary text.

    Returns:
        ``summary`` with each boilerplate phrase (hospital names, "submitted
        by Dr. ...") removed through the end of its sentence, stripped of
        surrounding whitespace and prefixed with an "In simple terms:" line.
    """
    cleaned = _BOILERPLATE_RE.sub("", summary)
    return "🩺 In simple terms:\n" + cleaned.strip()
def process_report(pdf_file):
    """Summarize an uploaded medical-report PDF.

    The PDF text is extracted, split into word-limited chunks, each chunk is
    summarized with the module-level ``summarizer`` pipeline, and the partial
    summaries are joined with spaces.

    Args:
        pdf_file: The upload as delivered by the ``gr.File`` input, or
            ``None`` when nothing was uploaded.

    Returns:
        A ``(summary, simplified)`` tuple of strings. When no text can be
        extracted, ``summary`` is an error message and ``simplified`` is
        empty.
    """
    no_text = ("❌ Couldn't extract text from the PDF.", "")
    if pdf_file is None:
        return no_text

    text = extract_text_from_pdf(pdf_file)
    if not text.strip():
        return no_text

    # Treat everything before the first blank line as header boilerplate and
    # summarize only what follows. Fall back to the full text when there is
    # no blank line, or when nothing but whitespace follows it, so a report
    # is never reduced to an empty summary.
    _header, _separator, body = text.partition("\n\n")
    core_text = body if body.strip() else text

    chunks = chunk_text(core_text, max_words=600)
    # truncation=True lets the tokenizer clip a chunk that exceeds the
    # model's maximum input length instead of failing inside the model.
    summaries = [
        summarizer(
            chunk,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        for chunk in chunks
    ]
    final_summary = " ".join(summaries)
    return final_summary, simplify_summary(final_summary)
# Gradio UI: one PDF upload in, two text boxes out (the raw model summary and
# the simplified version), wired to process_report().
demo = gr.Interface(
    fn=process_report,
    # Restrict the picker to PDFs, since process_report() only parses PDFs.
    inputs=gr.File(label="Upload Medical Report PDF", file_types=[".pdf"]),
    outputs=[
        gr.Textbox(label="AI-Generated Summary", lines=8),
        gr.Textbox(label="Simplified Explanation", lines=8),
    ],
    title="🏥 Medical Report Summarizer",
    description="Speeds up summarization by chunking text & uses a lighter distil-BART model, focusing on core medical findings.",
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()
|