Spaces:

ashhal
/

BioWhisper

Runtime error

App Files Files Community

ashhal committed 19 days ago

Commit

e75c198

verified ·

1 Parent(s): 04c5a08

Update app.py

Browse files

Files changed (1) hide show

app.py +35 -26

app.py CHANGED Viewed

@@ -1,43 +1,52 @@
 import gradio as gr
 import fitz  # PyMuPDF
 from transformers import pipeline
-# Load summarization model
-summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
 def extract_text_from_pdf(pdf_file):
-    doc = fitz.open(pdf_file.name)  # Use .name instead of .read()
-    text = ""
-    for page in doc:
-        text += page.get_text()
     return text
 def simplify_summary(summary):
-    # Convert technical language to a more friendly explanation
-    return "🩺 Here's what the report says in simple words:\n\n" + summary.replace("\n", " ")
 def process_report(pdf_file):
-    try:
-        text = extract_text_from_pdf(pdf_file)
-        if len(text.strip()) == 0:
-            return "❌ Couldn't extract text from the PDF.", ""
-        summary = summarizer(text, max_length=300, min_length=60, do_sample=False)[0]["summary_text"]
-        explanation = simplify_summary(summary)
-        return summary, explanation
-    except Exception as e:
-        return f"❌ Error: {str(e)}", ""
-# Gradio Interface
 demo = gr.Interface(
     fn=process_report,
-    inputs=gr.File(label="Upload your Medical Report PDF"),
     outputs=[
-        gr.Textbox(label="Summarized Report", lines=10),
-        gr.Textbox(label="Explanation in Simple Terms", lines=10)
     ],
-    title="📋 Medical Report Analyzer",
-    description="Upload a medical report and get a simplified summary using GPT-powered summarization.",
 )
-demo.launch()

 import gradio as gr
 import fitz  # PyMuPDF
 from transformers import pipeline
+import re
# Faster, lighter summarization checkpoint; loaded once at import time and
# shared by every request.
SUMMARIZATION_MODEL = "sshleifer/distilbart-cnn-12-6"
summarizer = pipeline("summarization", model=SUMMARIZATION_MODEL)
 def extract_text_from_pdf(pdf_file):
     """Return the text of every page of a PDF, with a newline after each page.

     Args:
         pdf_file: The uploaded PDF. Accepted forms are a filesystem path
             (``str``), raw ``bytes``/``bytearray``, an object that exposes
             its on-disk path as ``.name``, or any binary stream providing
             ``.read()``.
             NOTE(review): which of these ``gr.File`` hands to the callback
             depends on the installed Gradio version -- confirm.

     Returns:
         The concatenated page text; empty string if no page has text.
     """
     import os  # local import so this block does not depend on file-level imports

     if isinstance(pdf_file, (bytes, bytearray)):
         doc = fitz.open(stream=bytes(pdf_file), filetype="pdf")
     elif isinstance(pdf_file, str):
         doc = fitz.open(pdf_file)
     else:
         # Prefer the on-disk path when there is one: reading the wrapper
         # object directly is what the previous revision had to move away from.
         path = getattr(pdf_file, "name", None)
         if isinstance(path, str) and os.path.isfile(path):
             doc = fitz.open(path)
         else:
             doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
     # The context manager closes the document even if text extraction raises.
     with doc:
         return "".join(page.get_text() + "\n" for page in doc)
+def chunk_text(text, max_words=500):
+    words = text.split()
+    return [" ".join(words[i:i + max_words]) for i in range(0, len(words), max_words)]
 def simplify_summary(summary):
     """Strip hospital/doctor boilerplate from a summary and add a friendly header.

     Only the sentences that mention the boilerplate phrases are removed.
     Deleting up to the end of the line instead would wipe out every later
     finding, because the summary is normally one long line.

     Args:
         summary: The generated summary text.

     Returns:
         ``"🩺 In simple terms:\\n"`` followed by the cleaned summary. When no
         boilerplate phrase is present the summary is only stripped of
         surrounding whitespace; otherwise the remaining sentences are
         re-joined with single spaces.
     """
     boilerplate = re.compile(
         r"\b(?:Mayo Hospital|Lahore Hospital|submitted by Dr\.)", re.IGNORECASE
     )
     # Break after sentence punctuation, but not after the "Dr." abbreviation,
     # so "submitted by Dr. <name>." stays one sentence and is removed whole.
     sentence_break = re.compile(r"(?<!Dr\.)(?<=[.!?])\s+", re.IGNORECASE)
     cleaned = summary.strip()
     if boilerplate.search(cleaned):
         sentences = sentence_break.split(cleaned)
         cleaned = " ".join(s for s in sentences if s and not boilerplate.search(s))
     return "🩺 In simple terms:\n" + cleaned
 def process_report(pdf_file):
     """Summarize an uploaded medical-report PDF for the Gradio interface.

     The text is extracted, everything up to the first blank line is treated
     as header boilerplate and skipped, the remainder is summarized in
     600-word chunks, and the chunk summaries are joined.

     Args:
         pdf_file: The upload handed over by the ``gr.File`` input; ``None``
             when nothing was uploaded.

     Returns:
         A ``(summary, simplified)`` tuple matching the two output textboxes.
         On failure the first element is a message starting with "❌" and the
         second is an empty string.
     """
     if pdf_file is None:
         return "❌ Couldn't extract text from the PDF.", ""
     # UI boundary: report any failure in the textbox instead of crashing the
     # request, as the previous revision of this function did.
     try:
         text = extract_text_from_pdf(pdf_file)
         if not text.strip():
             return "❌ Couldn't extract text from the PDF.", ""
         # Skip the leading header paragraph, but fall back to the full text
         # when nothing follows the first blank line; otherwise a report whose
         # only blank line is at the very end would yield an empty summary.
         _, _, body = text.partition("\n\n")
         core_text = body if body.strip() else text
         chunks = chunk_text(core_text, max_words=600)
         summaries = [
             summarizer(
                 chunk,
                 max_length=150,
                 min_length=30,
                 do_sample=False,
                 truncation=True,  # a 600-word chunk can exceed the model's input limit
             )[0]["summary_text"]
             for chunk in chunks
         ]
     except Exception as exc:
         return f"❌ Error: {exc}", ""
     final_summary = " ".join(summaries)
     return final_summary, simplify_summary(final_summary)
 # Components are created in the same order the interface lists them:
 # one PDF upload in, two text panes out.
 report_upload = gr.File(label="Upload Medical Report PDF")
 summary_box = gr.Textbox(label="AI-Generated Summary", lines=8)
 explanation_box = gr.Textbox(label="Simplified Explanation", lines=8)
 demo = gr.Interface(
     fn=process_report,
     inputs=report_upload,
     outputs=[summary_box, explanation_box],
     title="🏥 Medical Report Summarizer",
     description="Speeds up summarization by chunking text & uses a lighter distil-BART model, focusing on core medical findings.",
 )
 # Start the web server only when executed as a script, not on import.
 if __name__ == "__main__":
     demo.launch()