Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,43 +1,52 @@
|
|
1 |
import gradio as gr
|
2 |
import fitz # PyMuPDF
|
3 |
from transformers import pipeline
|
|
|
4 |
|
5 |
-
#
|
6 |
-
summarizer = pipeline("summarization", model="
|
7 |
|
8 |
def extract_text_from_pdf(pdf_file):
|
9 |
-
doc = fitz.open(pdf_file.
|
10 |
-
text = ""
|
11 |
-
for page in doc:
|
12 |
-
text += page.get_text()
|
13 |
return text
|
14 |
|
|
|
|
|
|
|
|
|
15 |
def simplify_summary(summary):
|
16 |
-
#
|
17 |
-
|
|
|
18 |
|
19 |
def process_report(pdf_file):
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
|
|
|
|
|
|
|
|
32 |
demo = gr.Interface(
|
33 |
fn=process_report,
|
34 |
-
inputs=gr.File(label="Upload
|
35 |
outputs=[
|
36 |
-
gr.Textbox(label="
|
37 |
-
gr.Textbox(label="Explanation
|
38 |
],
|
39 |
-
title="
|
40 |
-
description="
|
41 |
)
|
42 |
|
43 |
-
|
|
|
|
1 |
import gradio as gr
|
2 |
import fitz # PyMuPDF
|
3 |
from transformers import pipeline
|
4 |
+
import re
|
5 |
|
6 |
+
# Build the summarization pipeline once, at import time, so every request
# reuses the same loaded model. DistilBART is chosen as a faster and lighter
# summarization model.
summarizer = pipeline(
    task="summarization",
    model="sshleifer/distilbart-cnn-12-6",
)
|
8 |
|
9 |
def extract_text_from_pdf(pdf_file):
    """Return the plain text of every page of a PDF.

    Args:
        pdf_file: The PDF to read. Accepted forms are a filesystem path
            (``str`` or path-like), raw PDF ``bytes``/``bytearray``, a binary
            file-like object exposing ``read()``, or an upload wrapper that
            only exposes the file's location through a ``name`` attribute.

    Returns:
        The text of all pages concatenated in page order, with a newline
        appended after each page's text.
    """
    import os  # local import: only needed for path detection in this helper

    if isinstance(pdf_file, (bytes, bytearray)):
        doc = fitz.open(stream=bytes(pdf_file), filetype="pdf")
    elif isinstance(pdf_file, (str, os.PathLike)):
        # The upload component may hand over a path instead of an open file
        # (depends on the Gradio version / `type` setting), so `.read()`
        # cannot be assumed to exist.
        doc = fitz.open(os.fspath(pdf_file))
    elif hasattr(pdf_file, "read"):
        # Open file object: parse the PDF straight from its bytes.
        doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
    else:
        # Last resort: a wrapper object that only carries the file path.
        doc = fitz.open(pdf_file.name)

    try:
        return "".join(page.get_text() + "\n" for page in doc)
    finally:
        # Release the document explicitly; it was previously left open.
        doc.close()
|
13 |
|
14 |
+
def chunk_text(text, max_words=500):
    """Split *text* into consecutive pieces of at most *max_words* words.

    Words are whatever ``str.split()`` yields, so any run of whitespace is a
    separator and each piece is re-joined with single spaces.

    Args:
        text: The text to split.
        max_words: Maximum number of words per piece.

    Returns:
        A list of strings in original order; empty when *text* has no words.
    """
    tokens = text.split()
    pieces = []
    for start in range(0, len(tokens), max_words):
        window = tokens[start:start + max_words]
        pieces.append(" ".join(window))
    return pieces
|
17 |
+
|
18 |
def simplify_summary(summary):
    """Return *summary* with hospital boilerplate removed and a lay-reader prefix.

    Any text starting at "Mayo Hospital", "Lahore Hospital" or
    "submitted by Dr." (case-insensitive) is removed through the end of that
    line, including the trailing newline if present. The remainder is
    stripped of surrounding whitespace and prefixed with the
    "In simple terms:" heading.

    Args:
        summary: The generated summary text.

    Returns:
        The prefixed, cleaned-up summary string.
    """
    boilerplate = re.compile(
        r"\b(?:Mayo Hospital|Lahore Hospital|submitted by Dr\.).+\n?",
        re.IGNORECASE,
    )
    cleaned = boilerplate.sub("", summary)
    cleaned = cleaned.strip()
    return "π©Ί In simple terms:\n" + cleaned
|
22 |
|
23 |
def process_report(pdf_file):
    """Summarize an uploaded PDF report and produce a simplified version.

    The PDF text is extracted, everything up to the first blank line is
    treated as header boilerplate and dropped, the remainder is split into
    word-bounded chunks, each chunk is summarized, and the partial summaries
    are joined with spaces.

    Args:
        pdf_file: The uploaded file as delivered by the UI, or ``None`` when
            nothing was uploaded.

    Returns:
        A ``(summary, simplified)`` tuple of strings. When there is no upload
        or no extractable text, the first element is a message for the user
        and the second is an empty string.
    """
    if pdf_file is None:
        # Submitting with no file would otherwise crash inside extraction.
        return "Please upload a PDF file first.", ""

    text = extract_text_from_pdf(pdf_file)
    if not text.strip():
        return "β Couldn't extract text from the PDF.", ""

    # Remove irrelevant boilerplate: skip the leading section before the first
    # blank line. If nothing meaningful follows it (or there is no blank line
    # at all), summarize the full text rather than an empty string.
    _, _, body = text.partition("\n\n")
    core_text = body if body.strip() else text

    chunks = chunk_text(core_text, max_words=600)
    summaries = [
        # truncation=True keeps a chunk whose token count exceeds the model's
        # input limit from raising; 600 words can tokenize to more than that.
        summarizer(
            chunk,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        for chunk in chunks
    ]

    final_summary = " ".join(summaries)
    return final_summary, simplify_summary(final_summary)
|
39 |
+
|
40 |
# UI wiring: a single PDF upload feeds process_report, whose two return values
# fill the two text boxes below (in order).
report_upload = gr.File(label="Upload Medical Report PDF")
summary_box = gr.Textbox(label="AI-Generated Summary", lines=8)
explanation_box = gr.Textbox(label="Simplified Explanation", lines=8)

demo = gr.Interface(
    fn=process_report,
    inputs=report_upload,
    outputs=[summary_box, explanation_box],
    title="π₯ Medical Report Summarizer",
    description="Speeds up summarization by chunking text & uses a lighter distil-BART model, focusing on core medical findings.",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|