Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -3,39 +3,46 @@ import fitz # PyMuPDF
|
|
3 |
from transformers import pipeline
|
4 |
import re
|
5 |
|
6 |
-
# Use
|
7 |
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
8 |
|
9 |
def extract_text_from_pdf(pdf_file):
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
def chunk_text(text, max_words=500):
    """Split *text* into consecutive chunks of at most *max_words* words.

    Words are whitespace-delimited. Each chunk is re-joined with single
    spaces, so the original spacing and line breaks are not preserved.

    Args:
        text: The string to split.
        max_words: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings in original order; empty when *text*
        contains no words.

    Raises:
        ValueError: If *max_words* is smaller than 1.
    """
    # A negative step would make range() empty and silently drop the whole
    # text, and zero fails with an unrelated-looking range() error.
    if max_words < 1:
        raise ValueError(f"max_words must be a positive integer, got {max_words!r}")
    words = text.split()
    return [
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    ]
|
17 |
|
18 |
def simplify_summary(summary):
|
19 |
-
|
20 |
-
summary = re.sub(r"\b(?:Mayo Hospital|Lahore Hospital|submitted by Dr\.).+\n?", "", summary, flags=re.IGNORECASE)
|
21 |
return "π©Ί In simple terms:\n" + summary.strip()
|
22 |
|
23 |
def process_report(pdf_file):
|
24 |
-
|
25 |
-
if not
|
26 |
-
return "β
|
27 |
|
28 |
-
|
29 |
-
|
30 |
-
|
|
|
|
|
|
|
|
|
|
|
31 |
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
simple = simplify_summary(final_summary)
|
38 |
-
return final_summary, simple
|
39 |
|
40 |
demo = gr.Interface(
|
41 |
fn=process_report,
|
@@ -45,7 +52,7 @@ demo = gr.Interface(
|
|
45 |
gr.Textbox(label="Simplified Explanation", lines=8)
|
46 |
],
|
47 |
title="π₯ Medical Report Summarizer",
|
48 |
-
description="
|
49 |
)
|
50 |
|
51 |
if __name__ == "__main__":
|
|
|
3 |
from transformers import pipeline
|
4 |
import re
|
5 |
|
6 |
+
# Use a faster summarization model.
|
7 |
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
|
8 |
|
9 |
def extract_text_from_pdf(pdf_file):
    """Return the text of every page of a PDF, or ``None`` on failure.

    Args:
        pdf_file: The uploaded file. Either an object whose ``.name``
            attribute holds the on-disk path, or the path itself.
            NOTE(review): which of the two the UI passes depends on the
            Gradio version/configuration -- confirm against the caller.

    Returns:
        The text of all pages concatenated in page order, or ``None`` when
        no file was supplied or the document could not be opened or read.
    """
    import logging

    if pdf_file is None:
        return None

    # Accept a plain path as well as a wrapper object carrying ``.name``.
    pdf_path = getattr(pdf_file, "name", pdf_file)
    try:
        # The context manager closes the document even when a page fails
        # to parse, so the file handle is not leaked.
        with fitz.open(pdf_path) as doc:
            return "".join(page.get_text() for page in doc)
    except Exception:
        # Boundary handler: callers treat ``None`` as "unreadable PDF";
        # keep the traceback in the log so the cause is not lost.
        logging.getLogger(__name__).exception(
            "Could not extract text from %r", pdf_path
        )
        return None
|
18 |
|
19 |
def chunk_text(text, max_words=500):
    """Split *text* into consecutive chunks of at most *max_words* words.

    Words are whitespace-delimited. Each chunk is re-joined with single
    spaces, so the original spacing and line breaks are not preserved.

    Args:
        text: The string to split.
        max_words: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings in original order; empty when *text*
        contains no words.

    Raises:
        ValueError: If *max_words* is smaller than 1.
    """
    # A negative step would make range() empty and silently drop the whole
    # text, and zero fails with an unrelated-looking range() error.
    if max_words < 1:
        raise ValueError(f"max_words must be a positive integer, got {max_words!r}")
    words = text.split()
    return [
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    ]
|
22 |
|
23 |
def simplify_summary(summary):
    """Strip institution/doctor mentions and add a plain-language header.

    On each line, everything from the first occurrence of "Mayo Hospital",
    "Lahore Hospital" or "Dr. <word>" (case-insensitive) to the end of that
    line is removed. The remaining text is trimmed and prefixed with a
    header line.

    Args:
        summary: The summary text to clean up.

    Returns:
        The header line followed by the cleaned summary.
    """
    # ``.`` does not match newlines here, so the cut stops at end of line.
    cleaned = re.sub(
        r"(?:Mayo Hospital|Lahore Hospital|Dr\.\s+\w+).*",
        "",
        summary,
        flags=re.IGNORECASE,
    )
    # U+1FA7A is the stethoscope emoji; it is written as an escape so the
    # literal cannot be mis-encoded again.
    return "\U0001FA7A In simple terms:\n" + cleaned.strip()
|
26 |
|
27 |
def process_report(pdf_file):
    """Summarize an uploaded PDF report and build a simplified explanation.

    The PDF text is split into 600-word chunks, each chunk is summarized
    independently, and the partial summaries are joined with spaces.

    Args:
        pdf_file: The uploaded file, passed through unchanged to
            ``extract_text_from_pdf``.

    Returns:
        A ``(summary, simplified)`` tuple of strings. On failure the first
        element is a user-facing error message and the second is ``""``.
    """
    import logging

    log = logging.getLogger(__name__)

    raw_text = extract_text_from_pdf(pdf_file)
    if not raw_text:
        # NOTE(review): the leading symbol was mis-encoded in the source;
        # restored as U+274C (cross mark) -- confirm.
        return "\u274c Could not read PDF file.", ""

    summaries = []
    for index, chunk in enumerate(chunk_text(raw_text, max_words=600)):
        try:
            # truncation=True keeps a token-heavy chunk within the model's
            # input limit instead of failing on it.
            result = summarizer(
                chunk,
                max_length=150,
                min_length=30,
                do_sample=False,
                truncation=True,
            )
            summaries.append(result[0]["summary_text"])
        except Exception:
            # Best effort: skip the chunk that failed, but leave a trace.
            # Unlike a bare ``except``, this lets KeyboardInterrupt and
            # SystemExit propagate.
            log.exception("Summarization failed for chunk %d", index)
            continue

    if not summaries:
        return "\u274c Summarization failed. Try a smaller or clearer PDF.", ""

    full_summary = " ".join(summaries)
    return full_summary, simplify_summary(full_summary)
|
|
|
|
|
46 |
|
47 |
demo = gr.Interface(
|
48 |
fn=process_report,
|
|
|
52 |
gr.Textbox(label="Simplified Explanation", lines=8)
|
53 |
],
|
54 |
title="π₯ Medical Report Summarizer",
|
55 |
+
description="Upload a medical report PDF to get an AI summary and non-medical explanation."
|
56 |
)
|
57 |
|
58 |
if __name__ == "__main__":
|