ashhal committed on
Commit
46b466f
·
verified ·
1 Parent(s): e75c198

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -20
app.py CHANGED
@@ -3,39 +3,46 @@ import fitz # PyMuPDF
3
  from transformers import pipeline
4
  import re
5
 
6
- # Use a faster and lighter summarization model
7
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
8
 
9
  def extract_text_from_pdf(pdf_file):
10
- doc = fitz.open(stream=pdf_file.read(), filetype="pdf")
11
- text = "".join(page.get_text() + "\n" for page in doc)
12
- return text
 
 
 
 
 
13
 
14
  def chunk_text(text, max_words=500):
15
  words = text.split()
16
  return [" ".join(words[i:i + max_words]) for i in range(0, len(words), max_words)]
17
 
18
  def simplify_summary(summary):
19
- # Remove repetitive hospital info
20
- summary = re.sub(r"\b(?:Mayo Hospital|Lahore Hospital|submitted by Dr\.).+\n?", "", summary, flags=re.IGNORECASE)
21
  return "🩺 In simple terms:\n" + summary.strip()
22
 
23
  def process_report(pdf_file):
24
- text = extract_text_from_pdf(pdf_file)
25
- if not text.strip():
26
- return "❌ Couldn't extract text from the PDF.", ""
27
 
28
- # Remove irrelevant boilerplate
29
- header, *rest = text.split("\n\n", 1)
30
- core_text = rest[0] if rest else text
 
 
 
 
 
31
 
32
- chunks = chunk_text(core_text, max_words=600)
33
- summaries = [summarizer(chunk, max_length=150, min_length=30, do_sample=False)[0]['summary_text']
34
- for chunk in chunks]
35
-
36
- final_summary = " ".join(summaries)
37
- simple = simplify_summary(final_summary)
38
- return final_summary, simple
39
 
40
  demo = gr.Interface(
41
  fn=process_report,
@@ -45,7 +52,7 @@ demo = gr.Interface(
45
  gr.Textbox(label="Simplified Explanation", lines=8)
46
  ],
47
  title="πŸ₯ Medical Report Summarizer",
48
- description="Speeds up summarization by chunking text & uses a lighter distil-BART model, focusing on core medical findings."
49
  )
50
 
51
  if __name__ == "__main__":
 
3
  from transformers import pipeline
4
  import re
5
 
6
+ # Use faster summarization model
7
  summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
8
 
9
  def extract_text_from_pdf(pdf_file):
10
+ try:
11
+ doc = fitz.open(pdf_file.name) # FIXED: Use .name instead of .read()
12
+ text = ""
13
+ for page in doc:
14
+ text += page.get_text()
15
+ return text
16
+ except Exception as e:
17
+ return None
18
 
19
  def chunk_text(text, max_words=500):
20
  words = text.split()
21
  return [" ".join(words[i:i + max_words]) for i in range(0, len(words), max_words)]
22
 
23
  def simplify_summary(summary):
24
+ summary = re.sub(r"(Mayo Hospital|Lahore Hospital|Dr\.\s+\w+)+.*", "", summary, flags=re.IGNORECASE)
 
25
  return "🩺 In simple terms:\n" + summary.strip()
26
 
27
  def process_report(pdf_file):
28
+ raw_text = extract_text_from_pdf(pdf_file)
29
+ if not raw_text:
30
+ return "❌ Could not read PDF file.", ""
31
 
32
+ chunks = chunk_text(raw_text, max_words=600)
33
+ summaries = []
34
+ for chunk in chunks:
35
+ try:
36
+ result = summarizer(chunk, max_length=150, min_length=30, do_sample=False)
37
+ summaries.append(result[0]['summary_text'])
38
+ except:
39
+ continue
40
 
41
+ if not summaries:
42
+ return "❌ Summarization failed. Try a smaller or clearer PDF.", ""
43
+
44
+ full_summary = " ".join(summaries)
45
+ return full_summary, simplify_summary(full_summary)
 
 
46
 
47
  demo = gr.Interface(
48
  fn=process_report,
 
52
  gr.Textbox(label="Simplified Explanation", lines=8)
53
  ],
54
  title="πŸ₯ Medical Report Summarizer",
55
+ description="Upload a medical report PDF to get an AI summary and non-medical explanation."
56
  )
57
 
58
  if __name__ == "__main__":