ashhal committed on
Commit
e75c198
·
verified ·
1 Parent(s): 04c5a08

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -26
app.py CHANGED
@@ -1,43 +1,52 @@
1
  import gradio as gr
2
  import fitz # PyMuPDF
3
  from transformers import pipeline
 
4
 
5
- # Load summarization model
6
- summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
7
 
8
  def extract_text_from_pdf(pdf_file):
9
- doc = fitz.open(pdf_file.name) # Use .name instead of .read()
10
- text = ""
11
- for page in doc:
12
- text += page.get_text()
13
  return text
14
 
 
 
 
 
15
  def simplify_summary(summary):
16
- # Convert technical language to a more friendly explanation
17
- return "🩺 Here's what the report says in simple words:\n\n" + summary.replace("\n", " ")
 
18
 
19
  def process_report(pdf_file):
20
- try:
21
- text = extract_text_from_pdf(pdf_file)
22
- if len(text.strip()) == 0:
23
- return "❌ Couldn't extract text from the PDF.", ""
24
-
25
- summary = summarizer(text, max_length=300, min_length=60, do_sample=False)[0]["summary_text"]
26
- explanation = simplify_summary(summary)
27
- return summary, explanation
28
- except Exception as e:
29
- return f"❌ Error: {str(e)}", ""
30
-
31
- # Gradio Interface
 
 
 
 
32
  demo = gr.Interface(
33
  fn=process_report,
34
- inputs=gr.File(label="Upload your Medical Report PDF"),
35
  outputs=[
36
- gr.Textbox(label="Summarized Report", lines=10),
37
- gr.Textbox(label="Explanation in Simple Terms", lines=10)
38
  ],
39
- title="📋 Medical Report Analyzer",
40
- description="Upload a medical report and get a simplified summary using GPT-powered summarization.",
41
  )
42
 
43
- demo.launch()
 
 
1
  import gradio as gr
2
  import fitz # PyMuPDF
3
  from transformers import pipeline
4
+ import re
5
 
6
# Summarization pipeline, built once at import time and reused by every
# request. Uses a faster and lighter summarization model (the distilled
# BART CNN checkpoint named below).
summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
8
 
9
def extract_text_from_pdf(pdf_file):
    """Return the plain text of every page of a PDF.

    Args:
        pdf_file: The PDF to read. Accepted forms:
            * a filesystem path (``str`` or ``os.PathLike``);
            * an object whose ``.name`` is the path of an existing file
              (for example a temp-file wrapper);
            * a binary file-like object exposing ``.read()``.

    Returns:
        The text of all pages concatenated in page order, with a newline
        appended after each page.
    """
    import os

    # NOTE(review): depending on the Gradio version / File `type` setting the
    # upload may arrive as a path string or as a temp-file wrapper whose
    # stream is not the uploaded content -- confirm against the deployed
    # Gradio version. Opening by path covers both; the stream is only read
    # when no on-disk path is available.
    if isinstance(pdf_file, (str, os.PathLike)):
        open_kwargs = {"filename": os.fspath(pdf_file)}
    else:
        path = getattr(pdf_file, "name", None)
        if isinstance(path, str) and os.path.isfile(path):
            open_kwargs = {"filename": path}
        else:
            open_kwargs = {"stream": pdf_file.read(), "filetype": "pdf"}

    # The context manager closes the document even if extraction raises,
    # so the underlying file handle / buffer is not leaked.
    with fitz.open(**open_kwargs) as doc:
        return "".join(page.get_text() + "\n" for page in doc)
13
 
14
def chunk_text(text, max_words=500):
    """Split *text* into consecutive chunks of at most *max_words* words.

    Words are whatever ``str.split()`` yields, so any run of whitespace
    (including newlines) separates words and is collapsed to a single space
    in the output.

    Args:
        text: The text to split.
        max_words: Maximum number of words per chunk; must be at least 1.

    Returns:
        A list of chunk strings in original order. Empty or
        whitespace-only input yields an empty list.

    Raises:
        ValueError: If *max_words* is less than 1. (A negative value would
            otherwise silently produce no chunks and drop all the text.)
    """
    if max_words < 1:
        raise ValueError(f"max_words must be a positive integer, got {max_words!r}")

    words = text.split()
    return [
        " ".join(words[start:start + max_words])
        for start in range(0, len(words), max_words)
    ]
17
+
18
# Hospital / sign-off boilerplate to drop from a summary. The match runs from
# the trigger phrase to the end of its sentence (or line), not to the end of
# the whole line: model summaries are usually a single line, so a line-wide
# match would erase every finding that follows the first mention.
_BOILERPLATE_RE = re.compile(
    r"\b(?:Mayo Hospital|Lahore Hospital|submitted by Dr\.)[^.!?\n]*[.!?]?[ \t]*\n?",
    flags=re.IGNORECASE,
)


def simplify_summary(summary):
    """Strip hospital boilerplate from *summary* and add a friendly prefix.

    Args:
        summary: The model-generated summary text.

    Returns:
        The string ``"🩺 In simple terms:\\n"`` followed by the summary with
        boilerplate sentences removed and surrounding whitespace stripped.
    """
    cleaned = _BOILERPLATE_RE.sub("", summary)
    return "🩺 In simple terms:\n" + cleaned.strip()
22
 
23
def process_report(pdf_file):
    """Summarize an uploaded medical-report PDF.

    Args:
        pdf_file: The upload passed in by the UI, or ``None`` when the form
            was submitted without a file.

    Returns:
        A ``(summary, explanation)`` tuple of strings: the concatenated
        per-chunk model summaries, and the same text after
        ``simplify_summary``. When there is nothing to summarize the first
        element is a "❌ ..." message and the second is empty.
    """
    if pdf_file is None:
        return "❌ Please upload a PDF file first.", ""

    text = extract_text_from_pdf(pdf_file)
    if not text.strip():
        return "❌ Couldn't extract text from the PDF.", ""

    # Treat everything before the first blank line as header boilerplate and
    # drop it -- unless that would leave nothing, in which case keep the
    # full text rather than returning an empty summary.
    _, separator, remainder = text.partition("\n\n")
    core_text = remainder if separator and remainder.strip() else text

    chunks = chunk_text(core_text, max_words=600)
    # truncation=True keeps a chunk that tokenizes to more than the model's
    # maximum input length from raising inside the pipeline.
    summaries = [
        summarizer(
            chunk,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]["summary_text"]
        for chunk in chunks
    ]

    final_summary = " ".join(summaries)
    simple = simplify_summary(final_summary)
    return final_summary, simple
39
+
40
# Gradio UI: one PDF upload in, two text boxes out (the raw model summary
# and the cleaned-up explanation returned by process_report).
demo = gr.Interface(
    fn=process_report,
    inputs=gr.File(label="Upload Medical Report PDF"),
    outputs=[
        gr.Textbox(label="AI-Generated Summary", lines=8),
        gr.Textbox(label="Simplified Explanation", lines=8),
    ],
    # The emoji was mis-decoded (UTF-8 bytes read with a legacy code page);
    # restored to the intended hospital glyph.
    title="🏥 Medical Report Summarizer",
    description="Speeds up summarization by chunking text & uses a lighter distil-BART model, focusing on core medical findings.",
)

# Start the server only when run as a script, so importing this module
# (e.g. from a test or another app) does not launch the UI.
if __name__ == "__main__":
    demo.launch()