ashhal committed on
Commit
d37d30b
·
verified ·
1 Parent(s): 127d5cd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +70 -48
app.py CHANGED
@@ -1,67 +1,89 @@
1
  import gradio as gr
2
  import fitz # PyMuPDF
 
 
 
3
  from transformers import pipeline
 
 
4
 
5
- # Load free models from Hugging Face
6
- summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
7
- explainer = pipeline("text2text-generation", model="google/flan-t5-base")
 
 
8
 
9
- # Extract text from PDF
10
  def extract_text_from_pdf(pdf_file):
11
  try:
12
- pdf_file.seek(0) # 👈 Add this to rewind the file
13
- with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
 
 
 
14
  text = ""
15
  for page in doc:
16
  text += page.get_text()
17
- return text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  except Exception as e:
19
- return None
20
-
21
- # Split text into manageable chunks
22
- def chunk_text(text, max_words=500):
23
- words = text.split()
24
- return [" ".join(words[i:i + max_words]) for i in range(0, len(words), max_words)]
25
-
26
- # Process uploaded PDF
27
- def analyze_report_from_pdf(pdf_file):
28
- raw_text = extract_text_from_pdf(pdf_file)
29
- if not raw_text:
30
- return "❌ Failed to read PDF.", "❌ Error"
31
-
32
- chunks = chunk_text(raw_text)
33
- summaries = []
34
-
35
- for chunk in chunks:
36
- try:
37
- summary = summarizer(chunk, max_length=120, min_length=30, do_sample=False)[0]['summary_text']
38
- summaries.append(summary)
39
- except:
40
- continue
41
-
42
- if not summaries:
43
- return "❌ Summarization failed.", "❌ Error"
44
-
45
- full_summary = " ".join(summaries)
46
- explanation_prompt = f"Explain this medical summary in simple layman terms:\n\n{full_summary}"
47
  try:
48
- explanation = explainer(explanation_prompt, max_length=200)[0]['generated_text']
49
- except:
50
- explanation = "❌ Explanation generation failed."
51
 
52
- return full_summary, explanation
 
 
 
 
 
 
 
 
 
53
 
54
- # Gradio interface
55
- demo = gr.Interface(
56
- fn=analyze_report_from_pdf,
57
- inputs=gr.File(label="Upload Medical Report PDF"),
58
  outputs=[
59
- gr.Textbox(label="AI-Generated Summary", lines=10),
60
- gr.Textbox(label="Simplified Explanation", lines=10)
 
61
  ],
62
- title="🩺 Free Medical Report Analyzer (PDF Upload)",
63
- description="Upload a PDF of your medical report. The app will summarize it and explain in layman terms using free Hugging Face models."
 
 
 
64
  )
65
 
66
  if __name__ == "__main__":
67
- demo.launch()
 
1
  import gradio as gr
2
  import fitz # PyMuPDF
3
+ import pdfplumber
4
+ import pytesseract
5
+ from pdf2image import convert_from_bytes
6
  from transformers import pipeline
7
+ from PIL import Image
8
+ import io
9
 
10
+ # Load summarizer from Hugging Face (free model)
11
+ summarizer = pipeline("summarization", model="Falconsai/text_summarization")
12
+
13
+ # Optional: Configure Tesseract path for Windows users
14
+ # pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
15
 
 
16
  def extract_text_from_pdf(pdf_file):
17
  try:
18
+ pdf_file.seek(0)
19
+ pdf_bytes = pdf_file.read()
20
+
21
+ # Step 1: Try PyMuPDF
22
+ with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
23
  text = ""
24
  for page in doc:
25
  text += page.get_text()
26
+ if text.strip():
27
+ return "text", text
28
+
29
+ # Step 2: Try pdfplumber
30
+ pdf_file.seek(0)
31
+ with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
32
+ for page in pdf.pages:
33
+ text += page.extract_text() or ""
34
+ if text.strip():
35
+ return "text", text
36
+
37
+ # Step 3: OCR via pdf2image + pytesseract
38
+ images = convert_from_bytes(pdf_bytes)
39
+ ocr_text = ""
40
+ for img in images:
41
+ ocr_text += pytesseract.image_to_string(img)
42
+ if ocr_text.strip():
43
+ return "ocr", ocr_text
44
+
45
+ return "error", "❌ Could not extract any text from PDF."
46
+
47
  except Exception as e:
48
+ return "error", f"❌ Failed to read PDF. Error: {str(e)}"
49
+
50
+ def process_pdf(pdf_file):
51
+ method, extracted_text = extract_text_from_pdf(pdf_file)
52
+
53
+ if method == "error":
54
+ return extracted_text, "Error", "Error"
55
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  try:
57
+ # Shorten for model input
58
+ short_text = extracted_text[:1000]
59
+ summary = summarizer(short_text, max_length=120, min_length=30, do_sample=False)[0]["summary_text"]
60
 
61
+ explanation = (
62
+ "This summary simplifies the medical content extracted from your report. "
63
+ "If there are specific medical terms or values (e.g. Hemoglobin, WBC), the app tries to interpret them. "
64
+ "For full interpretation, consult a doctor."
65
+ )
66
+
67
+ return extracted_text, summary.strip(), explanation
68
+
69
+ except Exception as e:
70
+ return extracted_text, "❌ Summarization failed.", f"Error: {str(e)}"
71
 
72
+ # Gradio UI
73
+ iface = gr.Interface(
74
+ fn=process_pdf,
75
+ inputs=gr.File(label="Upload Medical Report (PDF)", type="file"),
76
  outputs=[
77
+ gr.Textbox(label="📄 Extracted Report Text"),
78
+ gr.Textbox(label="🧠 AI-Generated Summary"),
79
+ gr.Textbox(label="📘 Simplified Explanation")
80
  ],
81
+ title="🧪 Medical Report Reader (Free)",
82
+ description=(
83
+ "Upload a medical report in PDF (scanned or digital). The app will extract the text, summarize it using AI, "
84
+ "and give a simplified explanation."
85
+ )
86
  )
87
 
88
  if __name__ == "__main__":
89
+ iface.launch()