Spaces:

ashhal
/

BioWhisper

Runtime error

App Files Files Community

ashhal committed 18 days ago

Commit

d37d30b

verified ·

1 Parent(s): 127d5cd

Update app.py

Browse files

Files changed (1) hide show

app.py +70 -48

app.py CHANGED Viewed

@@ -1,67 +1,89 @@
 import gradio as gr
 import fitz  # PyMuPDF
 from transformers import pipeline
-# Load free models from Hugging Face
-summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")
-explainer = pipeline("text2text-generation", model="google/flan-t5-base")
-# Extract text from PDF
 def extract_text_from_pdf(pdf_file):
     try:
-        pdf_file.seek(0)  # 👈 Add this to rewind the file
-        with fitz.open(stream=pdf_file.read(), filetype="pdf") as doc:
             text = ""
             for page in doc:
                 text += page.get_text()
-        return text
     except Exception as e:
-        return None
-# Split text into manageable chunks
-def chunk_text(text, max_words=500):
-    words = text.split()
-    return [" ".join(words[i:i + max_words]) for i in range(0, len(words), max_words)]
-# Process uploaded PDF
-def analyze_report_from_pdf(pdf_file):
-    raw_text = extract_text_from_pdf(pdf_file)
-    if not raw_text:
-        return "❌ Failed to read PDF.", "❌ Error"
-    chunks = chunk_text(raw_text)
-    summaries = []
-    for chunk in chunks:
-        try:
-            summary = summarizer(chunk, max_length=120, min_length=30, do_sample=False)[0]['summary_text']
-            summaries.append(summary)
-        except:
-            continue
-    if not summaries:
-        return "❌ Summarization failed.", "❌ Error"
-    full_summary = " ".join(summaries)
-    explanation_prompt = f"Explain this medical summary in simple layman terms:\n\n{full_summary}"
     try:
-        explanation = explainer(explanation_prompt, max_length=200)[0]['generated_text']
-    except:
-        explanation = "❌ Explanation generation failed."
-    return full_summary, explanation
-# Gradio interface
-demo = gr.Interface(
-    fn=analyze_report_from_pdf,
-    inputs=gr.File(label="Upload Medical Report PDF"),
     outputs=[
-        gr.Textbox(label="AI-Generated Summary", lines=10),
-        gr.Textbox(label="Simplified Explanation", lines=10)
     ],
-    title="🩺 Free Medical Report Analyzer (PDF Upload)",
-    description="Upload a PDF of your medical report. The app will summarize it and explain in layman terms using free Hugging Face models."
 )
 if __name__ == "__main__":
-    demo.launch()

 import gradio as gr
 import fitz  # PyMuPDF
+import pdfplumber
+import pytesseract
+from pdf2image import convert_from_bytes
 from transformers import pipeline
+from PIL import Image
+import io
# Summarization pipeline backed by a free Hugging Face model; built once at
# import time and reused for every request.
summarizer = pipeline(
    task="summarization",
    model="Falconsai/text_summarization",
)
# Optional: Configure Tesseract path for Windows users
# pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
 def _read_pdf_bytes(pdf_file):
     """Return the raw bytes behind *pdf_file*.

     Accepts raw ``bytes``/``bytearray``, a readable file-like object (rewound
     first when it supports ``seek``), a filesystem path, or an object whose
     ``name`` attribute holds a filesystem path.
     """
     if isinstance(pdf_file, (bytes, bytearray)):
         return bytes(pdf_file)
     if hasattr(pdf_file, "read"):
         # Rewind so a handle that was already consumed is read from the start.
         if hasattr(pdf_file, "seek"):
             pdf_file.seek(0)
         return pdf_file.read()
     path = getattr(pdf_file, "name", pdf_file)
     with open(path, "rb") as handle:
         return handle.read()


 def _extract_with_pymupdf(pdf_bytes):
     """Return the embedded text of every page using PyMuPDF."""
     with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
         return "".join(page.get_text() for page in doc)


 def _extract_with_pdfplumber(pdf_bytes):
     """Return the embedded text of every page using pdfplumber."""
     with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
         # extract_text() may yield None for a page; treat that as empty.
         return "".join(page.extract_text() or "" for page in pdf.pages)


 def _extract_with_ocr(pdf_bytes):
     """Render the pages to images and OCR them with Tesseract."""
     return "".join(
         pytesseract.image_to_string(image)
         for image in convert_from_bytes(pdf_bytes)
     )


 def extract_text_from_pdf(pdf_file):
     """Extract text from a PDF, falling back from text layers to OCR.

     The extractors are tried in order: PyMuPDF, pdfplumber, then OCR
     (pdf2image + pytesseract). Each one runs in isolation, so a failure in
     one extractor no longer prevents the remaining ones from being tried.

     Args:
         pdf_file: Raw bytes, a readable file-like object, a filesystem path,
             or an object exposing the path as ``.name``.

     Returns:
         A ``(method, payload)`` tuple. ``method`` is ``"text"`` when a text
         layer was found, ``"ocr"`` when the text came from OCR, and
         ``"error"`` otherwise; in the error case ``payload`` is a
         user-facing message instead of the extracted text.
     """
     try:
         pdf_bytes = _read_pdf_bytes(pdf_file)
     except Exception as e:
         return "error", f"❌ Failed to read PDF. Error: {str(e)}"

     extractors = (
         ("text", _extract_with_pymupdf),
         ("text", _extract_with_pdfplumber),
         ("ocr", _extract_with_ocr),
     )
     first_error = None
     for method, extractor in extractors:
         try:
             text = extractor(pdf_bytes)
         except Exception as e:
             # Remember the earliest failure for the final report, but keep
             # going: a later extractor may still succeed.
             if first_error is None:
                 first_error = e
             continue
         if text.strip():
             return method, text

     if first_error is not None:
         return "error", f"❌ Failed to read PDF. Error: {str(first_error)}"
     return "error", "❌ Could not extract any text from PDF."
def process_pdf(pdf_file):
    """Extract the text of an uploaded PDF report and summarize it.

    Returns a 3-tuple of strings: the extracted text, the generated summary,
    and a fixed explanatory note. When extraction fails, the extractor's
    message takes the first slot and the other two read "Error"; when
    summarization fails, the extracted text is still returned alongside a
    failure notice and the error details.
    """
    status, report_text = extract_text_from_pdf(pdf_file)
    if status == "error":
        return report_text, "Error", "Error"

    # Fixed note shown next to every successful summary.
    note = (
        "This summary simplifies the medical content extracted from your report. "
        "If there are specific medical terms or values (e.g. Hemoglobin, WBC), the app tries to interpret them. "
        "For full interpretation, consult a doctor."
    )
    try:
        # Only the first 1000 characters are handed to the summarizer.
        excerpt = report_text[:1000]
        results = summarizer(excerpt, max_length=120, min_length=30, do_sample=False)
        summary_text = results[0]["summary_text"].strip()
    except Exception as e:
        return report_text, "❌ Summarization failed.", f"Error: {str(e)}"
    return report_text, summary_text, note
# Gradio UI: one PDF upload in, three text boxes out (extracted text,
# summary, explanation), wired to process_pdf.
# NOTE(review): type="file" appears to be accepted only by older Gradio
# releases; newer ones seem to expect "filepath" or "binary" — confirm against
# the Gradio version this Space pins before changing it.
_report_input = gr.File(label="Upload Medical Report (PDF)", type="file")
_result_boxes = [
    gr.Textbox(label="📄 Extracted Report Text"),
    gr.Textbox(label="🧠 AI-Generated Summary"),
    gr.Textbox(label="📘 Simplified Explanation"),
]
_app_description = (
    "Upload a medical report in PDF (scanned or digital). The app will extract the text, summarize it using AI, "
    "and give a simplified explanation."
)

iface = gr.Interface(
    fn=process_pdf,
    inputs=_report_input,
    outputs=_result_boxes,
    title="🧪 Medical Report Reader (Free)",
    description=_app_description,
)

if __name__ == "__main__":
    iface.launch()