"""Gradio app that extracts text from a medical-report PDF and summarizes it.

Text extraction tries three strategies in order (PyMuPDF, pdfplumber, then
OCR through pdf2image + pytesseract); the first one that yields non-blank
text wins. The extracted text is then summarized with a Hugging Face model.
"""

import io
import os
from typing import Tuple

import fitz  # PyMuPDF
import gradio as gr
import pdfplumber
import pytesseract
from pdf2image import convert_from_bytes
from PIL import Image
from transformers import pipeline

# Load summarizer from Hugging Face (free model). Created once at import time
# so every request reuses the same pipeline object.
summarizer = pipeline("summarization", model="Falconsai/text_summarization")

# Optional: Configure Tesseract path for Windows users
# pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

# Only this many leading characters of the report are sent to the summarizer.
MAX_SUMMARY_INPUT_CHARS = 1000
# Length bounds passed to the summarization pipeline.
SUMMARY_MAX_LENGTH = 120
SUMMARY_MIN_LENGTH = 30

EXPLANATION_TEXT = (
    "This summary simplifies the medical content extracted from your report. "
    "If there are specific medical terms or values (e.g. Hemoglobin, WBC), the app tries to interpret them. "
    "For full interpretation, consult a doctor."
)


def _read_pdf_bytes(pdf_file) -> bytes:
    """Return the raw bytes of an upload given as bytes, a path, or a file object."""
    if isinstance(pdf_file, (bytes, bytearray)):
        return bytes(pdf_file)

    if isinstance(pdf_file, (str, os.PathLike)):
        with open(pdf_file, "rb") as handle:
            return handle.read()

    data = b""
    if hasattr(pdf_file, "read"):
        if hasattr(pdf_file, "seek"):
            pdf_file.seek(0)
        data = pdf_file.read()

    if not data:
        # Some upload wrappers expose an empty handle and only carry the real
        # location of the uploaded file in ``.name``; fall back to that path.
        path = getattr(pdf_file, "name", None)
        if isinstance(path, (str, os.PathLike)):
            with open(path, "rb") as handle:
                data = handle.read()

    if not isinstance(data, (bytes, bytearray)):
        raise TypeError("PDF input must provide binary data")
    if not data:
        raise ValueError("the uploaded file is empty or unreadable")
    return bytes(data)


def _extract_with_pymupdf(pdf_bytes: bytes) -> str:
    """Concatenate the text of every page as reported by PyMuPDF."""
    with fitz.open(stream=pdf_bytes, filetype="pdf") as doc:
        return "".join(page.get_text() for page in doc)


def _extract_with_pdfplumber(pdf_bytes: bytes) -> str:
    """Concatenate the text of every page as reported by pdfplumber."""
    with pdfplumber.open(io.BytesIO(pdf_bytes)) as pdf:
        # extract_text() may return None for a page; treat that as empty.
        return "".join(page.extract_text() or "" for page in pdf.pages)


def _extract_with_ocr(pdf_bytes: bytes) -> str:
    """Render every page to an image and OCR it with pytesseract."""
    images = convert_from_bytes(pdf_bytes)
    return "".join(pytesseract.image_to_string(img) for img in images)


# Extraction strategies in the order they are attempted, paired with the
# method label reported to the caller when that strategy succeeds.
_EXTRACTORS = (
    ("text", _extract_with_pymupdf),
    ("text", _extract_with_pdfplumber),
    ("ocr", _extract_with_ocr),
)


def extract_text_from_pdf(pdf_file) -> Tuple[str, str]:
    """Extract text from a PDF, falling back from embedded text to OCR.

    Args:
        pdf_file: The uploaded PDF as raw bytes, a filesystem path, or a
            file-like object (optionally with a ``name`` attribute pointing
            at the file on disk).

    Returns:
        A ``(method, text)`` pair. ``method`` is ``"text"`` when PyMuPDF or
        pdfplumber produced the text, ``"ocr"`` when OCR did, and ``"error"``
        when nothing could be extracted; in the error case ``text`` holds a
        user-facing message instead of report content.

    This function does not raise: every failure is reported through the
    ``"error"`` result.
    """
    if pdf_file is None:
        return "error", "❌ No PDF file was provided."

    try:
        pdf_bytes = _read_pdf_bytes(pdf_file)
    except Exception as e:
        return "error", f"❌ Failed to read PDF. Error: {str(e)}"

    last_error = None
    for method, extractor in _EXTRACTORS:
        try:
            text = extractor(pdf_bytes)
        except Exception as e:
            # A failure in one backend must not prevent the remaining
            # fallbacks from running; remember it for the final report.
            last_error = e
            continue
        if text.strip():
            return method, text

    if last_error is not None:
        return "error", f"❌ Failed to read PDF. Error: {str(last_error)}"
    return "error", "❌ Could not extract any text from PDF."


def process_pdf(pdf_file) -> Tuple[str, str, str]:
    """Extract a report's text and summarize it for display in the UI.

    Args:
        pdf_file: The uploaded PDF in any form accepted by
            ``extract_text_from_pdf``.

    Returns:
        A ``(extracted_text, summary, explanation)`` triple matching the three
        output textboxes. When extraction fails the first element is the error
        message and the other two are ``"Error"``; when summarization fails
        the second element is a failure notice and the third carries the
        exception text.
    """
    method, extracted_text = extract_text_from_pdf(pdf_file)
    if method == "error":
        return extracted_text, "Error", "Error"

    try:
        # Shorten for model input
        short_text = extracted_text[:MAX_SUMMARY_INPUT_CHARS]
        result = summarizer(
            short_text,
            max_length=SUMMARY_MAX_LENGTH,
            min_length=SUMMARY_MIN_LENGTH,
            do_sample=False,
        )
        summary = result[0]["summary_text"]
    except Exception as e:
        return extracted_text, "❌ Summarization failed.", f"Error: {str(e)}"

    return extracted_text, summary.strip(), EXPLANATION_TEXT


# Gradio UI
iface = gr.Interface(
    fn=process_pdf,
    # "binary" hands the upload to process_pdf as bytes; unlike "file" it is
    # accepted by both Gradio 3.x and 4.x.
    inputs=gr.File(label="Upload Medical Report (PDF)", type="binary"),
    outputs=[
        gr.Textbox(label="📄 Extracted Report Text"),
        gr.Textbox(label="🧠 AI-Generated Summary"),
        gr.Textbox(label="📘 Simplified Explanation"),
    ],
    title="🧪 Medical Report Reader (Free)",
    description=(
        "Upload a medical report in PDF (scanned or digital). The app will extract the text, summarize it using AI, "
        "and give a simplified explanation."
    ),
)

if __name__ == "__main__":
    iface.launch()