Spaces:

rijdev
/

OCR

Sleeping

OCR

File size: 2,090 Bytes

65a8134
2a07d13
 
 
 
 
df43f0a
2a07d13
 
 
df43f0a
65a8134
 
2a07d13
65a8134
2a07d13
65a8134
 
 
 
df43f0a
 
2a07d13
 
65a8134
 
 
df43f0a
65a8134
 
 
 
 
 
df43f0a
 
 
65a8134
 
2a07d13
df43f0a
65a8134
df43f0a
65a8134
 
df43f0a
2a07d13
65a8134
 
 
df43f0a
65a8134
df43f0a
 
65a8134
2a07d13
 
df43f0a

import gradio as gr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from PIL import Image
import os
import re

# Hugging Face Hub checkpoint shared by the image processor and the model.
# NOTE(review): the "stage1" suffix suggests a pre-training checkpoint rather
# than a task-fine-tuned one -- confirm against the model card that this is
# the intended checkpoint for reading lab results.
_TROCR_CHECKPOINT = "microsoft/trocr-base-stage1"

# Both objects are loaded once, at import time, and reused by every request.
processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)

# Directory (relative to the working directory) holding one text record per
# patient; created at import time if it does not exist yet.
PATIENT_RECORDS_DIR = "records"
os.makedirs(PATIENT_RECORDS_DIR, exist_ok=True)

# Extract patient name from filename
def extract_patient_name(file_name):
    """Return the patient-name prefix of a lab-result image filename.

    The expected shape is ``<Name>_<anything>.<ext>`` where ``<Name>`` is one
    or more ASCII letters at the very start of the name and ``<ext>`` is
    ``jpg``, ``jpeg`` or ``png`` in any letter case (``.JPG`` and ``.PNG``
    are accepted as well as the lowercase forms).

    Args:
        file_name: Base name of the uploaded file, without a directory part.

    Returns:
        The leading run of letters before the first underscore, or ``None``
        when the filename does not follow the expected shape.
    """
    # The case-insensitive flag is scoped to the extension only: applying
    # re.IGNORECASE to the whole pattern would make [A-Za-z] also match a few
    # non-ASCII letters, and the name ends up in a file path.
    match = re.match(r"([A-Za-z]+)_.*\.(?i:jpg|jpeg|png)$", file_name)
    return match.group(1) if match else None

# OCR logic
def perform_ocr(image_path):
    """Run the TrOCR model on one image file and return the decoded text.

    Args:
        image_path: Path to an image file that Pillow can open.

    Returns:
        The first decoded sequence, with special tokens removed and
        leading/trailing whitespace stripped.
    """
    # Open under a context manager so the file handle is released as soon as
    # the pixels are read; convert() yields a separate in-memory image that
    # remains valid after the source file is closed.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    # A single image was submitted, so only the first decoded entry is used.
    return decoded[0].strip()

# Save to patient record
def save_record(patient_name, ocr_text):
    """Append one OCR result to the patient's text record.

    The record is ``<PATIENT_RECORDS_DIR>/<patient_name>_records.txt``. It is
    opened in append mode, so it is created on first use and earlier entries
    are preserved. Each call writes a separator header followed by the text.

    Args:
        patient_name: Name used as the record file's prefix.
        ocr_text: Text to append under the new header.
    """
    file_path = os.path.join(PATIENT_RECORDS_DIR, f"{patient_name}_records.txt")
    # Pin the encoding: the platform default may be unable to encode
    # non-ASCII OCR output, and every append must use the same encoding for
    # the record to stay readable.
    with open(file_path, "a", encoding="utf-8") as f:
        f.write("\n\n===== New Lab Result =====\n")
        f.write(ocr_text)

# Main Gradio handler
def process_lab_result(image_path):
    """Handle one upload: OCR the image and append the text to a record.

    The patient name is taken from the uploaded file's base name. When it
    cannot be extracted, nothing is OCR'd or saved.

    Args:
        image_path: Filesystem path of the uploaded file. May be ``None`` or
            empty when the form is submitted without a file (presumably how
            Gradio reports a missing upload -- confirm against the File
            component documentation).

    Returns:
        A user-facing status message: an error line when no file was given
        or the filename is not in the expected format, otherwise a success
        line followed by the extracted text.
    """
    # Guard first: os.path.basename(None) would raise TypeError and surface
    # as an opaque error in the UI.
    if not image_path:
        return "❌ No file uploaded. Please upload a lab result image."

    file_name = os.path.basename(image_path)
    patient_name = extract_patient_name(file_name)

    if not patient_name:
        return "❌ Cannot extract patient name from filename. Use format: JuanDelaCruz_2025-06-13.jpg"

    ocr_text = perform_ocr(image_path)
    save_record(patient_name, ocr_text)

    return f"✅ OCR completed. Lab result saved for `{patient_name}`.\n\n📄 Extracted Text:\n\n{ocr_text}"

# Gradio interface: a single file-upload input wired to process_lab_result,
# with the handler's status message shown as plain text.
_upload_input = gr.File(label="Upload Lab Result (.jpg/.png)", type="filepath")
_description = (
    "Upload a lab result image named like `JuanDelaCruz_2025-06-13.jpg`. "
    "The text will be extracted and saved to the patient's record."
)

iface = gr.Interface(
    title="🩺 Lab Result OCR",
    description=_description,
    fn=process_lab_result,
    inputs=_upload_input,
    outputs="text",
)

# Start the web UI only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()