import os
import re
import sys

from PIL import Image
from transformers import TrOCRProcessor, VisionEncoderDecoderModel


# Hugging Face checkpoint shared by the processor and the model, kept in one
# place so the two can never drift apart.
# NOTE(review): "stage1" is the pre-trained-only TrOCR checkpoint; the
# fine-tuned "trocr-base-printed" / "trocr-base-handwritten" checkpoints are
# usually the ones intended for reading real documents -- confirm this choice.
TROCR_CHECKPOINT = "microsoft/trocr-base-stage1"

# Both objects are created once, at import time, and reused by every OCR call.
processor = TrOCRProcessor.from_pretrained(TROCR_CHECKPOINT)
model = VisionEncoderDecoderModel.from_pretrained(TROCR_CHECKPOINT)

# Directory that holds one "<patient>_records.txt" file per patient.
PATIENT_RECORDS_DIR = "records/"


# A leading run of ASCII letters, an underscore, anything, then a supported
# extension. Case-insensitivity is scoped to the extension alone so that the
# captured name stays restricted to plain ASCII letters.
_PATIENT_FILE_RE = re.compile(r"([A-Za-z]+)_.*\.(?i:jpg|jpeg|png|pdf)$")


def extract_patient_name(file_name):
    """Return the patient name encoded at the start of an upload's file name.

    The expected shape is ``<Letters>_<anything>.<ext>`` where ``<ext>`` is one
    of jpg, jpeg, png or pdf in any letter case, e.g.
    ``"JuanDelaCruz_2025-06-13.jpg"`` -> ``"JuanDelaCruz"``.

    Args:
        file_name: Base name of the uploaded file (no directory part).

    Returns:
        The leading alphabetic part of the name, or ``None`` when the file
        name does not follow the expected pattern.
    """
    match = _PATIENT_FILE_RE.match(file_name)
    return match.group(1) if match else None


def extract_text_from_image(image_path):
    """Run the module-level TrOCR model on one image and return its text.

    Args:
        image_path: Path of the image file to read.

    Returns:
        The decoded text for the image, with surrounding whitespace stripped.

    Raises:
        Whatever ``PIL.Image.open`` raises for a missing or unreadable file;
        nothing is caught here.
    """
    # The context manager closes the file handle deterministically;
    # convert() returns an independent in-memory image that outlives it.
    with Image.open(image_path) as source:
        image = source.convert("RGB")

    pixel_values = processor(images=image, return_tensors="pt").pixel_values
    generated_ids = model.generate(pixel_values)

    # A single image goes in, so only the first decoded sequence is used.
    decoded = processor.batch_decode(generated_ids, skip_special_tokens=True)
    generated_text = decoded[0]
    return generated_text.strip()


def save_to_patient_record(patient_name, text):
    """Append one OCR result to the patient's running record file.

    The record lives at ``PATIENT_RECORDS_DIR/<patient_name>_records.txt``;
    the directory is created when missing. Each call appends a
    ``===== New Upload =====`` separator followed by ``text``.

    Args:
        patient_name: Name used to build the record file name.
        text: Text to append to the record.
    """
    os.makedirs(PATIENT_RECORDS_DIR, exist_ok=True)
    filepath = os.path.join(PATIENT_RECORDS_DIR, f"{patient_name}_records.txt")

    # Pin UTF-8: OCR output may contain non-ASCII characters, and the platform
    # default encoding is not guaranteed to be able to represent them.
    with open(filepath, "a", encoding="utf-8") as file:
        file.write("\n\n===== New Upload =====\n")
        file.write(text)


def process_uploaded_lab_result(file_path):
    """OCR an uploaded lab result and file the text under its patient.

    The patient is taken from the file's base name via
    ``extract_patient_name``; the recognised text is appended to that
    patient's record via ``save_to_patient_record``.

    Args:
        file_path: Path of the uploaded file.

    Returns:
        A human-readable status message: a failure notice when no patient
        name can be derived from the file name, otherwise a confirmation.

    Raises:
        Errors from the OCR or save steps propagate unchanged.
    """
    print(f"Processing: {file_path}")

    patient_name = extract_patient_name(os.path.basename(file_path))
    if not patient_name:
        # \u274c is the cross-mark symbol; written as an escape so the
        # message survives tools that mishandle non-ASCII source bytes.
        return "\u274c Could not determine patient name from filename."

    # NOTE(review): the file-name pattern also admits ".pdf"; confirm that the
    # image loader used by extract_text_from_image can actually read PDFs.
    ocr_text = extract_text_from_image(file_path)
    save_to_patient_record(patient_name, ocr_text)

    # \u2705 is the check-mark symbol (escaped for the same reason as above).
    return f"\u2705 OCR completed and saved under {patient_name}'s record."


if __name__ == "__main__":
    # Use the path given on the command line when present; otherwise fall back
    # to the bundled sample file name.
    if len(sys.argv) > 1:
        file_to_upload = sys.argv[1]
    else:
        file_to_upload = "JuanDelaCruz_2025-06-13.jpg"

    result = process_uploaded_lab_result(file_to_upload)
    print(result)