Spaces:

rijdev
/

OCR

Sleeping

App Files Files Community

OCR / app.py

rijdev

Create app.py

2a07d13 verified 2 months ago

raw

history blame

1.91 kB

	from transformers import TrOCRProcessor, VisionEncoderDecoderModel
	from PIL import Image
	import os
	import re

	# Single checkpoint name shared by the processor and the model so the two
	# can never drift apart.
	# NOTE(review): "stage1" looks like the pre-trained-only TrOCR checkpoint --
	# confirm whether a fine-tuned variant was intended for real documents.
	_TROCR_CHECKPOINT = "microsoft/trocr-base-stage1"

	# Loaded once at import time and reused by every OCR call in this module.
	processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
	model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)

	# Folder that receives one "<patient>_records.txt" file per patient.
	PATIENT_RECORDS_DIR = "records/"

	# Function to extract patient name from filename
	def extract_patient_name(file_name):
	    """Return the patient name encoded at the start of an upload's filename.

	    Filenames are expected to look like ``<Name>_<anything>.<ext>`` where
	    ``<Name>`` is made of ASCII letters only and ``<ext>`` is one of
	    ``jpg``, ``jpeg``, ``png`` or ``pdf`` (extension case is ignored).

	    Args:
	        file_name: Bare filename without any directory part, e.g.
	            ``"JuanDelaCruz_2025-06-13.jpg"``.

	    Returns:
	        The leading alphabetic name, or ``None`` when the filename does not
	        follow the expected pattern.
	    """
	    # The earlier pattern escaped the alternation bars and allowed exactly one
	    # character after the underscore, so realistic filenames never matched.
	    # The case-insensitive flag is scoped to the extension only, which keeps
	    # the name group restricted to plain ASCII letters.
	    match = re.match(r"([A-Za-z]+)_.*\.(?i:jpg|jpeg|png|pdf)$", file_name)
	    return match.group(1) if match else None

	# OCR function
	def extract_text_from_image(image_path):
	    """Run the module-level TrOCR processor and model on one image file.

	    Args:
	        image_path: Path of the image to read; it is opened with PIL and
	            converted to RGB before being handed to the processor.

	    Returns:
	        The first decoded sequence with surrounding whitespace stripped.
	    """
	    rgb_image = Image.open(image_path).convert("RGB")
	    encoded = processor(images=rgb_image, return_tensors="pt")
	    token_ids = model.generate(encoded.pixel_values)
	    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
	    # Only one image is submitted, so only the first decoded entry is used.
	    return decoded[0].strip()

	# Save text to patient record
	def save_to_patient_record(patient_name, text):
	    """Append a block of OCR text to the patient's record file.

	    The records directory is created on demand, and the target file is
	    ``<PATIENT_RECORDS_DIR>/<patient_name>_records.txt``. Each call appends
	    a ``===== New Upload =====`` separator followed by ``text``; existing
	    content is never overwritten.

	    Args:
	        patient_name: Name used to build the record filename.
	        text: Text to append after the separator.
	    """
	    os.makedirs(PATIENT_RECORDS_DIR, exist_ok=True)
	    filepath = os.path.join(PATIENT_RECORDS_DIR, f"{patient_name}_records.txt")
	    # Explicit UTF-8: without it the platform's locale encoding is used, which
	    # can fail on non-ASCII OCR output or produce files that differ per host.
	    with open(filepath, "a", encoding="utf-8") as file:
	        file.write("\n\n===== New Upload =====\n")
	        file.write(text)

	# Main process
	def process_uploaded_lab_result(file_path):
	    """OCR one uploaded lab result and file it under the patient's record.

	    The patient is identified from the basename of ``file_path``. When a
	    name is found, the file is run through OCR and the text is appended to
	    that patient's record.

	    Args:
	        file_path: Path of the uploaded file.

	    Returns:
	        A status message: a success message naming the patient, or a
	        failure message when no patient name can be derived from the
	        filename.
	    """
	    print(f"Processing: {file_path}")
	    upload_name = os.path.basename(file_path)
	    patient_name = extract_patient_name(upload_name)
	    if patient_name:
	        recognized_text = extract_text_from_image(file_path)
	        save_to_patient_record(patient_name, recognized_text)
	        return f"✅ OCR completed and saved under {patient_name}'s record."
	    return "❌ Could not determine patient name from filename."

	# Example usage
	if __name__ == "__main__":
	    # Demo run against a sample upload; prints the returned status message.
	    sample_upload = "JuanDelaCruz_2025-06-13.jpg"
	    print(process_uploaded_lab_result(sample_upload))