# Hugging Face Space (page status when captured: Sleeping)
import cv2 | |
import numpy as np | |
import torch | |
from PIL import Image | |
import gradio as gr | |
import re | |
from ultralytics import YOLO | |
import easyocr | |
from transformers import TrOCRProcessor, VisionEncoderDecoderModel | |
# ---------------------------------------------------------------------------
# Model setup: everything below is loaded once, when the module is imported.
# ---------------------------------------------------------------------------

# Run TrOCR on the GPU when PyTorch can see one, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Pre-trained YOLOv5 weights, loaded through the ultralytics wrapper.
yolo_model = YOLO("yolov5s.pt")

# EasyOCR reader restricted to English.
ocr_reader = easyocr.Reader(["en"])

# TrOCR: the processor and the encoder-decoder model come from one checkpoint.
_TROCR_CHECKPOINT = "microsoft/trocr-base-stage1"
trocr_processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
trocr_model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT)
trocr_model = trocr_model.to(device)
# Image Preprocessing (Grayscale, Sharpen & Threshold)
def enhance_image(image, threshold=150):
    """Return a sharpened, binarized grayscale version of ``image``.

    Args:
        image: NumPy image array. A 3-channel array is interpreted in
            OpenCV's BGR order and a 4-channel array as BGRA. A 2-D array
            (or one with a single trailing channel) is treated as already
            grayscale and is not colour-converted.
        threshold: Cut-off passed to ``cv2.threshold``; pixels above it
            become 255 and all others 0. Defaults to 150.

    Returns:
        The thresholded single-channel image.
    """
    # cv2.cvtColor(..., COLOR_BGR2GRAY) rejects single-channel input, so only
    # colour-convert when there really are colour channels to collapse.
    if image.ndim == 2:
        gray = image
    elif image.shape[2] == 1:
        gray = image[:, :, 0]
    elif image.shape[2] == 4:
        gray = cv2.cvtColor(image, cv2.COLOR_BGRA2GRAY)
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

    # 3x3 sharpening kernel: boosts the centre pixel against its 4 neighbours.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharpened = cv2.filter2D(gray, -1, kernel)

    _, thresholded = cv2.threshold(sharpened, threshold, 255, cv2.THRESH_BINARY)
    return thresholded
# Detect Digits Using YOLOv5
def detect_digits(image):
    """Run the YOLO detector on ``image`` and return the confident boxes.

    Args:
        image: Image in any form accepted by calling ``yolo_model``.

    Returns:
        A list of ``[x1, y1, x2, y2]`` lists, one per detection whose
        confidence is above 0.5. Empty when nothing qualifies.
    """
    results = yolo_model(image)
    if not results:
        return []

    # An ultralytics model call returns a list with one Results object per
    # input image; the detections live in its ``boxes`` attribute. (The
    # ``results.pred`` attribute belongs to the torch.hub YOLOv5 API and does
    # not exist on this list.)
    boxes = results[0].boxes
    if boxes is None or len(boxes) == 0:
        return []

    # boxes.xyxy holds one row of corner coordinates per detection and
    # boxes.conf one score per detection, so a boolean mask selects the
    # confident rows in one step.
    confident = boxes.conf > 0.5
    return boxes.xyxy[confident].tolist()
# Extract Text Using EasyOCR
def extract_text_easyocr(image):
    """Read ``image`` with the shared EasyOCR reader and return one string.

    The fragments returned by ``readtext(image, detail=0)`` are joined with
    single spaces, so an image with no recognized text gives ``""``.
    """
    fragments = ocr_reader.readtext(image, detail=0)
    return " ".join(fragments)
# Extract Text Using TrOCR
def extract_text_trocr(image):
    """Recognize text in ``image`` with the shared TrOCR processor and model.

    Args:
        image: NumPy image array accepted by ``PIL.Image.fromarray``;
            grayscale (2-D) and colour arrays are both handled.

    Returns:
        The decoded text of the first (only) generated sequence, with
        special tokens stripped.
    """
    # Image.fromarray on a 2-D array yields a single-channel image, while the
    # TrOCR processor is documented with RGB input; convert so grayscale or
    # thresholded arrays are handled the same way as colour ones.
    pil_image = Image.fromarray(image).convert("RGB")

    encoded = trocr_processor(images=pil_image, return_tensors="pt")
    pixel_values = encoded.pixel_values.to(device)

    generated_ids = trocr_model.generate(pixel_values)
    decoded = trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0]
# Extract Weight Using Regex
def extract_weight(text):
    """Return the first number found in ``text``, as a string.

    A decimal such as ``"12.5"`` is matched in full; otherwise the first run
    of digits is taken. When ``text`` contains no digits the sentinel string
    ``"Weight not detected"`` is returned instead.
    """
    first_number = re.search(r'\d+\.\d+|\d+', text)
    if first_number is None:
        return "Weight not detected"
    return first_number.group(0)
# Full Processing Pipeline
def process_image(image):
    """Extract a weight reading from an uploaded image.

    The image is preprocessed with ``enhance_image`` and read with EasyOCR.
    TrOCR is used only as a fallback when EasyOCR yields no number, so the
    slower model is skipped whenever the first reader succeeds.

    The YOLO detection step is intentionally not run here: its result was
    never used by this pipeline, so calling it only added latency and a
    possible point of failure.

    Args:
        image: RGB NumPy array as delivered by Gradio's ``"image"`` input,
            or ``None`` when the user submits without an image.

    Returns:
        The first number found by OCR, as a string, or
        ``"Weight not detected"``.
    """
    if image is None:
        return "Weight not detected"

    # Gradio hands over RGB data, whereas enhance_image converts with
    # COLOR_BGR2GRAY; swap the channels once at this boundary so the
    # grayscale weights are applied to the right channels.
    if image.ndim == 3 and image.shape[2] == 3:
        image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

    enhanced = enhance_image(image)

    # Prioritize the EasyOCR reading; fall back to TrOCR only when needed.
    weight = extract_weight(extract_text_easyocr(enhanced))
    if weight != "Weight not detected":
        return weight
    return extract_weight(extract_text_trocr(enhanced))
# Gradio Interface: one image in, the detected weight (as text) out.
iface = gr.Interface(
    fn=process_image,
    inputs="image",
    outputs="text",
)
iface.launch()