"""Gradio app that reads a numeric value (weight) from a photo of a digital meter.

Pipeline: preprocess the image (sharpen + adaptive threshold), run the YOLO
detector, OCR the preprocessed image with both EasyOCR and TrOCR, pull the
first number out of each OCR result, and combine the readings.
"""

import re

import cv2
import easyocr
import gradio as gr
import numpy as np
import torch
from PIL import Image
from scipy.signal import medfilt
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from ultralytics import YOLO

# Minimum box confidence for a detection to be kept (deliberately low so
# that fewer candidate boxes are discarded).
_MIN_BOX_CONFIDENCE = 0.25

# A decimal number ("12.5") or a plain integer ("12"); decimal alternative
# first so "12.5" is not split into "12" and "5".
_NUMBER_PATTERN = re.compile(r"\d+\.\d+|\d+")

# 3x3 sharpening kernel (centre weight 5, 4-neighbours -1).
_SHARPEN_KERNEL = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])

_FAILURE_MESSAGE = "Try adjusting image clarity or detection thresholds."
_NO_IMAGE_MESSAGE = "Please upload an image first."

# Load models
device = "cuda" if torch.cuda.is_available() else "cpu"

# YOLOv5 for digital meter detection (pre-trained model).
# NOTE(review): "yolov5s.pt" looks like a stock checkpoint -- confirm that it
# was actually trained to detect meters.
yolo_model = YOLO("yolov5s.pt")

# OCR models
ocr_reader = easyocr.Reader(["en"])  # EasyOCR
trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-stage1")
trocr_model = VisionEncoderDecoderModel.from_pretrained(
    "microsoft/trocr-base-stage1"
).to(device)


def enhance_image(image):
    """Return a sharpened, adaptively thresholded version of *image*.

    Args:
        image: BGR image array, or an already single-channel (2-D) array.

    Returns:
        The single-channel output of ``cv2.adaptiveThreshold`` (Gaussian
        method, binary threshold, block size 11, constant 2).
    """
    # A 2-D array is already grayscale; cvtColor would reject it.
    gray = image if image.ndim == 2 else cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    sharpened = cv2.filter2D(gray, -1, _SHARPEN_KERNEL)
    return cv2.adaptiveThreshold(
        sharpened,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )


def convert_to_rgb(image):
    """Expand a 2-D grayscale array to 3-channel RGB; pass anything else through.

    TrOCR is fed through ``PIL.Image.fromarray`` and needs three channels.
    """
    if len(image.shape) == 2:  # Grayscale image
        image = cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
    return image


def detect_meter(image):
    """Run the YOLO model on *image* and collect the confident boxes.

    Args:
        image: Image in any form accepted by ``yolo_model``.

    Returns:
        A list with one entry per kept box, each entry being
        ``box.xyxy.tolist()``. Empty when nothing passes the threshold.
    """
    detected_meters = []
    for result in yolo_model(image):
        # Results without boxes (attribute missing or None) contribute nothing.
        boxes = getattr(result, "boxes", None)
        if boxes is None:
            continue
        for box in boxes:
            if box.conf > _MIN_BOX_CONFIDENCE:
                detected_meters.append(box.xyxy.tolist())
    return detected_meters


def extract_text_easyocr(image):
    """Return all text fragments EasyOCR finds in *image*, space-joined."""
    return " ".join(ocr_reader.readtext(image, detail=0))


def extract_text_trocr(image):
    """Return the text TrOCR generates for *image*.

    Args:
        image: Image array; 2-D grayscale input is expanded to RGB first.
    """
    pil_image = Image.fromarray(convert_to_rgb(image))
    pixel_values = trocr_processor(
        images=pil_image, return_tensors="pt"
    ).pixel_values.to(device)
    generated_ids = trocr_model.generate(pixel_values)
    return trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]


def extract_weight(text):
    """Return the first number found in *text* as a string, or ``None``.

    ``None`` is also returned when *text* itself is empty or ``None``.
    """
    if not text:
        return None
    match = _NUMBER_PATTERN.search(text)
    return match.group(0) if match else None


def filter_weight_values(weights):
    """Combine several numeric readings into a single value.

    Args:
        weights: List of numeric strings (may be empty).

    Returns:
        ``None`` for an empty list, the lone reading unchanged when there is
        exactly one, otherwise the last sample of a kernel-3 median filter,
        rounded to two decimals and returned as a string.
    """
    if not weights:
        return None
    if len(weights) == 1:
        return weights[0]
    values = [float(w) for w in weights]
    # NOTE(review): medfilt zero-pads the signal edges, so the last output is
    # the median of the final two readings and an implicit 0 -- i.e. the
    # smaller of the two for non-negative readings. Confirm this is intended.
    smoothed = medfilt(values, kernel_size=3)[-1]
    return str(round(smoothed, 2))


def _to_bgr(image):
    """Convert a Gradio image array (RGB, RGBA or grayscale) to 3-channel BGR."""
    if image.ndim == 2:
        return cv2.cvtColor(image, cv2.COLOR_GRAY2BGR)
    if image.shape[2] == 4:
        return cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
    return cv2.cvtColor(image, cv2.COLOR_RGB2BGR)


def process_image(image):
    """Read the displayed value from a meter photo.

    Args:
        image: Image array as delivered by the Gradio ``"image"`` input
            (RGB channel order), or ``None`` when nothing was uploaded.

    Returns:
        The detected value as a string, or a hint message when no image was
        given or no number could be read.
    """
    if image is None:
        return _NO_IMAGE_MESSAGE

    # Gradio delivers RGB, while enhance_image (COLOR_BGR2GRAY) and the
    # detector's numpy input path expect BGR; convert once at the boundary.
    bgr = _to_bgr(image)
    enhanced = enhance_image(bgr)

    # NOTE(review): the detections are not consumed by the OCR steps below;
    # the call is kept so the pipeline's stages stay as they were.
    detect_meter(bgr)

    # OCR extraction, then keep only the readings that contained a number.
    readings = [
        extract_weight(extract_text_easyocr(enhanced)),
        extract_weight(extract_text_trocr(enhanced)),
    ]
    final_weight = filter_weight_values([r for r in readings if r])

    # Handle failed detection cases dynamically
    if not final_weight:
        return _FAILURE_MESSAGE
    return final_weight


# Gradio Interface
iface = gr.Interface(fn=process_image, inputs="image", outputs="text")

if __name__ == "__main__":
    iface.launch()