# Scraped from a Hugging Face Space file view (Space status: Sleeping; file size: 3,629 bytes).
# The page's per-line commit hashes and its 1-108 line-number gutter were extraction residue and are omitted.
import cv2
import numpy as np
import torch
from PIL import Image
import gradio as gr
import re
from ultralytics import YOLO
import easyocr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
from scipy.signal import medfilt
# Model setup: everything below is loaded once, when the module is imported.
# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Pre-trained YOLOv5 checkpoint used for digital meter detection
yolo_model = YOLO("yolov5s.pt")

# OCR engines: EasyOCR (English) and TrOCR
ocr_reader = easyocr.Reader(["en"])
trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-stage1")
trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-stage1")
# Move the TrOCR weights onto the device chosen above
trocr_model = trocr_model.to(device)
# Image Preprocessing (Adaptive Threshold & Sharpening)
def enhance_image(image):
    """Return a sharpened, adaptively thresholded single-channel version of *image*.

    Args:
        image: Image as a ``numpy`` array. Three-channel input is read as RGB,
            which is the channel order Gradio's ``"image"`` input delivers;
            four-channel input is read as RGBA; two-dimensional input is taken
            to be grayscale already and is not converted.

    Returns:
        A single-channel binary image whose pixels are 0 or 255.
    """
    if image.ndim == 2:
        # Already grayscale: cvtColor would reject a single-channel array here
        gray = image
    elif image.shape[2] == 4:
        gray = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    else:
        # RGB2GRAY, not BGR2GRAY: with RGB data the BGR code swaps the red and
        # blue luminance weights, which dims red digits on the display
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # Sharpen with a 3x3 kernel that boosts the centre pixel against its 4 neighbours
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharpened = cv2.filter2D(gray, -1, kernel)

    # Gaussian adaptive threshold: block size 11, constant 2
    return cv2.adaptiveThreshold(
        sharpened,
        255,
        cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
        cv2.THRESH_BINARY,
        11,
        2,
    )
# Grayscale -> RGB expansion (TrOCR is fed three-channel images)
def convert_to_rgb(image):
    """Expand a two-dimensional (grayscale) array to RGB; return any other input unchanged."""
    if len(image.shape) != 2:
        # Not a plain grayscale array: hand it back untouched
        return image
    return cv2.cvtColor(image, cv2.COLOR_GRAY2RGB)
# Digital meter detection with the YOLOv5 model
def detect_meter(image):
    """Run the YOLO model on *image* and collect the boxes it is confident about.

    Returns a list with one ``box.xyxy.tolist()`` entry for every detected box
    whose confidence is above 0.25.
    """
    kept_boxes = []
    for prediction in yolo_model(image):
        # Skip results that do not expose detection boxes
        if not hasattr(prediction, "boxes"):
            continue
        # 0.25 is a deliberately low cut-off so that fewer detections are missed
        kept_boxes.extend(
            candidate.xyxy.tolist()
            for candidate in prediction.boxes
            if candidate.conf > 0.25
        )
    return kept_boxes
# Text extraction with EasyOCR
def extract_text_easyocr(image):
    """Run EasyOCR on *image* and return the recognised fragments joined by single spaces."""
    # detail=0 asks EasyOCR for the text only
    fragments = ocr_reader.readtext(image, detail=0)
    return " ".join(fragments)
# Text extraction with TrOCR
def extract_text_trocr(image):
    """Run TrOCR on *image* and return the first decoded string."""
    # TrOCR is given a three-channel PIL image, so grayscale input is expanded first
    pil_image = Image.fromarray(convert_to_rgb(image))
    encoded = trocr_processor(images=pil_image, return_tensors="pt")
    token_ids = trocr_model.generate(encoded.pixel_values.to(device))
    decoded = trocr_processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]
# Pull the weight out of OCR text with a regex
def extract_weight(text):
    """Return the first number (decimal or integer) found in *text*, or ``None`` if there is none."""
    first_number = re.search(r'\d+\.\d+|\d+', text)
    if first_number is None:
        return None
    return first_number.group(0)
# Apply Statistical Filtering for Stability
def filter_weight_values(weights):
    """Reduce candidate weight readings to one stable value.

    The result is the lower median of the readings, so it is always a value
    that was actually read and a single outlier cannot win. The earlier
    ``medfilt(weights, kernel_size=3)[-1]`` zero-padded the end of the
    sequence, so it really returned the median of the last two readings and 0;
    for non-negative readings that is the smaller of the last two, which let
    a low outlier in final position through. For exactly two non-negative
    readings the lower median gives the same answer as before.

    Args:
        weights: Sequence of readings, as numbers or numeric strings.

    Returns:
        The selected reading rounded to two decimals and formatted as a
        string, or ``None`` when *weights* is empty. A single reading is
        formatted the same way as several, so the output format does not
        depend on how many readings were supplied.

    Raises:
        ValueError: If a reading cannot be converted to ``float``.
    """
    # Local import keeps this block self-contained; standard library only
    from statistics import median_low

    if not weights:
        return None
    readings = [float(w) for w in weights]
    return str(round(median_low(readings), 2))
# Full Processing Pipeline (Dynamic Feedback)
def process_image(image):
    """Read the weight shown in *image* and return it as text.

    The image is preprocessed, read by both OCR engines, and the numeric
    candidates they yield are reduced to one value.

    Args:
        image: Image array as delivered by the Gradio ``"image"`` input, or
            ``None`` when nothing was uploaded.

    Returns:
        The weight as a string, or a short user-facing hint when no image was
        supplied or no number could be read.
    """
    # Submitting the form without an image passes None; fail soft instead of
    # crashing inside OpenCV
    if image is None:
        return "No image provided. Please upload an image."

    enhanced = enhance_image(image)

    # NOTE(review): the detections are not used by anything below, so this
    # call currently only costs inference time. Kept so the pipeline's steps
    # are unchanged -- confirm whether it should crop the OCR input or be removed.
    detect_meter(image)

    # OCR extraction: EasyOCR first, then TrOCR, both on the enhanced image
    ocr_texts = (extract_text_easyocr(enhanced), extract_text_trocr(enhanced))

    # Keep only the engines that produced a number
    candidates = [extract_weight(ocr_text) for ocr_text in ocr_texts]
    final_weights = [weight for weight in candidates if weight]
    final_weight = filter_weight_values(final_weights)

    # Neither engine found a number: return guidance instead of an empty result
    if not final_weight:
        return "Try adjusting image clarity or detection thresholds."
    return final_weight
# Gradio UI: one image input, one text output, served by process_image
iface = gr.Interface(
    fn=process_image,
    inputs="image",
    outputs="text",
)
iface.launch()
|