Spaces:
Sleeping
Sleeping
| import easyocr | |
| import numpy as np | |
| import cv2 | |
| import re | |
| import logging | |
| from datetime import datetime | |
| import os | |
| from PIL import Image, ImageEnhance | |
| import pytesseract | |
# Set up logging for detailed debugging: DEBUG level, timestamped records.
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(levelname)s - %(message)s')
# Initialize the English-only EasyOCR reader once, at import time, so that
# extract_weight_from_image() can reuse it. gpu=False keeps it on the CPU;
# pass gpu=True here to use a GPU when one is available.
easyocr_reader = easyocr.Reader(['en'], gpu=False)
# Directory that save_debug_image() writes its intermediate PNGs into.
DEBUG_DIR = "debug_images"
# Created at import time; exist_ok=True makes this a no-op if it already exists.
os.makedirs(DEBUG_DIR, exist_ok=True)
def save_debug_image(img, filename_suffix, prefix=""):
    """Write *img* into ``DEBUG_DIR`` as a timestamped PNG.

    The file is named ``{prefix}{timestamp}_{filename_suffix}.png`` where the
    timestamp has microsecond resolution.

    Args:
        img: Image array to write; passed straight to ``cv2.imwrite``.
        filename_suffix: Label appended after the timestamp.
        prefix: Optional text placed before the timestamp.
    """
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    filename = os.path.join(DEBUG_DIR, f"{prefix}{timestamp}_{filename_suffix}.png")
    # Color and grayscale arrays take the same imwrite call, so no branching on
    # img.shape is needed. imwrite reports failure through its return value
    # rather than by raising, so check it instead of always logging success.
    if cv2.imwrite(filename, img):
        logging.debug(f"Saved debug image: {filename}")
    else:
        logging.warning(f"Failed to save debug image: {filename}")
def estimate_brightness(img):
    """Return the mean gray level of *img* as a float.

    Args:
        img: NumPy image array. A 2-D array is treated as already grayscale;
            anything else is converted with ``cv2.COLOR_BGR2GRAY``.

    Returns:
        The arithmetic mean of the grayscale pixel values.
    """
    # A single-channel image has nothing to convert, and cvtColor(BGR2GRAY)
    # would reject it, so use it as-is.
    gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    brightness = float(np.mean(gray))
    logging.debug(f"Estimated brightness: {brightness}")
    return brightness
def deblur_image(img, passes=2):
    """Sharpen *img* by repeatedly convolving it with a 3x3 sharpening kernel.

    Args:
        img: NumPy image array. A 2-D array is treated as already grayscale;
            anything else is converted with ``cv2.COLOR_BGR2GRAY``.
        passes: Number of sharpening passes to apply (default 2).

    Returns:
        The sharpened grayscale image as a ``uint8`` array. The result is also
        written to the debug directory with the suffix ``00_deblurred``.
    """
    gray = img if img.ndim == 2 else cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # The kernel is the same for every pass, so build it once outside the loop.
    # Its weights sum to 1: 9 at the center, -1 for each of the 8 neighbours.
    kernel = np.array([[-1, -1, -1], [-1, 9, -1], [-1, -1, -1]])
    for _ in range(passes):
        gray = cv2.filter2D(gray, -1, kernel)
    # Guarantee the 0-255 uint8 range on the way out, whatever dtype came in.
    gray = np.clip(gray, 0, 255).astype(np.uint8)
    save_debug_image(gray, "00_deblurred")
    return gray
def preprocess_image(img):
    """Run the full enhancement pipeline used before thresholding and OCR.

    Steps, in order: PIL contrast (x3.0) and brightness (x1.8) boost,
    sharpening via ``deblur_image`` (which returns a grayscale image), CLAHE,
    bilateral filtering, and a morphological opening. Each intermediate image
    is written to the debug directory.

    Args:
        img: BGR image array.

    Returns:
        The image produced by the final morphological opening.
    """
    # PIL works in RGB, so convert on the way in and back to BGR on the way out.
    boosted = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    boosted = ImageEnhance.Contrast(boosted).enhance(3.0)
    boosted = ImageEnhance.Brightness(boosted).enhance(1.8)
    boosted_bgr = cv2.cvtColor(np.array(boosted), cv2.COLOR_RGB2BGR)
    save_debug_image(boosted_bgr, "00_preprocessed_pil")

    # Sharpening; the result is grayscale from here on.
    sharpened = deblur_image(boosted_bgr)

    # Local contrast equalisation.
    equalizer = cv2.createCLAHE(clipLimit=4.0, tileGridSize=(8, 8))
    equalized = equalizer.apply(sharpened)
    save_debug_image(equalized, "00_clahe_enhanced")

    # Bilateral filter for noise reduction.
    smoothed = cv2.bilateralFilter(equalized, d=17, sigmaColor=200, sigmaSpace=200)
    save_debug_image(smoothed, "00_bilateral_filtered")

    # Morphological opening with a 5x5 structuring element, applied twice.
    opening_kernel = np.ones((5, 5), np.uint8)
    cleaned = cv2.morphologyEx(smoothed, cv2.MORPH_OPEN, opening_kernel, iterations=2)
    save_debug_image(cleaned, "00_morph_cleaned")
    return cleaned
def normalize_image(img, *, target_height=1080, min_width=480):
    """Resize *img* to a working resolution while keeping its aspect ratio.

    The image is scaled to ``target_height``. If that would make it narrower
    than ``min_width``, it is scaled to ``min_width`` instead and the height
    follows from the aspect ratio.

    Args:
        img: Image array whose first two dimensions are height and width.
        target_height: Desired output height in pixels (default 1080).
        min_width: Smallest acceptable output width in pixels (default 480).

    Returns:
        The resized image (cubic interpolation). It is also written to the
        debug directory with the suffix ``00_normalized``.

    Raises:
        ValueError: If the image has zero height or zero width.
    """
    h, w = img.shape[:2]
    # An empty image has no aspect ratio; fail with a clear message instead of
    # a ZeroDivisionError from the arithmetic below.
    if h == 0 or w == 0:
        raise ValueError(f"Cannot normalize an empty image of size {w}x{h}")
    aspect_ratio = w / h
    target_width = int(target_height * aspect_ratio)
    if target_width < min_width:
        target_width = min_width
        target_height = int(target_width / aspect_ratio)
    resized = cv2.resize(img, (target_width, target_height), interpolation=cv2.INTER_CUBIC)
    save_debug_image(resized, "00_normalized")
    logging.debug(f"Normalized image to {target_width}x{target_height}")
    return resized
def tesseract_ocr(img):
    """Run Tesseract on *img* as the fallback OCR engine.

    Tesseract is called with ``--oem 3 --psm 6`` and a character whitelist of
    digits, ``.`` and ``-``.

    Args:
        img: Image passed to ``pytesseract.image_to_string``.

    Returns:
        The recognised text with surrounding whitespace stripped, or ``None``
        if Tesseract raised any exception (the error is logged).
    """
    tess_config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.-'
    try:
        raw_output = pytesseract.image_to_string(img, config=tess_config)
        text = raw_output.strip()
        logging.info(f"Tesseract OCR raw text: {text}")
    except Exception as e:
        # Best-effort fallback: report the failure and let the caller carry on.
        logging.error(f"Tesseract OCR failed: {str(e)}")
        return None
    return text
# Separators that OCR tends to emit in place of a decimal point; spaces are dropped.
_SEPARATOR_FIXES = str.maketrans({",": ".", ";": ".", ":": ".", " ": None})

# Letters that OCR confuses with digits, mapped to the digit they stand for.
# Applied to lowercased text, so only lowercase keys are needed.
_DIGIT_LOOKALIKES = str.maketrans({
    "o": "0", "q": "0", "s": "5", "g": "9", "l": "1", "i": "1", "|": "1",
    "b": "8", "z": "2", "a": "4", "e": "3", "t": "7",
})

# A unit label at the very end of the text, optionally followed by punctuation.
_TRAILING_UNIT_RE = re.compile(r"(?:pounds|kgs|lbs|kg|lb|gr|g|k)(?=[^0-9a-z]*$)")


def _clean_weight_text(raw_text):
    """Reduce raw OCR output to a bare ``[-]digits[.digits]`` candidate string."""
    text = raw_text.lower().strip().translate(_SEPARATOR_FIXES)
    # Units must be removed BEFORE the look-alike pass; otherwise "kg" becomes
    # "k9" and the stray 9 is appended to the reading.
    text = _TRAILING_UNIT_RE.sub("", text)
    text = text.translate(_DIGIT_LOOKALIKES)
    text = re.sub(r"[^\d.\-]", "", text)
    if text.count(".") > 1:
        # Keep the first dot as the decimal point and merge the remaining parts.
        head, *tail = text.split(".")
        text = f"{head}.{''.join(tail)}"
    # Trim only trailing dots: a leading dot is a real decimal point (".5").
    text = text.rstrip(".")
    if text.startswith("."):
        text = "0" + text
    return text


def _format_weight_text(text):
    """Drop redundant leading/trailing zeros from a string ``float()`` accepted."""
    sign = "-" if text.startswith("-") else ""
    whole, _, fraction = text[len(sign):].partition(".")
    whole = whole.lstrip("0") or "0"
    fraction = fraction.rstrip("0")
    magnitude = f"{whole}.{fraction}" if fraction else whole
    # Never report a negative zero.
    return magnitude if magnitude == "0" else sign + magnitude


def extract_weight_from_image(pil_img):
    """Read the weight shown in an image using EasyOCR, then Tesseract.

    The image is normalised, enhanced and thresholded, then passed to EasyOCR.
    If EasyOCR yields no usable text, Tesseract is tried on the same
    thresholded image. The recognised text is cleaned and validated as a number.

    Args:
        pil_img: The input image. Objects exposing ``convert`` (PIL images) are
            first converted to RGB; anything else is handed to ``np.array``.

    Returns:
        A ``(text, confidence)`` tuple. On success ``text`` is the weight as a
        string without redundant zeros and ``confidence`` is 80.0, or 40.0 when
        the value lies outside -1000..2000. The confidence is a fixed heuristic,
        not derived from the OCR engines' scores. On any failure the result is
        ``("Not detected", 0.0)``; this function does not raise.
    """
    try:
        # Force three RGB channels so RGBA, palette and grayscale images do not
        # make the RGB->BGR conversion fail.
        to_rgb = getattr(pil_img, "convert", None)
        if callable(to_rgb):
            pil_img = to_rgb("RGB")
        img = cv2.cvtColor(np.array(pil_img), cv2.COLOR_RGB2BGR)
        save_debug_image(img, "00_input_image")

        # Normalize image
        img = normalize_image(img)
        brightness = estimate_brightness(img)
        conf_threshold = 0.1  # Very low threshold for blurry images

        # Preprocess entire image (no ROI detection)
        processed_img = preprocess_image(img)
        save_debug_image(processed_img, "01_processed_full")

        # Pick the thresholding strategy from the measured brightness.
        if brightness > 100:
            thresh = cv2.adaptiveThreshold(processed_img, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                           cv2.THRESH_BINARY_INV, 61, 11)
            save_debug_image(thresh, "02_adaptive_threshold")
        else:
            _, thresh = cv2.threshold(processed_img, 10, 255, cv2.THRESH_BINARY_INV)
            save_debug_image(thresh, "02_simple_threshold")

        # Morphological closing with a 7x7 structuring element, three iterations.
        kernel = np.ones((7, 7), np.uint8)
        thresh = cv2.morphologyEx(thresh, cv2.MORPH_CLOSE, kernel, iterations=3)
        save_debug_image(thresh, "02_morph_cleaned")

        # EasyOCR attempt
        results = easyocr_reader.readtext(thresh, detail=1, paragraph=False,
                                          contrast_ths=0.05, adjust_contrast=1.5,
                                          text_threshold=0.05, mag_ratio=10.0,
                                          allowlist='0123456789.-', y_ths=0.8)
        logging.info(f"EasyOCR results: {results}")

        accepted = []
        if results:
            # Sort by the x-coordinate of each box's first corner for
            # left-to-right reading.
            for _, text, conf in sorted(results, key=lambda r: r[0][0][0]):
                logging.info(f"EasyOCR detected: {text}, Confidence: {conf}")
                if conf > conf_threshold and any(c in '0123456789.-' for c in text):
                    accepted.append(text)
        else:
            logging.info("EasyOCR found no digits.")
        recognized_text = "".join(accepted)

        if not recognized_text:
            # Tesseract fallback
            tesseract_result = tesseract_ocr(thresh)
            if tesseract_result:
                recognized_text = tesseract_result
                logging.info(f"Using Tesseract result: {recognized_text}")

        logging.info(f"Raw recognized text: {recognized_text}")
        if not recognized_text:
            logging.info("No text detected by EasyOCR or Tesseract.")
            return "Not detected", 0.0

        text = _clean_weight_text(recognized_text)
        logging.info(f"Cleaned text: {text}")
        if not text or text == '-':
            logging.warning("Cleaned text is invalid.")
            return "Not detected", 0.0

        try:
            weight = float(text)
        except ValueError:
            logging.warning(f"Could not convert '{text}' to float.")
            return "Not detected", 0.0

        confidence = 80.0
        if weight < -1000 or weight > 2000:
            logging.warning(f"Weight {weight} outside typical range, reducing confidence.")
            confidence *= 0.5

        text = _format_weight_text(text)
        logging.info(f"Final detected weight: {text}, Confidence: {confidence}%")
        return text, confidence
    except Exception as e:
        # Top-level boundary: callers always get a result tuple. Log the
        # traceback so the failure stays diagnosable.
        logging.exception(f"Weight extraction failed unexpectedly: {str(e)}")
        return "Not detected", 0.0