Spaces:
Runtime error
Runtime error
File size: 2,146 Bytes
5d670ae da9f292 5d670ae da9f292 9c1cffc 5d670ae da9f292 363a646 65ed4c1 363a646 da9f292 9c1cffc 38dd73a 9c1cffc 38dd73a 3ca006e 38dd73a ee1d691 da9f292 5d670ae 8211ee7 6257859 9c1cffc 8211ee7 6257859 005d086 9c1cffc 8211ee7 38dd73a 477d4fe 9c1cffc acddb2f 103f82b ee1d691 38dd73a 103f82b 9c1cffc ee1d691 9c1cffc 38dd73a 8fe1b94 65ed4c1 2132698 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
import easyocr
import numpy as np
import cv2
import re
# Module-level EasyOCR reader, built once at import time and reused by every
# call below: English language only, with GPU inference disabled.
reader = easyocr.Reader(lang_list=["en"], gpu=False)
# Single-character OCR confusions, applied to already-lowercased text.
_OCR_CHAR_FIXES = str.maketrans({",": ".", "o": "0", "s": "5", "g": "9"})
# Weight unit to drop BEFORE the character fixes; the optional "s" covers "kgs".
_UNIT_PATTERN = re.compile(r"kgs?")
# Anything that is neither an ASCII digit nor a decimal point.
_NON_NUMERIC_PATTERN = re.compile(r"[^0-9.]")
# 2-4 integer digits with an optional 1-3 digit fraction, e.g. 75.02, 97.2, 102.
_WEIGHT_PATTERN = re.compile(r"\d{2,4}(\.\d{1,3})?")


def _normalize_ocr_text(text):
    """Reduce one OCR text fragment to a string of digits and decimal points."""
    cleaned = text.lower().strip()
    # Units must go first: otherwise the "g" of "kg" is rewritten to "9" and
    # ends up appended to the reading ("75.5 kg" -> "75.59").
    cleaned = _UNIT_PATTERN.sub("", cleaned)
    cleaned = cleaned.translate(_OCR_CHAR_FIXES)
    return _NON_NUMERIC_PATTERN.sub("", cleaned)


def _strip_leading_zeros(value):
    """Drop superfluous leading zeros from the integer part of a numeric string."""
    int_part, sep, dec_part = value.partition(".")
    int_part = int_part.lstrip("0") or "0"
    return f"{int_part}{sep}{dec_part}"


def extract_weight_from_image(pil_img):
    """Run OCR on an image and return the most confident weight-like number.

    Args:
        pil_img: Image object accepted by ``np.array`` (presumably a PIL
            image, going by the parameter name -- confirm against the caller).

    Returns:
        A 3-tuple ``(weight, confidence, debug_text)``:

        * ``weight`` -- the detected number as a string, ``"Not detected"``
          when no fragment looks like a weight, or ``"Error: ..."`` on failure.
        * ``confidence`` -- OCR confidence of the chosen fragment scaled to a
          percentage and rounded to 2 decimals; ``0.0`` when nothing was found
          or an error occurred.
        * ``debug_text`` -- one line per OCR fragment showing the raw text,
          its cleaned form and its confidence, or ``"OCR failed"`` on error.

    Never raises: every failure is reported through the returned tuple.
    """
    if pil_img is None:
        # Give a readable message instead of the tuple-unpacking error that
        # np.array(None).shape would otherwise trigger.
        return "Error: no image provided", 0.0, "OCR failed"

    try:
        img = np.array(pil_img)

        # Downscale so the longest side is at most max_dim pixels.
        max_dim = 1000
        height, width = img.shape[:2]
        longest_side = max(height, width)
        if longest_side > max_dim:
            scale = max_dim / longest_side
            img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

        # OCR without heavy preprocessing.
        results = reader.readtext(img)
        print("DEBUG OCR RESULTS:", results)

        raw_texts = []
        weight_candidates = []
        for _, text, conf in results:
            cleaned = _normalize_ocr_text(text)
            raw_texts.append(f"{text} → {cleaned} (conf: {round(conf, 2)})")
            # Keep only fragments shaped like a weight reading.
            if _WEIGHT_PATTERN.fullmatch(cleaned):
                weight_candidates.append((cleaned, conf))

        debug_text = "\n".join(raw_texts)
        if not weight_candidates:
            return "Not detected", 0.0, debug_text

        # Highest confidence wins; on ties the earliest fragment is kept.
        best_weight, best_conf = max(weight_candidates, key=lambda cand: cand[1])
        return _strip_leading_zeros(best_weight), round(best_conf * 100, 2), debug_text
    except Exception as e:
        # Deliberate catch-all boundary: callers expect a result tuple rather
        # than an exception, so the failure is reported in-band.
        return f"Error: {str(e)}", 0.0, "OCR failed"
|