File size: 2,146 Bytes
5d670ae
da9f292
5d670ae
da9f292
 
9c1cffc
5d670ae
da9f292
363a646
65ed4c1
363a646
da9f292
9c1cffc
38dd73a
 
 
 
 
 
9c1cffc
38dd73a
3ca006e
 
38dd73a
ee1d691
da9f292
5d670ae
8211ee7
6257859
9c1cffc
 
8211ee7
6257859
 
 
005d086
9c1cffc
8211ee7
38dd73a
477d4fe
9c1cffc
 
acddb2f
103f82b
ee1d691
38dd73a
103f82b
9c1cffc
ee1d691
9c1cffc
 
 
 
 
 
 
 
 
38dd73a
8fe1b94
65ed4c1
2132698
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
import easyocr
import numpy as np
import cv2
import re

# Shared EasyOCR reader, created once when this module is imported:
# English is the only recognition language and inference runs on the CPU.
reader = easyocr.Reader(lang_list=["en"], gpu=False)

# Longest image side (in pixels) passed to the OCR engine; larger images are
# scaled down first.
_MAX_IMAGE_DIM = 1000

# Unit suffixes must be removed BEFORE the character fixes below; otherwise the
# "g"/"s" of "kg"/"kgs" would be rewritten to "9"/"5" and glued onto the number
# (e.g. "75.5 kg" would become "75.59").
_UNIT_SUFFIX_RE = re.compile(r"kgs?")

# Everything that is neither a digit nor a decimal point.
_NON_NUMERIC_RE = re.compile(r"[^0-9.]")

# 2-4 integer digits with an optional 1-3 digit fraction: 75.02, 97.2, 102.34.
_WEIGHT_RE = re.compile(r"\d{2,4}(\.\d{1,3})?")

# Single-character substitutions applied to the lowercased text: decimal comma
# to decimal point, and letters treated as misread digits.
_OCR_CHAR_FIXES = str.maketrans({",": ".", "o": "0", "s": "5", "g": "9"})


def _normalize_ocr_text(text):
    """Reduce one OCR text fragment to digits and decimal points only."""
    cleaned = text.lower().strip()
    cleaned = _UNIT_SUFFIX_RE.sub("", cleaned)
    cleaned = cleaned.translate(_OCR_CHAR_FIXES)
    return _NON_NUMERIC_RE.sub("", cleaned)


def _strip_leading_zeros(number):
    """Drop redundant leading zeros from the integer part of a numeric string."""
    int_part, dot, dec_part = number.partition(".")
    return f"{int_part.lstrip('0') or '0'}{dot}{dec_part}"


def extract_weight_from_image(pil_img, *, ocr_reader=None):
    """Run OCR on an image and return the most confident weight-like number.

    Args:
        pil_img: Image to read. It is converted with ``np.array``; the name
            suggests a PIL image, but that is an assumption to confirm
            against the callers.
        ocr_reader: Optional object exposing ``readtext(img)`` that returns
            ``(bbox, text, confidence)`` triples. When omitted, the
            module-level ``reader`` is used.

    Returns:
        A ``(weight, confidence_percent, raw_log)`` tuple:

        * success: the weight as a string without redundant leading zeros,
          the confidence scaled to 0-100 and rounded to two decimals, and a
          newline-joined log of every OCR fragment with its cleaned form;
        * no candidate found: ``("Not detected", 0.0, raw_log)``;
        * any exception: ``("Error: <message>", 0.0, "OCR failed")``.
    """
    try:
        img = np.array(pil_img)

        # Scale oversized images down so the longest side is _MAX_IMAGE_DIM.
        height, width = img.shape[:2]
        longest_side = max(height, width)
        if longest_side > _MAX_IMAGE_DIM:
            scale = _MAX_IMAGE_DIM / longest_side
            img = cv2.resize(img, None, fx=scale, fy=scale, interpolation=cv2.INTER_AREA)

        # OCR without heavy preprocessing.
        active_reader = reader if ocr_reader is None else ocr_reader
        results = active_reader.readtext(img)
        print("DEBUG OCR RESULTS:", results)

        raw_texts = []
        weight_candidates = []
        for _, text, conf in results:
            cleaned = _normalize_ocr_text(text)
            raw_texts.append(f"{text}{cleaned} (conf: {round(conf, 2)})")
            if _WEIGHT_RE.fullmatch(cleaned):
                weight_candidates.append((cleaned, conf))

        raw_log = "\n".join(raw_texts)
        if not weight_candidates:
            return "Not detected", 0.0, raw_log

        # Highest OCR confidence wins; on ties the earliest fragment is kept.
        best_weight, best_conf = max(weight_candidates, key=lambda item: item[1])
        return _strip_leading_zeros(best_weight), round(best_conf * 100, 2), raw_log

    except Exception as e:
        # Deliberate catch-all: callers receive an error tuple instead of an
        # exception, so every failure is reported through the return value.
        return f"Error: {str(e)}", 0.0, "OCR failed"