Spaces:
Runtime error
Runtime error
Update ocr_engine.py
Browse files- ocr_engine.py +16 -9
ocr_engine.py
CHANGED
|
@@ -10,7 +10,7 @@ model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-handwrit
|
|
| 10 |
def clean_ocr_text(text):
|
| 11 |
# Fix common OCR misreads
|
| 12 |
text = text.replace(",", ".").replace("s", "5").replace("o", "0").replace("O", "0")
|
| 13 |
-
return re.sub(r"[^\d.kg]", "", text.lower())
|
| 14 |
|
| 15 |
def restore_decimal(text):
|
| 16 |
if re.fullmatch(r"\d{5}", text):
|
|
@@ -25,11 +25,11 @@ def extract_unit_from_text(raw_text):
|
|
| 25 |
return "kg"
|
| 26 |
elif "g" in raw_text:
|
| 27 |
return "g"
|
| 28 |
-
return "g" # fallback if unit
|
| 29 |
|
| 30 |
def extract_weight(image):
|
| 31 |
try:
|
| 32 |
-
#
|
| 33 |
image = image.resize((image.width * 2, image.height * 2), Image.BICUBIC)
|
| 34 |
image = image.filter(ImageFilter.SHARPEN)
|
| 35 |
|
|
@@ -40,17 +40,24 @@ def extract_weight(image):
|
|
| 40 |
|
| 41 |
cleaned = clean_ocr_text(raw_text)
|
| 42 |
|
| 43 |
-
#
|
| 44 |
match = re.search(r"(\d{1,3}\.\d{1,3})\s*(kg|g)?", cleaned)
|
| 45 |
if match:
|
| 46 |
return f"{match.group(1)} {match.group(2) or ''}".strip(), raw_text
|
| 47 |
|
| 48 |
-
#
|
| 49 |
-
|
| 50 |
-
if
|
| 51 |
-
decimal_fixed = restore_decimal(
|
| 52 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
|
| 54 |
return "Error: No valid weight found", raw_text
|
|
|
|
| 55 |
except Exception as e:
|
| 56 |
return f"Error: {str(e)}", ""
|
|
|
|
| 10 |
def clean_ocr_text(text):
|
| 11 |
# Fix common OCR misreads
|
| 12 |
text = text.replace(",", ".").replace("s", "5").replace("o", "0").replace("O", "0")
|
| 13 |
+
return re.sub(r"[^\d.kg]", "", text.lower())
|
| 14 |
|
| 15 |
def restore_decimal(text):
|
| 16 |
if re.fullmatch(r"\d{5}", text):
|
|
|
|
| 25 |
return "kg"
|
| 26 |
elif "g" in raw_text:
|
| 27 |
return "g"
|
| 28 |
+
return "g" # fallback if no unit
|
| 29 |
|
| 30 |
def extract_weight(image):
|
| 31 |
try:
|
| 32 |
+
# Resize & sharpen image
|
| 33 |
image = image.resize((image.width * 2, image.height * 2), Image.BICUBIC)
|
| 34 |
image = image.filter(ImageFilter.SHARPEN)
|
| 35 |
|
|
|
|
| 40 |
|
| 41 |
cleaned = clean_ocr_text(raw_text)
|
| 42 |
|
| 43 |
+
# Case 1: Match decimal with unit
|
| 44 |
match = re.search(r"(\d{1,3}\.\d{1,3})\s*(kg|g)?", cleaned)
|
| 45 |
if match:
|
| 46 |
return f"{match.group(1)} {match.group(2) or ''}".strip(), raw_text
|
| 47 |
|
| 48 |
+
# Case 2: Large number fallback like 53255 → 52.255
|
| 49 |
+
match = re.search(r"\d{4,5}", cleaned)
|
| 50 |
+
if match:
|
| 51 |
+
decimal_fixed = restore_decimal(match.group())
|
| 52 |
+
unit = extract_unit_from_text(raw_text)
|
| 53 |
+
return f"{decimal_fixed} {unit}", raw_text
|
| 54 |
+
|
| 55 |
+
# Final fallback: plain number
|
| 56 |
+
match = re.search(r"\d+", cleaned)
|
| 57 |
+
if match:
|
| 58 |
+
return f"{match.group()} g", raw_text
|
| 59 |
|
| 60 |
return "Error: No valid weight found", raw_text
|
| 61 |
+
|
| 62 |
except Exception as e:
|
| 63 |
return f"Error: {str(e)}", ""
|