Sanjayraju30 committed on
Commit
a2604f1
·
verified ·
1 Parent(s): 3c2d17a

Update ocr_engine.py

Browse files
Files changed (1) hide show
  1. ocr_engine.py +21 -23
ocr_engine.py CHANGED
@@ -5,37 +5,35 @@ from PIL import Image
5
 
6
  def extract_weight_from_image(pil_img):
7
  try:
8
- # Convert PIL image to OpenCV format
9
- img = pil_img.convert("RGB")
10
- img_np = np.array(img)
11
- img_cv = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)
12
 
13
- # Convert to grayscale
14
- gray = cv2.cvtColor(img_cv, cv2.COLOR_BGR2GRAY)
15
 
16
- # Adaptive Thresholding for 7-segment LCD
17
- processed = cv2.adaptiveThreshold(
18
- gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY_INV, 15, 10
19
- )
20
-
21
- # Resize to enhance small text
22
- resized = cv2.resize(processed, None, fx=2, fy=2, interpolation=cv2.INTER_LINEAR)
23
 
24
- # OCR config tuned for digit blocks
25
- config = r'--oem 3 --psm 7 -c tessedit_char_whitelist=0123456789.'
 
 
 
26
 
27
- # Run OCR
28
- text = pytesseract.image_to_string(resized, config=config)
29
 
30
- print("🔍 RAW OCR OUTPUT:", repr(text))
 
 
31
 
32
- # Clean the text
33
- weight = ''.join(c for c in text if c in '0123456789.')
34
- weight = weight.strip()
35
 
36
  confidence = 95 if weight else 0
37
- return weight, confidence
38
 
39
  except Exception as e:
40
- print("❌ OCR Error:", str(e))
41
  return "", 0
 
5
 
6
  def extract_weight_from_image(pil_img):
7
  try:
8
+ # Step 1: Convert PIL to OpenCV
9
+ img = pil_img.convert("L") # grayscale
10
+ img = np.array(img)
 
11
 
12
+ # Step 2: Resize image for better OCR accuracy
13
+ img = cv2.resize(img, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
14
 
15
+ # Step 3: Apply Gaussian Blur to remove noise
16
+ blur = cv2.GaussianBlur(img, (5, 5), 0)
 
 
 
 
 
17
 
18
+ # Step 4: Apply Adaptive Thresholding
19
+ thresh = cv2.adaptiveThreshold(
20
+ blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
21
+ cv2.THRESH_BINARY_INV, 11, 2
22
+ )
23
 
24
+ # Step 5: OCR Config - digits only
25
+ config = r'--oem 3 --psm 6 -c tessedit_char_whitelist=0123456789.'
26
 
27
+ # Step 6: Run OCR
28
+ text = pytesseract.image_to_string(thresh, config=config)
29
+ print("🔍 OCR RAW OUTPUT:", repr(text)) # view this in Hugging Face logs
30
 
31
+ # Step 7: Extract numbers
32
+ weight = ''.join(filter(lambda c: c in '0123456789.', text))
 
33
 
34
  confidence = 95 if weight else 0
35
+ return weight.strip(), confidence
36
 
37
  except Exception as e:
38
+ print("❌ OCR Exception:", str(e))
39
  return "", 0