Spaces:

Rammohan0504
/

Weight

Sleeping

App Files Files Community

Rammohan0504 committed on Jun 16

Commit

7bc1e69

verified ·

1 Parent(s): 92996ab

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -17

app.py CHANGED Viewed

@@ -1,42 +1,69 @@
 import torch
-from transformers import BlipProcessor, BlipForConditionalGeneration
 from PIL import Image
 import gradio as gr
 import re
-import cv2
-import numpy as np
-# Load BLIP model
 # Run inference on the GPU when CUDA is available, otherwise on the CPU.
 device = "cuda" if torch.cuda.is_available() else "cpu"
 # Module-level BLIP processor/model pair, loaded once and used by extract_text.
 # NOTE(review): the checkpoint name says image captioning; confirm it is
 # suitable for reading digits off a display.
-processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
-model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
-# Image Preprocessing (Sharpen & Deblur)
 def enhance_image(image):
     """Return a sharpened grayscale copy of *image*.

     Args:
         image: Colour image as a NumPy array in RGB channel order, which is
             what the Gradio ``"image"`` input passes into the pipeline.

     Returns:
         A single-channel array sharpened with a 3x3 kernel.
     """
     # Gradio supplies RGB, not OpenCV's native BGR; converting with the BGR
     # constant would swap the red and blue luminance weights.
     gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
     # Centre-weighted sharpening kernel; its coefficients sum to 1.
     sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
     # ddepth=-1 keeps the output at the same depth as the input.
     return cv2.filter2D(gray, -1, sharpen_kernel)
-# Extract Text Using BLIP
def extract_text(image):
    """Run the module-level BLIP model on *image* and return the decoded text.

    Args:
        image: NumPy array accepted by ``PIL.Image.fromarray``.

    Returns:
        The first decoded sequence, with special tokens stripped.
    """
    pil_image = Image.fromarray(image)
    model_inputs = processor(images=pil_image, return_tensors="pt").to(device)
    token_ids = model.generate(**model_inputs)
    decoded = processor.batch_decode(token_ids, skip_special_tokens=True)
    return decoded[0]
 # Extract Weight Using Regex
 def extract_weight(text):
     """Return the first decimal number found in *text*.

     Only numbers with a fractional part (digits, a dot, digits) are matched.
     When none is present the string ``"Weight not detected"`` is returned.
     """
     first_decimal = re.search(r'\d+\.\d+', text)
     if first_decimal is None:
         return "Weight not detected"
     return first_decimal.group(0)
 # Full Processing Pipeline
 def process_image(image):
     """Run the full pipeline: enhance the image, read its text, pull out a weight.

     Returns whatever ``extract_weight`` produced, or ``"No weight detected"``
     when that result is falsy.
     """
     weight = extract_weight(extract_text(enhance_image(image)))
     if not weight:
         return "No weight detected"
     return weight
 # Gradio Interface
 # Wires process_image to a single image input and a single text output.
 iface = gr.Interface(fn=process_image, inputs="image", outputs="text")

+import cv2
+import numpy as np
 import torch
 from PIL import Image
 import gradio as gr
 import re
+from ultralytics import YOLO
+import easyocr
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel
+# Load models
 # Run inference on the GPU when CUDA is available, otherwise on the CPU.
 device = "cuda" if torch.cuda.is_available() else "cpu"
+# YOLOv5 for digit detection (Pre-trained model)
 # NOTE(review): "yolov5s.pt" looks like a stock pre-trained checkpoint rather
 # than weights trained on digits; confirm it can detect digit classes at all.
+yolo_model = YOLO("yolov5s.pt")
+# OCR Models
+ocr_reader = easyocr.Reader(["en"])  # EasyOCR
 # The TrOCR processor and model are loaded from the same checkpoint name.
 # NOTE(review): "trocr-base-stage1" appears to be the pre-trained-only TrOCR
 # checkpoint; a fine-tuned variant may be what is intended here. Confirm.
+trocr_processor = TrOCRProcessor.from_pretrained("microsoft/trocr-base-stage1")
+trocr_model = VisionEncoderDecoderModel.from_pretrained("microsoft/trocr-base-stage1").to(device)
+# Image Preprocessing (Sharpen & Threshold)
 def enhance_image(image):
     """Return a sharpened, binarized grayscale copy of *image* for OCR.

     Args:
         image: Colour image as a NumPy array in RGB channel order, which is
             what the Gradio ``"image"`` input passes into the pipeline.

     Returns:
         A single-channel array whose pixels are 0 or 255.
     """
     # Gradio supplies RGB, not OpenCV's native BGR; converting with the BGR
     # constant would swap the red and blue luminance weights.
     gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
     # Centre-weighted sharpening kernel; its coefficients sum to 1.
     sharpen_kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
     # ddepth=-1 keeps the output at the same depth as the input.
     sharpened = cv2.filter2D(gray, -1, sharpen_kernel)
     # Fixed global threshold: pixels above 150 become 255, the rest 0.
     _, binary = cv2.threshold(sharpened, 150, 255, cv2.THRESH_BINARY)
     return binary
+# Detect Digits Using YOLOv5
def detect_digits(image):
    """Return bounding boxes of confident detections in *image*.

    Args:
        image: Image in any form the module-level ``yolo_model`` accepts.

    Returns:
        A list of ``[x1, y1, x2, y2]`` lists, one per detection whose
        confidence is above 0.5. Empty when nothing qualifies.
    """
    # Calling an Ultralytics YOLO model returns a list of Results objects (one
    # per input image). That list has no ``.pred`` attribute; detections live
    # on each result's ``boxes``.
    # NOTE(review): boxes are returned for every class the loaded weights
    # predict; confirm those classes are actually digits.
    detected_boxes = []
    for result in yolo_model(image):
        boxes = result.boxes
        if boxes is None:
            continue
        # boxes.conf is (N,) and boxes.xyxy is (N, 4); mask keeps confident rows.
        confident = boxes.conf > 0.5
        detected_boxes.extend(boxes.xyxy[confident].tolist())
    return detected_boxes
+# Extract Text Using EasyOCR
def extract_text_easyocr(image):
    """Run EasyOCR on *image* and return the recognised pieces joined by spaces.

    ``detail=0`` is passed to ``readtext``; the pieces it returns are joined
    with single spaces.
    """
    fragments = ocr_reader.readtext(image, detail=0)
    return " ".join(fragments)
+# Extract Text Using TrOCR
def extract_text_trocr(image):
    """Run TrOCR on *image* and return the decoded text.

    Args:
        image: NumPy array accepted by ``PIL.Image.fromarray``. The pipeline
            passes the single-channel output of ``enhance_image``.

    Returns:
        The first decoded sequence, with special tokens stripped.
    """
    # A 2-D grayscale array becomes a mode "L" image; the TrOCR processor is
    # documented with RGB input, so convert before preprocessing.
    pil_image = Image.fromarray(image).convert("RGB")
    pixel_values = trocr_processor(images=pil_image, return_tensors="pt").pixel_values.to(device)
    generated_ids = trocr_model.generate(pixel_values)
    decoded = trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0]
 # Extract Weight Using Regex
 def extract_weight(text):
     """Return the first number found in *text* as a string.

     A number is either a decimal (digits, a dot, digits) or a plain run of
     digits; the decimal form is tried first at each position. When *text*
     contains no digits the string ``"Weight not detected"`` is returned.
     """
     first_number = re.search(r'\d+\.\d+|\d+', text)
     if first_number is None:
         return "Weight not detected"
     return first_number.group(0)
 # Full Processing Pipeline
 def process_image(image):
     """Read a weight value from *image* using EasyOCR, falling back to TrOCR.

     Args:
         image: The array supplied by the Gradio image input, or ``None`` when
             the form is submitted without an image.

     Returns:
         The first number recognised, as a string, or
         ``"Weight not detected"`` when neither OCR engine finds one.
     """
     # Must match the sentinel string returned by extract_weight.
     not_detected = "Weight not detected"
     # An empty submission arrives as None; answer instead of crashing in OpenCV.
     if image is None:
         return not_detected
     enhanced = enhance_image(image)
     # The earlier detect_digits call is dropped: its result was never used, so
     # it only added a full YOLO inference (and its failures) to every request.
     weight = extract_weight(extract_text_easyocr(enhanced))
     if weight != not_detected:
         return weight
     # Run the heavier TrOCR model only when EasyOCR found no number.
     return extract_weight(extract_text_trocr(enhanced))
 # Gradio Interface
 # Wires process_image to a single image input and a single text output.
 iface = gr.Interface(fn=process_image, inputs="image", outputs="text")