# Weight / app.py
# Rammohan0504's picture
# Update app.py
# 7bc1e69 verified
# raw
# history blame
# 2.43 kB
import cv2
import numpy as np
import torch
from PIL import Image
import gradio as gr
import re
from ultralytics import YOLO
import easyocr
from transformers import TrOCRProcessor, VisionEncoderDecoderModel
# Load models
# TrOCR runs on the GPU when torch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# YOLO detector loaded from the stock "yolov5s.pt" weights file.
# NOTE(review): the stock yolov5s weights are presumably the general-purpose
# pre-trained ones rather than a digit-specific model — confirm this is the
# intended checkpoint.
yolo_model = YOLO("yolov5s.pt")

# OCR models
ocr_reader = easyocr.Reader(["en"])  # EasyOCR, English only

# The TrOCR processor and model must come from the same checkpoint.
# NOTE(review): "stage1" looks like a pre-training checkpoint; verify whether
# a fine-tuned TrOCR checkpoint would read the target displays better.
_TROCR_CHECKPOINT = "microsoft/trocr-base-stage1"
trocr_processor = TrOCRProcessor.from_pretrained(_TROCR_CHECKPOINT)
trocr_model = VisionEncoderDecoderModel.from_pretrained(_TROCR_CHECKPOINT).to(device)
# Image Preprocessing (Sharpen & Threshold)
def enhance_image(image):
    """Return a sharpened, binarized grayscale version of *image*.

    Args:
        image: ``numpy`` array holding a 2-D grayscale image, or an
            H x W x 3 (RGB) / H x W x 4 (RGBA) image. The Gradio image
            input that feeds this app delivers RGB channel order, so the
            conversion below uses the RGB variants rather than OpenCV's
            native BGR.

    Returns:
        A 2-D array in which every pixel is either 0 or 255.
    """
    if image.ndim == 2:
        # Already single-channel; cvtColor would reject it.
        gray = image
    elif image.shape[2] == 4:
        gray = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    else:
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # 3x3 sharpening kernel: boosts the centre pixel against its 4 neighbours.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharpened = cv2.filter2D(gray, -1, kernel)

    # Fixed global threshold: pixels above 150 become 255, the rest 0.
    _, thresholded = cv2.threshold(sharpened, 150, 255, cv2.THRESH_BINARY)
    return thresholded
# Detect Digits Using YOLOv5
def detect_digits(image):
    """Return bounding boxes of YOLO detections with confidence above 0.5.

    Args:
        image: Image in any form accepted by calling the module-level
            ``yolo_model``.

    Returns:
        A list of ``[x1, y1, x2, y2]`` coordinate lists, one per detection
        that passes the confidence threshold; empty when there are none.
    """
    # Calling an Ultralytics ``YOLO`` model yields one ``Results`` object per
    # input image. The ``results.pred`` attribute belongs to the older
    # torch.hub YOLOv5 interface and does not exist on that return value.
    results = yolo_model(image)
    if not results:
        return []

    boxes = results[0].boxes
    if boxes is None:
        return []

    # ``conf`` holds one score per box; the mask selects matching xyxy rows.
    confident = boxes.conf > 0.5
    return boxes.xyxy[confident].tolist()
# Extract Text Using EasyOCR
def extract_text_easyocr(image):
    """Run EasyOCR on *image* and return all recognized text as one string.

    ``detail=0`` makes the reader return only the text fragments; they are
    joined with single spaces in the order the reader reports them.
    """
    fragments = ocr_reader.readtext(image, detail=0)
    return " ".join(fragments)
# Extract Text Using TrOCR
def extract_text_trocr(image):
    """Recognize text in *image* with the TrOCR model.

    Args:
        image: ``numpy`` array accepted by ``PIL.Image.fromarray``; in this
            app it is the single-channel output of ``enhance_image``.

    Returns:
        The decoded text of the first (only) generated sequence.
    """
    # A 2-D array becomes a mode "L" image; TrOCR's processor is documented
    # with RGB input, so expand to three channels before preprocessing.
    pil_image = Image.fromarray(image).convert("RGB")

    encoded = trocr_processor(images=pil_image, return_tensors="pt")
    pixel_values = encoded.pixel_values.to(device)

    generated_ids = trocr_model.generate(pixel_values)
    decoded = trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)
    return decoded[0]
# Extract Weight Using Regex
def extract_weight(text):
    """Return the first number in *text*, or a not-detected message.

    A number is a run of digits, optionally followed by a dot and more
    digits. The match is returned as a string exactly as it appears in
    *text*; when no digit is present the string "Weight not detected" is
    returned instead.
    """
    first_number = re.search(r'\d+\.\d+|\d+', text)
    if first_number is None:
        return "Weight not detected"
    return first_number.group(0)
# Full Processing Pipeline
def process_image(image):
    """Read a weight value from an image of a scale display.

    The image is enhanced once, then passed to EasyOCR and, only if that
    yields no number, to TrOCR. The first number found is returned.

    Args:
        image: Image array supplied by the Gradio interface, or ``None``
            when the user submits without providing an image.

    Returns:
        The detected number as a string, or "Weight not detected".
    """
    not_detected = "Weight not detected"

    # Gradio passes None when no image was provided; avoid crashing in OpenCV.
    if image is None:
        return not_detected

    enhanced = enhance_image(image)

    # The YOLO detection step is intentionally not run here: its boxes were
    # never used to produce the result, so it only added inference cost.
    # EasyOCR takes priority; TrOCR is the fallback and is skipped when
    # EasyOCR already produced a number.
    for read_text in (extract_text_easyocr, extract_text_trocr):
        weight = extract_weight(read_text(enhanced))
        if weight != not_detected:
            return weight
    return not_detected
# Gradio Interface: one image input, one text output, served by process_image.
iface = gr.Interface(
    fn=process_image,
    inputs="image",
    outputs="text",
)
iface.launch()