Rammohan0504 committed on
Commit
7bc1e69
·
verified ·
1 Parent(s): 92996ab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -17
app.py CHANGED
@@ -1,42 +1,69 @@
 
 
1
  import torch
2
- from transformers import BlipProcessor, BlipForConditionalGeneration
3
  from PIL import Image
4
  import gradio as gr
5
  import re
6
- import cv2
7
- import numpy as np
 
8
 
9
- # Load BLIP model
10
  device = "cuda" if torch.cuda.is_available() else "cpu"
11
- processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
12
- model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base").to(device)
13
 
14
- # Image Preprocessing (Sharpen & Deblur)
 
 
 
 
 
 
 
 
15
  def enhance_image(image):
16
  image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
17
  kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
18
  sharpened = cv2.filter2D(image, -1, kernel)
19
- return sharpened
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- # Extract Text Using BLIP
22
- def extract_text(image):
23
  image = Image.fromarray(image)
24
- inputs = processor(images=image, return_tensors="pt").to(device)
25
- generated_ids = model.generate(**inputs)
26
- text = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
27
  return text
28
 
29
  # Extract Weight Using Regex
30
  def extract_weight(text):
31
- matches = re.findall(r'\d+\.\d+', text) # Extract decimal numbers
32
  return matches[0] if matches else "Weight not detected"
33
 
34
  # Full Processing Pipeline
35
  def process_image(image):
36
  enhanced = enhance_image(image)
37
- text = extract_text(enhanced)
38
- weight = extract_weight(text)
39
- return weight or "No weight detected"
 
 
 
 
 
 
 
40
 
41
  # Gradio Interface
42
  iface = gr.Interface(fn=process_image, inputs="image", outputs="text")
 
1
+ import cv2
2
+ import numpy as np
3
  import torch
 
4
  from PIL import Image
5
  import gradio as gr
6
  import re
7
+ from ultralytics import YOLO
8
+ import easyocr
9
+ from transformers import TrOCRProcessor, VisionEncoderDecoderModel
10
 
# Load models
# All models are created once at import time and shared by every request.
device = "cuda" if torch.cuda.is_available() else "cpu"

# YOLOv5 for digit detection (Pre-trained model)
# NOTE(review): this is the stock "yolov5s.pt" checkpoint; confirm that it
# really exposes digit classes before relying on its detections.
yolo_model = YOLO("yolov5s.pt")

# OCR Models
# EasyOCR reader restricted to English.
ocr_reader = easyocr.Reader(["en"])

# TrOCR processor and encoder-decoder are loaded from the same checkpoint id;
# only the model is moved to `device`.
_trocr_checkpoint = "microsoft/trocr-base-stage1"
trocr_processor = TrOCRProcessor.from_pretrained(_trocr_checkpoint)
trocr_model = VisionEncoderDecoderModel.from_pretrained(_trocr_checkpoint).to(device)
21
+
# Image Preprocessing (Sharpen & Threshold)
def enhance_image(image):
    """Return a sharpened, binarized single-channel version of ``image``.

    Args:
        image: NumPy image array. Gradio's ``"image"`` input supplies RGB
            (height, width, 3) arrays, so colour input is interpreted as
            RGB/RGBA rather than OpenCV's native BGR. A 2-D array is treated
            as already grayscale.

    Returns:
        A 2-D array holding only the values 0 and 255.
    """
    if image.ndim == 2:
        # Already single-channel; cvtColor would reject it.
        gray = image
    elif image.shape[2] == 4:
        gray = cv2.cvtColor(image, cv2.COLOR_RGBA2GRAY)
    else:
        # RGB, not BGR: using the BGR code would swap the red/blue weights.
        gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)

    # 3x3 sharpening kernel: boosts the centre pixel against its 4 neighbours.
    kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])
    sharpened = cv2.filter2D(gray, -1, kernel)

    # Fixed global threshold: pixels above 150 become 255, the rest 0.
    _, thresholded = cv2.threshold(sharpened, 150, 255, cv2.THRESH_BINARY)
    return thresholded
29
+
# Detect Digits Using YOLOv5
def detect_digits(image, min_conf=0.5):
    """Return bounding boxes of detections whose confidence exceeds ``min_conf``.

    Args:
        image: Image passed straight to ``yolo_model``.
        min_conf: Confidence cut-off; detections at or below it are dropped.

    Returns:
        A list of ``[x1, y1, x2, y2]`` lists, one per kept detection. Empty
        when nothing was detected.
    """
    results = yolo_model(image)
    # The Ultralytics model returns one Results object per input image; the
    # legacy ``results.pred`` attribute does not exist on that return value.
    if not results or results[0].boxes is None:
        return []

    boxes = results[0].boxes
    keep = boxes.conf > min_conf
    return boxes.xyxy[keep].tolist()
35
+
# Extract Text Using EasyOCR
def extract_text_easyocr(image):
    """Run the shared EasyOCR reader on ``image`` and return one string.

    ``detail=0`` is passed to ``readtext`` and the returned pieces are joined
    with single spaces.
    """
    fragments = ocr_reader.readtext(image, detail=0)
    return " ".join(fragments)
40
 
# Extract Text Using TrOCR
def extract_text_trocr(image):
    """Recognize text in ``image`` with the shared TrOCR model.

    Args:
        image: NumPy image array. The pipeline passes the thresholded
            single-channel output of ``enhance_image``.

    Returns:
        The decoded text of the first (only) generated sequence.
    """
    # TrOCR's processor expects RGB input; a single-channel array would become
    # a mode-"L" PIL image, so convert explicitly.
    pil_image = Image.fromarray(image).convert("RGB")
    pixel_values = trocr_processor(images=pil_image, return_tensors="pt").pixel_values.to(device)
    generated_ids = trocr_model.generate(pixel_values)
    text = trocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
    return text
48
 
# Extract Weight Using Regex
def extract_weight(text):
    """Return the first number found in ``text`` as a string.

    A decimal such as ``"12.5"`` is preferred over a bare integer at the same
    position. When ``text`` contains no digits, the sentinel string
    ``"Weight not detected"`` is returned instead.
    """
    first_number = re.search(r'\d+\.\d+|\d+', text)
    if first_number is None:
        return "Weight not detected"
    return first_number.group(0)
53
 
# Full Processing Pipeline
def process_image(image):
    """Read a weight value from ``image`` and return it as text.

    The image is preprocessed with ``enhance_image`` and read with EasyOCR
    first; TrOCR is consulted only when EasyOCR yields no number.

    Args:
        image: Image array from the Gradio input, or ``None`` when the user
            submitted nothing.

    Returns:
        The first number recognized, as a string, or
        ``"Weight not detected"`` when no number is found or no image is given.
    """
    not_detected = "Weight not detected"

    # Gradio passes None when the form is submitted without an image.
    if image is None:
        return not_detected

    enhanced = enhance_image(image)

    # NOTE: the previous detect_digits(image) call was dropped here because
    # its result was never used; re-add it once the boxes feed into the OCR.

    # Prioritize numeric values from EasyOCR; fall back to TrOCR lazily so the
    # heavier model only runs when it is actually needed.
    weight = extract_weight(extract_text_easyocr(enhanced))
    if weight == not_detected:
        weight = extract_weight(extract_text_trocr(enhanced))
    return weight
67
 
# Gradio Interface
# One image in, one text out; every submission is routed to process_image.
iface = gr.Interface(
    fn=process_image,
    inputs="image",
    outputs="text",
)