import gradio as gr
from ultralytics import YOLO
import cv2
import numpy as np
from paddleocr import PaddleOCR
import re

# Load models once
model_path = "yolo11x.pt"                # general-purpose YOLO11 detector (COCO classes)
plate_model_path = "yolo11_anpr_ghd.pt"  # custom license plate detector
# yolo11x.pt is auto-downloaded by Ultralytics on first use if absent; the
# custom ANPR weights must be shipped alongside the app.
vehicle_model = YOLO(model_path)
plate_model = YOLO(plate_model_path)

# Initialize PaddleOCR once at import time to avoid re-initializing on every call.
# On Hugging Face Spaces, make sure the required PaddleOCR models are available
# (e.g., downloaded in the Dockerfile or a setup script).
# lang='en' targets English license plates; document orientation handling is
# disabled for simplicity.
ocr = PaddleOCR(use_doc_orientation_classify=False,
                use_doc_unwarping=False,
                use_textline_orientation=False,
                lang='en')
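
# Illustrative sketch (hypothetical values) of the result structure the
# PaddleOCR 2.x-style ocr() call used below is assumed to return:
#
#   result = ocr.ocr(bgr_crop, cls=False)
#   # result -> [[ [box_points, ("ABC1234", 0.98)],
#   #              [box_points, ("GB", 0.91)] ]]
#   # result[0][i][1][0] is the i-th recognized string,
#   # result[0][i][1][1] its confidence score.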

# Example image paths. On Hugging Face Spaces these files must be uploaded
# alongside the app (here, in an examples/ directory).
example_images = [
    r"examples/car1.png",
    r"examples/car2.png",
    r"examples/car3.png",
    r"examples/car4.png",
    r"examples/car5.png",
]
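
# Optional safeguard (an assumption, not part of the original app): keep only
# example paths that actually exist so a missing file does not break the
# examples gallery.
import os
example_images = [p for p in example_images if os.path.exists(p)]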

def detect_and_annotate(image):
    # Convert PIL image to OpenCV format
    img = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)

    # Collect status messages for the output textbox
    output_message = ""

    # Start with a copy of the original image for annotations
    annotated_frame = img.copy()

    # Vehicle detection
    results = vehicle_model(img, classes=[2, 3, 5, 7])  # COCO: car, motorcycle, bus, truck

    # results[0].plot() returns a new BGR NumPy array with the vehicle
    # detections drawn on it; use that as the base annotated frame.
    if results and results[0].boxes is not None and len(results[0].boxes) > 0:
        annotated_frame = results[0].plot(line_width=3)
        
        # Run license plate detection only when at least one vehicle was found
        license_plate_results = plate_model(img)

        if license_plate_results and len(license_plate_results[0].boxes) > 0:
            # Iterate through detected license plates if there are multiple
            for license_box_data in license_plate_results[0].boxes:
                license_box = license_box_data.xyxy[0].cpu().numpy()
                x1, y1, x2, y2 = map(int, license_box)

                # Draw a rectangle around the license plate on the annotated frame
                cv2.rectangle(annotated_frame, (x1, y1), (x2, y2), (255, 0, 0), 3)

                # Crop the plate region from the original image, clamped to the
                # image bounds so the slice is never empty
                h, w = img.shape[:2]
                license_plate_region = img[max(0, y1):min(h, y2), max(0, x1):min(w, x2)]
                if license_plate_region.size == 0:
                    continue  # degenerate box; skip OCR for this plate

                try:
                    # Pass the NumPy array directly to PaddleOCR's ocr() method,
                    # which avoids writing the crop to a temporary file first.
                    # cls=False matches the orientation handling disabled at init.
                    ocr_result = ocr.ocr(license_plate_region, cls=False)

                    license_plate_text = ""
                    if ocr_result and ocr_result[0] and ocr_result[0][0]: # Check for valid OCR result structure
                        # The OCR result structure for ocr.ocr is different: it's a list of blocks, each containing [bbox, (text, score)]
                        # We are interested in the text from the first block's first item
                        text = ocr_result[0][0][1][0] 
                        text = re.sub(r'[^a-zA-Z0-9]', '', text)
                        license_plate_text = text
                        output_message += f"Detected License Plate Text: {license_plate_text}\n"
                    else:
                        output_message += "No text found on license plate.\n"
                        
                    # Put text on the annotated frame (still show on image for visual feedback)
                    display_text_on_image = license_plate_text if license_plate_text else "No text found"
                    cv2.putText(annotated_frame, display_text_on_image, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0), 2)

                except Exception as e:
                    output_message += f"OCR Error: {e}\n"
                    print(f"OCR error on region: {e}")
        else:
            output_message = "No license plate detected on the vehicle(s).\n"
    else:
        output_message = "No vehicles detected."
        # annotated_frame is already an unmodified copy of the input image,
        # so there is nothing further to draw.

    # Convert back to RGB for display in Gradio
    annotated_frame = cv2.cvtColor(annotated_frame, cv2.COLOR_BGR2RGB)
    return annotated_frame, output_message
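
# Minimal standalone usage sketch using one of the bundled examples,
# independent of the Gradio UI:
#
#   from PIL import Image
#   frame, message = detect_and_annotate(Image.open("examples/car1.png"))
#   print(message)  # e.g. "Detected license plate text: ..."
#   cv2.imwrite("annotated.png", cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))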

iface = gr.Interface(
    fn=detect_and_annotate,
    inputs=gr.Image(type="pil"),
    outputs=[gr.Image(type="numpy", label="Annotated Image"),
             gr.Textbox(label="Detection Messages")],
    title="Vehicle & License Plate Detection with OCR",
    description="Upload an image. The app detects vehicles and license plates, runs OCR, and returns the annotated image together with status messages.",
    examples=example_images,
    cache_examples=False
)

if __name__ == "__main__":
    # share=True creates a public link for local runs; it is ignored on
    # Hugging Face Spaces.
    iface.launch(share=True)