File size: 3,246 Bytes
4622b44
 
 
 
d885a52
4622b44
d144786
4622b44
 
 
 
 
d144786
d885a52
 
 
 
 
 
 
 
 
 
 
4622b44
 
 
 
d144786
4622b44
 
d144786
 
 
 
 
14565fe
 
 
d144786
 
4622b44
d144786
af1f46a
 
 
 
 
 
04dd650
4622b44
d144786
4622b44
d144786
4622b44
 
 
 
 
 
 
04dd650
 
4622b44
 
d144786
4622b44
04dd650
af1f46a
04dd650
 
 
 
 
 
af1f46a
 
4622b44
04dd650
d144786
af1f46a
 
 
 
d144786
04dd650
 
4622b44
04dd650
 
 
 
 
 
 
4622b44
 
af1f46a
d144786
4622b44
 
 
14565fe
d144786
 
4622b44
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import gradio as gr
import torch
import cv2
import numpy as np
from PIL import Image, ImageEnhance
from ultralytics import YOLO
import json


# Path to the trained YOLO weights (expected alongside this script).
model_path = "best.pt" 
# Load the detection model once at module import so predict() can reuse it.
model = YOLO(model_path)

def preprocess_image(image):
    """Sharpen, boost contrast, dim, and resize an image to 800px width.

    Accepts a PIL image or any array-like; returns a PIL.Image whose
    width is 800px with the original aspect ratio preserved.
    """
    pil_img = Image.fromarray(np.array(image))

    # Enhancement pipeline: (enhancer, factor) pairs applied in order.
    pipeline = (
        (ImageEnhance.Sharpness, 2.0),   # increase sharpness
        (ImageEnhance.Contrast, 1.5),    # increase contrast
        (ImageEnhance.Brightness, 0.8),  # reduce brightness
    )
    for enhancer_cls, factor in pipeline:
        pil_img = enhancer_cls(pil_img).enhance(factor)

    # Scale to a fixed 800px width, keeping the aspect ratio.
    target_width = 800
    ratio = pil_img.height / pil_img.width
    target_height = int(target_width * ratio)
    return pil_img.resize((target_width, target_height))

def imageRotation(image):
    """Placeholder rotation step: currently a no-op passthrough."""
    return image

def vision_ai_api(image, label):
    """Simulate a Vision AI extraction call; *image* is ignored for now.

    Returns a JSON-serialisable dict echoing the given label with
    placeholder extracted fields.
    """
    placeholder = "-------"
    extracted = {field: placeholder for field in ("name", "dob", "id_number")}
    return {"label": label, "extracted_data": extracted}

def predict(image):
    """Detect front/back license-card regions, crop them, and query the API.

    Parameters:
        image: PIL.Image or NumPy array from the Gradio input component.

    Returns a 5-tuple:
        (front crop or None, front API JSON string,
         back crop or None, back API JSON string, list of label strings)
    """
    # Normalise the Gradio input to a NumPy array.
    if isinstance(image, Image.Image):
        image = np.array(image)

    # preprocess_image returns a PIL image; convert back to NumPy so that
    # .shape and array slicing below work — PIL images have neither, so the
    # original code raised AttributeError here.
    image = np.array(preprocess_image(image))

    # High confidence threshold to suppress spurious detections.
    results = model(image, conf=0.80)

    detected_classes = set()
    labels = []
    cropped_images = {}

    # Image bounds are invariant across boxes; hoist out of the loop.
    height, width = image.shape[:2]

    for result in results:
        for box in result.boxes:
            x1, y1, x2, y2 = map(int, box.xyxy[0])
            conf = box.conf[0]
            cls = int(box.cls[0])
            class_name = model.names[cls]

            print(f"Detected: {class_name} ({conf:.2f}) at [{x1}, {y1}, {x2}, {y2}]")

            detected_classes.add(class_name)
            labels.append(f"{class_name} {conf:.2f}")

            # Clamp bounding boxes to the image bounds.
            x1, y1, x2, y2 = max(0, x1), max(0, y1), min(width, x2), min(height, y2)

            if x1 >= x2 or y1 >= y2:
                print("Invalid bounding box, skipping.")
                continue

            cropped = image[y1:y2, x1:x2]  # crop the detected region
            cropped_pil = Image.fromarray(cropped)  # convert back to PIL

            # Call the (dummy) Vision AI API on the crop.
            api_response = vision_ai_api(cropped_pil, class_name)
            cropped_images[class_name] = {
                "image": cropped_pil,
                "api_response": json.dumps(api_response, indent=4)
            }

    if not cropped_images:
        return None, "No front detected", None, "No back detected", ["No valid detections"]

    return (
        cropped_images.get("front", {}).get("image", None),
        cropped_images.get("front", {}).get("api_response", "{}"),
        cropped_images.get("back", {}).get("image", None),
        cropped_images.get("back", {}).get("api_response", "{}"),
        labels
    )



# Gradio interface: wire predict()'s 5-tuple to five output components
# (front crop + its JSON, back crop + its JSON, detection labels).
iface = gr.Interface(
    fn=predict,
    inputs="image",
    # Must match predict()'s five return values; the original two-element
    # list left three returns unmapped and broke the output rendering.
    outputs=["image", "text", "image", "text", "text"],
    title="License Field Detection (Front & Back Card)",
    description="Detect front & back of a license card, crop the images, and call Vision AI API separately for each."
)

iface.launch()