File size: 3,123 Bytes
a3555b5
 
 
 
2cf5554
a3555b5
 
 
 
 
 
ad3e283
 
 
 
 
 
 
 
 
 
 
 
 
 
 
eab0e5c
a3555b5
 
 
e224bb7
a3555b5
e224bb7
a3555b5
e224bb7
a3555b5
355835e
4694dbb
4335d9a
 
ad3e283
 
 
 
 
 
4694dbb
2cf5554
4694dbb
 
 
4c87af3
ad3e283
4694dbb
 
041a7e2
 
ad3e283
a3555b5
 
4694dbb
a3555b5
d05911d
e224bb7
a3555b5
ad3e283
a3555b5
 
 
 
ad3e283
 
 
 
 
 
a3555b5
ad3e283
a3555b5
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import numpy as np
from PIL import Image
import gradio as gr
from ultralytics import YOLO
import cv2

# Load the three YOLO model variants at import time.
# The .pt weight files must be present in the working directory; the UI
# radio button lets the user pick which variant runs inference.
m_raw_model = YOLO("M-Raw.pt")
n_raw_model = YOLO("N-Raw.pt")
s_raw_model = YOLO("S-Raw.pt")

# Class to syllable map: model integer class id -> Baybayin syllable label.
# Id 0 is the generic "Baybayin Character" class; ids 1-86 are syllables.
class_mapping = {0: 'Baybayin Character', 1: 'a', 2: 'b', 3: 'ba', 4: 'be', 5: 'bi', 6: 'bo', 7: 'bu', 8: 'd', 9: 'da', 10: 'di', 11: 'do', 12: 'du', 13: 'e', 14: 'g', 15: 'ga', 16: 'gi', 17: 'go', 18: 'gu', 19: 'ha', 20: 'he', 21: 'hi', 22: 'ho', 23: 'hu', 24: 'i', 25: 'k', 26: 'ka', 27: 'ki', 28: 'ko', 29: 'ku', 30: 'l', 31: 'la', 32: 'le', 33: 'li', 34: 'lo', 35: 'lu', 36: 'm', 37: 'ma', 38: 'me', 39: 'mi', 40: 'mo', 41: 'mu', 42: 'n', 43: 'na', 44: 'ng', 45: 'nga', 46: 'ngi', 47: 'ngo', 48: 'ngu', 49: 'ni', 50: 'no', 51: 'nu', 52: 'o', 53: 'p', 54: 'pa', 55: 'pe', 56: 'pi', 57: 'po', 58: 'pu', 59: 'r', 60: 'ra', 61: 're', 62: 'ri', 63: 'ro', 64: 'ru', 65: 's', 66: 'sa', 67: 'se', 68: 'si', 69: 'so', 70: 'su', 71: 't', 72: 'ta', 73: 'te', 74: 'ti', 75: 'to', 76: 'tu', 77: 'u', 78: 'w', 79: 'wa', 80: 'we', 81: 'wi', 82: 'y', 83: 'ya', 84: 'yi', 85: 'yo', 86: 'yu'}

def snap(webcam, upload, model, conf, iou):
    """Run Baybayin character detection on a webcam frame or uploaded image.

    Args:
        webcam: numpy image from the webcam component, or None.
        upload: numpy image from the upload component, or None.
        model: model name ("M-Raw", "N-Raw" or "S-Raw"); defaults to
            "M-Raw" when the radio button was left unselected (None).
        conf: classifier confidence threshold in [0, 1].
        iou: IoU threshold in [0, 1] passed to the detector's NMS.

    Returns:
        A copy of the input image with detection boxes and syllable labels
        drawn on it; the unannotated image when nothing was detected; or
        None when neither input image was provided.
    """
    # Prefer the live webcam frame; fall back to the uploaded image.
    image = webcam if webcam is not None else upload
    if image is None:
        # Original code fell through with image = None and crashed inside
        # the model call; bail out early instead.
        return None

    # Default to M-Raw when no model was selected in the UI.
    if model is None:
        model = "M-Raw"

    # Dispatch to the selected model; unknown names fall back to M-Raw,
    # matching the "no selection" default.
    models = {
        "M-Raw": m_raw_model,
        "N-Raw": n_raw_model,
        "S-Raw": s_raw_model,
    }
    results = models.get(model, m_raw_model)(image, conf=conf, iou=iou)

    # Ultralytics returns one Result per input image; we pass exactly one.
    result = results[0]

    classes = result.boxes.cls.cpu().numpy()
    if classes.size == 0:
        print("No detections.")
        return image

    probs = result.boxes.conf.cpu().numpy()
    boxes = result.boxes.xyxy.cpu().numpy()

    # Draw on a copy so the caller's array is not mutated in place
    # (cv2.rectangle/putText modify their argument).
    image = image.copy()
    for cls_id, prob, box in zip(classes, probs, boxes):
        x1, y1, x2, y2 = (int(v) for v in box)
        cv2.rectangle(image, (x1, y1), (x2, y2), (0, 255, 0), 2)
        cv2.putText(image, f"{class_mapping[int(cls_id)]} {prob:.2f}",
                    (x1, y1), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

    return image


# Gradio UI: two image sources (webcam frame and file upload), a model
# selector, and two threshold sliders; output is the annotated image.
# NOTE(review): gr.Webcam and gr.Image(source=...) are Gradio 3.x APIs —
# confirm the pinned gradio version before upgrading, these were removed
# in Gradio 4.x.
demo = gr.Interface(
    snap,
    [gr.Webcam(type="numpy", label="Webcam"), 
     gr.Image(source="upload", type="numpy", label="Baybayin Image"),
     gr.Radio(["M-Raw", "S-Raw", "N-Raw"]), 
     gr.Slider(0, 1, value=0.6, label="Classifier Confidence Threshold"), 
     gr.Slider(0, 1, value=0.7, label="IoU Threshold")],
    [gr.Image(type="numpy", label="Detected Baybayin")],
    title="Baybayin Instance Detection"
).queue()  # queue() serializes requests so inference calls don't overlap

if __name__ == "__main__":
    demo.launch()