File size: 2,246 Bytes
6baec95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
import pickle
import cv2
import mediapipe as mp
import numpy as np
from PIL import Image
import requests
from io import BytesIO
import gradio as gr

# Load the trained stacked classifier from disk.
# Fix: the original used pickle.load(open(...)) without ever closing the file
# handle; a `with` block guarantees it is released.
# NOTE(review): unpickling executes arbitrary code — only load model files
# from a trusted source.
with open('stacked_model_new.p', 'rb') as model_file:
    model_dict = pickle.load(model_file)

# ASL alphabet class labels. 'J' and 'Z' are absent — presumably because
# those signs require motion and cannot be classified from a single frame
# (TODO confirm against training data).
labels = ['A','B','C','D','E','F','G','H','I','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y']

# The actual estimator used by predict() below.
model = model_dict['model']


# get url from backend

def predict(url):
    """Download an image from *url* and classify the ASL hand sign in it.

    Parameters
    ----------
    url : str
        HTTP(S) URL of an image containing exactly one hand.

    Returns
    -------
    dict
        ``{"prediction": <letter>}`` on success, or a ``{"prediction": <message>}``
        dict explaining why classification was not possible (no hand, multiple
        hands, or incomplete landmark extraction).
    """
    response = requests.get(url)
    img = Image.open(BytesIO(response.content))

    mp_hands = mp.solutions.hands
    # Fixes vs. the original:
    #  * static_image_mode=True — each call processes an independent still
    #    image; False is the video-tracking mode and is wrong here.
    #  * max_num_hands must be passed to the constructor; the original's
    #    `hands.maxHands = 1` assignment had no effect. We keep the limit at 2
    #    so multi-hand inputs are still detected and rejected below.
    hands = mp_hands.Hands(
        static_image_mode=True,
        max_num_hands=2,
        min_detection_confidence=0.3,
    )

    # Convert the PIL image (forced to RGB, which MediaPipe expects) straight
    # to an ndarray instead of the original's lossy round-trip through a
    # temporary 'image.jpg' on disk.
    frame_rgb = np.asarray(img.convert('RGB'))

    results = hands.process(frame_rgb)

    # Guard clauses replace the original's nested ifs, and the original's
    # silent `return None` paths now return explanatory messages.
    if not results.multi_hand_landmarks:
        return {"prediction": "No hand detected"}
    if len(results.multi_hand_landmarks) != 1:
        return {"prediction": "Too many Hands"}

    hand_landmarks = results.multi_hand_landmarks[0]
    x_ = [lm.x for lm in hand_landmarks.landmark]
    y_ = [lm.y for lm in hand_landmarks.landmark]
    min_x = min(x_)
    min_y = min(y_)

    # Feature vector: per-landmark (x, y) offsets from the hand's bounding-box
    # origin — 21 landmarks * 2 coords = 42 features, matching the model.
    data_aux = []
    for lm in hand_landmarks.landmark:
        data_aux.append(lm.x - min_x)
        data_aux.append(lm.y - min_y)

    if len(data_aux) != 42:
        return {"prediction": "Landmark extraction failed"}

    prediction = model.predict([np.asarray(data_aux)])
    return {"prediction": labels[prediction[0]]}


# Fix: `predict` expects a URL string (it calls requests.get(url)), but the
# original declared inputs="image", which makes Gradio pass an image array
# and break the request. A text box carrying the URL matches the function's
# actual parameter (see the "get url from backend" note above predict).
iface = gr.Interface(fn=predict, inputs="text", outputs="text", title="Image to Text Model")
iface.launch()