JosephCatrambone committed on
Commit
db8bba4
·
1 Parent(s): f29dbf2

First import.

Browse files
Files changed (2) hide show
  1. app.py +91 -0
  2. tiny_doodle_embedding.onnx +3 -0
app.py ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import onnxruntime as ort
2
+ import numpy
3
+ import gradio as gr
4
+ from PIL import Image
5
+
6
import os

# Model files to try, in order of preference. The versioned name is the one
# this script has always requested.
# NOTE(review): the only model file visible alongside this script is
# 'tiny_doodle_embedding.onnx', so it is used as a fallback when the
# versioned file is absent -- confirm which name is actually intended.
_MODEL_CANDIDATES = (
    'tiny_doodle_embedding_v14.onnx',
    'tiny_doodle_embedding.onnx',
)

# Pick the first candidate that exists; if none exist, keep the original
# name so the loader reports the same missing-file error as before.
_model_path = next(
    (path for path in _MODEL_CANDIDATES if os.path.exists(path)),
    _MODEL_CANDIDATES[0],
)

# Single inference session shared by every request.
ort_sess = ort.InferenceSession(_model_path)

# force reload now!
9
+
10
def get_bounds(img):
    """Return the inclusive bounding box of the bright pixels in a 2-D image.

    Assumes a BLACK BACKGROUND with white strokes: a pixel counts as
    foreground when it is strictly brighter than the midpoint between the
    image's minimum and maximum values.

    Args:
        img: 2-D numpy array of pixel intensities, indexed as ``img[y, x]``.

    Returns:
        A tuple ``(top, bottom, left, right)`` of inclusive row/column
        indices (plain Python ints). When no pixel is brighter than the
        midpoint (for example a uniform or zero-size image) the sentinel
        ``(height, 0, width, 0)`` is returned, so ``top > bottom`` and
        ``left > right``.
    """
    height, width = img.shape[0], img.shape[1]
    if height == 0 or width == 0:
        # Nothing to scan; min/max of an empty array would raise.
        return (height, 0, width, 0)

    # Convert to Python floats before adding: summing two small-integer
    # numpy scalars (e.g. uint8 200 + 255) would wrap around and yield a
    # wrong threshold.
    mean_color = 0.5 * (float(numpy.min(img)) + float(numpy.max(img)))

    # Rows/columns that contain at least one foreground pixel.
    foreground = img > mean_color
    rows = numpy.flatnonzero(foreground.any(axis=1))
    cols = numpy.flatnonzero(foreground.any(axis=0))
    if rows.size == 0:
        return (height, 0, width, 0)

    return (int(rows[0]), int(rows[-1]), int(cols[0]), int(cols[-1]))
29
+
30
def resize_maxpool(img, out_width: int, out_height: int):
    """Resize a 2-D image to ``(out_height, out_width)`` by max-pooling.

    Each output pixel is the maximum over a rectangular bin of the input.
    Bin edges are placed proportionally, so:

    * when the input size is an exact multiple of the output size, the bins
      are the usual non-overlapping ``scale x scale`` windows;
    * when it is not, the bins still cover the whole input instead of
      dropping the trailing rows/columns;
    * when the input is smaller than the output, every bin holds at least
      one pixel (the nearest input pixel is repeated) instead of being
      empty.

    Args:
        img: 2-D numpy array to resize.
        out_width: Number of columns in the result.
        out_height: Number of rows in the result.

    Returns:
        A new array of shape ``(out_height, out_width)`` with ``img``'s
        dtype. If ``img`` has a zero-length dimension the result is all
        zeros.
    """
    out = numpy.zeros((out_height, out_width), dtype=img.dtype)
    in_height, in_width = img.shape[0], img.shape[1]
    if in_height == 0 or in_width == 0:
        # No pixels to pool over; max() of an empty slice would raise.
        return out

    # Column bin edges do not depend on the row, so compute them once.
    x_bins = []
    for x in range(out_width):
        x0 = (x * in_width) // out_width
        x1 = max(x0 + 1, ((x + 1) * in_width) // out_width)
        x_bins.append((x0, x1))

    for y in range(out_height):
        y0 = (y * in_height) // out_height
        # max(...) guarantees a non-empty bin when upsampling.
        y1 = max(y0 + 1, ((y + 1) * in_height) // out_height)
        for x, (x0, x1) in enumerate(x_bins):
            out[y, x] = numpy.max(img[y0:y1, x0:x1])
    return out
38
+
39
def process_input(input_msg):
    """Convert a sketch payload into a ``(1, 32, 32)`` binary model input.

    Steps: threshold and invert the drawing so strokes become 1.0 on a 0.0
    background, crop to the strokes' bounding box, max-pool down to 32x32,
    then add a leading axis.

    Args:
        input_msg: Mapping with a ``"composite"`` entry holding the drawn
            image as a 2-D numpy array (presumably the value produced by the
            Gradio sketchpad input -- confirm against the caller). In that
            image 255 is white and 0 is what's drawn.

    Returns:
        A float array of shape ``(1, 32, 32)`` containing 0.0/1.0 values.
        A canvas with no strokes yields an all-zero array.
    """
    img = input_msg["composite"]

    # Image is inverted: 255 is white, 0 is what's drawn. Use Python floats
    # for the midpoint so integer pixel types cannot wrap around when summed.
    img_mean = 0.5 * (float(numpy.max(img)) + float(numpy.min(img)))
    img = 1.0 * (img < img_mean)  # Invert the image and convert to a float.

    top, bottom, left, right = get_bounds(img)
    if top > bottom or left > right:
        # Nothing was drawn: there is no bounding box to crop to.
        return numpy.zeros((1, 32, 32), dtype=img.dtype)

    # get_bounds returns inclusive indices; slice ends are exclusive, so
    # add one to keep the last row and column of the drawing.
    img = img[top:bottom + 1, left:right + 1]
    img = resize_maxpool(img, 32, 32)
    img = numpy.expand_dims(img, axis=0)  # Unsqueeze
    return img
50
+
51
+
52
def compare(input_img_a, input_img_b):
    """Embed two sketches and report how similar their embeddings are.

    Both inputs are preprocessed with ``process_input``, run through the
    shared ONNX session, and compared with a dot product.

    Args:
        input_img_a: First sketch payload, as accepted by ``process_input``.
        input_img_b: Second sketch payload, as accepted by ``process_input``.

    Returns:
        A tuple ``(preview, similarity, text_out)`` where ``preview`` is a
        PIL image showing the two preprocessed inputs side by side,
        ``similarity`` is element ``[0][0]`` of the dot product of the two
        embeddings, and ``text_out`` is a textual dump of both embeddings.
    """
    processed_a = process_input(input_img_a)
    processed_b = process_input(input_img_b)

    # Each input gets its own session call; they could be stacked and run
    # as one batch instead.
    embeddings = []
    for processed in (processed_a, processed_b):
        outputs = ort_sess.run(None, {'input': processed.astype(numpy.float32)})
        embeddings.append(outputs[0])
    a_embedding, b_embedding = embeddings

    # Normalisation is currently switched off: both magnitudes are fixed at
    # 1.0, so these divisions leave the embeddings unchanged.
    a_mag = 1.0
    b_mag = 1.0
    a_embedding /= a_mag
    b_embedding /= b_mag

    text_out = "".join([
        f"img_a_embedding: {a_embedding}\n",
        f"img_b_embedding: {b_embedding}\n",
    ])

    sim = numpy.dot(a_embedding, b_embedding.T)
    print(sim)
    print(text_out)

    # Scale the 0/1 inputs into the displayable 8-bit range for the preview.
    side_by_side = numpy.hstack([processed_a[0], processed_b[0]]) * 254
    preview = Image.fromarray(numpy.clip(side_by_side, 0, 255).astype(numpy.uint8))
    return preview, sim[0][0], text_out
72
+
73
+
74
# Two independent greyscale sketchpads, one per drawing to compare.
# An ImageEditor input (320x320 canvas, upload/clipboard sources, no layers,
# image_mode='L', type='numpy') was tried as an alternative and is disabled.
sketch_inputs = [
    gr.Sketchpad(image_mode='L', type='numpy')
    for _ in range(2)
]

# Outputs mirror compare()'s return value: preview image, similarity, text.
result_outputs = ["image", "number", "text"]

demo = gr.Interface(fn=compare, inputs=sketch_inputs, outputs=result_outputs)

demo.launch(share=True)
91
+
tiny_doodle_embedding.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:398b48fcb1ced8b55d3e99300094f5e8c4974d87a3052d61f92111b44bbd2b5f
3
+ size 42218748