Spaces:
Runtime error
Runtime error
Commit
·
32162b0
1
Parent(s):
9c96e70
Upload 120 files
Browse files
Adding our own model
This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +3 -0
- Inception/PretrainedInceptionLSTM/Model/keras_metadata.pb +3 -0
- Inception/PretrainedInceptionLSTM/Model/saved_model.pb +3 -0
- Inception/PretrainedInceptionLSTM/Model/variables/variables.data-00000-of-00001 +3 -0
- Inception/PretrainedInceptionLSTM/Model/variables/variables.index +0 -0
- Inception/PretrainedInceptionLSTM/index2Word.npy +0 -0
- Inception/PretrainedInceptionLSTM/variable_params.npy +0 -0
- Inception/PretrainedInceptionLSTM/word2Index.npy +0 -0
- Inception/RetrainedInceptionFeatureExtraction/Model/keras_metadata.pb +3 -0
- Inception/RetrainedInceptionFeatureExtraction/Model/saved_model.pb +3 -0
- Inception/RetrainedInceptionFeatureExtraction/Model/variables/variables.data-00000-of-00001 +3 -0
- Inception/RetrainedInceptionFeatureExtraction/Model/variables/variables.index +0 -0
- Inception/RetrainedInceptionLSTM/Model/keras_metadata.pb +3 -0
- Inception/RetrainedInceptionLSTM/Model/saved_model.pb +3 -0
- Inception/RetrainedInceptionLSTM/Model/variables/variables.data-00000-of-00001 +3 -0
- Inception/RetrainedInceptionLSTM/Model/variables/variables.index +0 -0
- Inception/RetrainedInceptionLSTM/index2Word.npy +0 -0
- Inception/RetrainedInceptionLSTM/variable_params.npy +0 -0
- Inception/RetrainedInceptionLSTM/word2Index.npy +0 -0
- app.py +27 -32
- model.py +149 -57
- samples/ROCO_00001.jpg +0 -0
- samples/ROCO_00006.jpg +0 -0
- samples/ROCO_00016.jpg +0 -0
- samples/ROCO_00025.jpg +0 -0
- samples/ROCO_00031.jpg +0 -0
- samples/ROCO_00036.jpg +0 -0
- samples/ROCO_00061.jpg +0 -0
- samples/ROCO_00084.jpg +0 -0
- samples/ROCO_00138.jpg +0 -0
- samples/ROCO_00153.jpg +0 -0
- samples/ROCO_00176.jpg +0 -0
- samples/ROCO_00185.jpg +0 -0
- samples/ROCO_00190.jpg +0 -0
- samples/ROCO_00206.jpg +0 -0
- samples/ROCO_00218.jpg +0 -0
- samples/ROCO_00251.jpg +0 -0
- samples/ROCO_00258.jpg +0 -0
- samples/ROCO_00261.jpg +0 -0
- samples/ROCO_00264.jpg +0 -0
- samples/ROCO_00271.jpg +0 -0
- samples/ROCO_00300.jpg +0 -0
- samples/ROCO_00302.jpg +0 -0
- samples/ROCO_00303.jpg +0 -0
- samples/ROCO_00307.jpg +0 -0
- samples/ROCO_00316.jpg +0 -0
- samples/ROCO_00319.jpg +0 -0
- samples/ROCO_00328.jpg +0 -0
- samples/ROCO_00332.jpg +0 -0
- samples/ROCO_00333.jpg +0 -0
.gitattributes
CHANGED
@@ -14,3 +14,6 @@
|
|
14 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
15 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
16 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
14 |
*.pb filter=lfs diff=lfs merge=lfs -text
|
15 |
*.pt filter=lfs diff=lfs merge=lfs -text
|
16 |
*.pth filter=lfs diff=lfs merge=lfs -text
|
17 |
+
Inception/PretrainedInceptionLSTM/Model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
18 |
+
Inception/RetrainedInceptionFeatureExtraction/Model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
19 |
+
Inception/RetrainedInceptionLSTM/Model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
|
Inception/PretrainedInceptionLSTM/Model/keras_metadata.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:90fe3518b5f0e26908c460bc876abaef2017a5252faea2854e19e6bbc80c1abb
|
3 |
+
size 19875
|
Inception/PretrainedInceptionLSTM/Model/saved_model.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5ac9410ec5d75b446ba1913ce546556b276f4f7243c6b84692dfe71d04785eb1
|
3 |
+
size 2728089
|
Inception/PretrainedInceptionLSTM/Model/variables/variables.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:607eba2255866ff15c9be9dbc271e24c643b9c5650b5b36bd22c6f1ad461c443
|
3 |
+
size 23853510
|
Inception/PretrainedInceptionLSTM/Model/variables/variables.index
ADDED
Binary file (2.07 kB). View file
|
|
Inception/PretrainedInceptionLSTM/index2Word.npy
ADDED
Binary file (91.1 kB). View file
|
|
Inception/PretrainedInceptionLSTM/variable_params.npy
ADDED
Binary file (327 Bytes). View file
|
|
Inception/PretrainedInceptionLSTM/word2Index.npy
ADDED
Binary file (91.1 kB). View file
|
|
Inception/RetrainedInceptionFeatureExtraction/Model/keras_metadata.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b304413d09ac695dc11a96b0305ffb4e41f34f145b90a536ed4c929c11c7306
|
3 |
+
size 974015
|
Inception/RetrainedInceptionFeatureExtraction/Model/saved_model.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57d9834d47ee681be13d8ecf60b93770a30feb9d655dea12b78c0f0f7e1c845a
|
3 |
+
size 6312206
|
Inception/RetrainedInceptionFeatureExtraction/Model/variables/variables.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:12cf113be83ae0bc7024191ae51b1e41c2c016d5543c3711e0bab928904eaeab
|
3 |
+
size 279976841
|
Inception/RetrainedInceptionFeatureExtraction/Model/variables/variables.index
ADDED
Binary file (50.2 kB). View file
|
|
Inception/RetrainedInceptionLSTM/Model/keras_metadata.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e29ab07736ef18245cac5040bf1dd2100d21e8084ed51db859064026a1a0fba4
|
3 |
+
size 19858
|
Inception/RetrainedInceptionLSTM/Model/saved_model.pb
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e40821682b6a5e4b88848c9ec60bd8400cf2a37065137871f59112d77d027c65
|
3 |
+
size 2727709
|
Inception/RetrainedInceptionLSTM/Model/variables/variables.data-00000-of-00001
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4ecad5c20713acfd90563bc562f048e9cc302936b162e2e196f37d38922a0dca
|
3 |
+
size 18577366
|
Inception/RetrainedInceptionLSTM/Model/variables/variables.index
ADDED
Binary file (2.07 kB). View file
|
|
Inception/RetrainedInceptionLSTM/index2Word.npy
ADDED
Binary file (91.1 kB). View file
|
|
Inception/RetrainedInceptionLSTM/variable_params.npy
ADDED
Binary file (327 Bytes). View file
|
|
Inception/RetrainedInceptionLSTM/word2Index.npy
ADDED
Binary file (91.1 kB). View file
|
|
app.py
CHANGED
@@ -4,21 +4,19 @@ import io
|
|
4 |
|
5 |
|
6 |
# Designing the interface
|
7 |
-
st.title("
|
8 |
-
st.write("[Yih-Dar SHIEH](https://huggingface.co/ydshieh)")
|
9 |
|
10 |
st.sidebar.markdown(
|
11 |
"""
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
The
|
16 |
-
The model is
|
17 |
-
[Follow-up work of [Huggingface JAX/Flax event](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/).]\n
|
18 |
"""
|
19 |
)
|
20 |
|
21 |
-
with st.spinner('Loading
|
22 |
from model import *
|
23 |
|
24 |
random_image_id = get_random_image_id()
|
@@ -29,7 +27,17 @@ sample_image_id = st.sidebar.selectbox(
|
|
29 |
sample_image_ids
|
30 |
)
|
31 |
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
33 |
random_image_id = get_random_image_id()
|
34 |
sample_image_id = "None"
|
35 |
|
@@ -51,47 +59,34 @@ else:
|
|
51 |
assert type(sample_image_id) == int
|
52 |
image_id = sample_image_id
|
53 |
|
54 |
-
sample_name = f"
|
55 |
sample_path = os.path.join(sample_dir, sample_name)
|
56 |
|
57 |
if bytes_data is not None:
|
58 |
image = Image.open(bytes_data)
|
59 |
elif os.path.isfile(sample_path):
|
60 |
image = Image.open(sample_path)
|
61 |
-
else:
|
62 |
-
url = f"http://images.cocodataset.org/val2017/{str(image_id).zfill(12)}.jpg"
|
63 |
-
image = Image.open(requests.get(url, stream=True).raw)
|
64 |
|
65 |
-
width, height =
|
66 |
resized = image.resize(size=(width, height))
|
67 |
-
if height > 384:
|
68 |
-
width = int(width / height * 384)
|
69 |
-
height = 384
|
70 |
-
resized = resized.resize(size=(width, height))
|
71 |
-
width, height = resized.size
|
72 |
-
if width > 512:
|
73 |
-
width = 512
|
74 |
-
height = int(height / width * 512)
|
75 |
-
resized = resized.resize(size=(width, height))
|
76 |
|
77 |
if bytes_data is None:
|
78 |
-
st.markdown(f"
|
79 |
show = st.image(resized)
|
80 |
show.image(resized, '\n\nSelected Image')
|
81 |
-
resized.close()
|
82 |
|
83 |
# For newline
|
84 |
st.sidebar.write('\n')
|
85 |
|
86 |
with st.spinner('Generating image caption ...'):
|
87 |
-
|
88 |
-
caption = predict(image)
|
89 |
-
|
90 |
-
caption_en = caption
|
91 |
st.header(f'Predicted caption:\n\n')
|
92 |
-
st.subheader(caption_en)
|
93 |
|
94 |
-
|
|
|
|
|
|
|
|
|
|
|
95 |
st.sidebar.write(f"{caption}")
|
96 |
|
97 |
image.close()
|
|
|
4 |
|
5 |
|
6 |
# Designing the interface
|
7 |
+
st.title("Medical Image Captioning")
|
|
|
8 |
|
9 |
st.sidebar.markdown(
|
10 |
"""
|
11 |
+
This project features 3 different Medical image captioning models.
|
12 |
+
Two of the use the InceptionV3 architecture to do feature extraction and then generate the captions using an LSTM model.
|
13 |
+
The difference between these two is that the first one uses InceptionV3 trained on ImageNet data and outputs 2048 features.
|
14 |
+
The second one is based on a retrained version of InceptionV3 that uses the CUI data from the ROCO dataset to extract 745 features from the images.
|
15 |
+
The final model is transformer based on...
|
|
|
16 |
"""
|
17 |
)
|
18 |
|
19 |
+
with st.spinner('Loading objects ...'):
|
20 |
from model import *
|
21 |
|
22 |
random_image_id = get_random_image_id()
|
|
|
27 |
sample_image_ids
|
28 |
)
|
29 |
|
30 |
+
st.sidebar.title("Select a model Type")
|
31 |
+
model_type = st.sidebar.selectbox(
|
32 |
+
"Please choose a model",
|
33 |
+
['Pretrained Inception', 'Retrained Inception', 'Transformer']
|
34 |
+
)
|
35 |
+
|
36 |
+
inception, lstm = fetch_model(model_type)
|
37 |
+
word2Index, index2Word, variable_params = fetch_auxiliary_files(model_type)
|
38 |
+
max_len = variable_params['max_caption_len']
|
39 |
+
|
40 |
+
if st.sidebar.button("Random ROCO (test) images"):
|
41 |
random_image_id = get_random_image_id()
|
42 |
sample_image_id = "None"
|
43 |
|
|
|
59 |
assert type(sample_image_id) == int
|
60 |
image_id = sample_image_id
|
61 |
|
62 |
+
sample_name = f"ROCO_{str(image_id).zfill(5)}.jpg"
|
63 |
sample_path = os.path.join(sample_dir, sample_name)
|
64 |
|
65 |
if bytes_data is not None:
|
66 |
image = Image.open(bytes_data)
|
67 |
elif os.path.isfile(sample_path):
|
68 |
image = Image.open(sample_path)
|
|
|
|
|
|
|
69 |
|
70 |
+
width, height = 299, 299
|
71 |
resized = image.resize(size=(width, height))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
if bytes_data is None:
|
74 |
+
st.markdown(f"ROCO_{str(image_id).zfill(5)}.jpg")
|
75 |
show = st.image(resized)
|
76 |
show.image(resized, '\n\nSelected Image')
|
|
|
77 |
|
78 |
# For newline
|
79 |
st.sidebar.write('\n')
|
80 |
|
81 |
with st.spinner('Generating image caption ...'):
|
|
|
|
|
|
|
|
|
82 |
st.header(f'Predicted caption:\n\n')
|
|
|
83 |
|
84 |
+
preprocessed_img = preprocess_image_inception(resized)
|
85 |
+
features = extract_features(inception, preprocessed_img)
|
86 |
+
caption = generate_caption(lstm, features, max_len, word2Index, index2Word)
|
87 |
+
st.subheader(caption)
|
88 |
+
|
89 |
+
st.sidebar.header("Model predicts: ")
|
90 |
st.sidebar.write(f"{caption}")
|
91 |
|
92 |
image.close()
|
model.py
CHANGED
@@ -1,68 +1,160 @@
|
|
1 |
import json
|
2 |
import os, shutil
|
3 |
import random
|
4 |
-
|
|
|
|
|
|
|
5 |
|
6 |
from PIL import Image
|
7 |
-
import
|
8 |
-
from
|
9 |
-
from
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
if image.mode != "RGB":
|
50 |
image = image.convert(mode="RGB")
|
51 |
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
|
61 |
def _compile():
|
62 |
|
63 |
-
image_path = 'samples/
|
64 |
image = Image.open(image_path)
|
65 |
-
predict(image)
|
66 |
image.close()
|
67 |
|
68 |
|
@@ -70,13 +162,13 @@ _compile()
|
|
70 |
|
71 |
|
72 |
sample_dir = './samples/'
|
73 |
-
sample_image_ids = tuple(["None"] + [int(f.replace('
|
74 |
|
75 |
-
with open(os.path.join(sample_dir, "
|
76 |
-
|
77 |
|
78 |
|
79 |
def get_random_image_id():
|
80 |
|
81 |
-
image_id = random.sample(
|
82 |
return image_id
|
|
|
1 |
import json
|
2 |
import os, shutil
|
3 |
import random
|
4 |
+
import streamlit as st
|
5 |
+
import os
|
6 |
+
from pathlib import Path
|
7 |
+
import numpy as np
|
8 |
|
9 |
from PIL import Image
|
10 |
+
import tensorflow as tf
|
11 |
+
from tensorflow.keras.applications.inception_v3 import preprocess_input
|
12 |
+
from tensorflow.keras.preprocessing import image
|
13 |
+
from tensorflow.keras.applications.inception_v3 import InceptionV3
|
14 |
+
from tensorflow.keras.models import Model
|
15 |
+
from tensorflow.keras.preprocessing.sequence import pad_sequences
|
16 |
+
|
17 |
+
|
18 |
+
|
19 |
+
root = Path(os.getcwd())
|
20 |
+
aux_pre = root / 'Inception' / 'PretrainedInceptionLSTM'
|
21 |
+
aux_re = root / 'Inception' / 'RetrainedInceptionLSTM'
|
22 |
+
|
23 |
+
model_re_path = root / 'Inception' / 'RetrainedInceptionLSTM' / 'Model'
|
24 |
+
model_inception_path = root / 'Inception' / 'RetrainedInceptionFeatureExtraction' / 'Model'
|
25 |
+
model_pre_path = root / 'Inception' / 'PretrainedInceptionLSTM' / 'Model'
|
26 |
+
|
27 |
+
# Must create
|
28 |
+
|
29 |
+
def get_pretrained_inceptionV3():
|
30 |
+
model = InceptionV3(weights='imagenet')
|
31 |
+
model2 = Model(model.input, model.layers[-2].output)
|
32 |
+
return model2
|
33 |
+
|
34 |
+
def fetch_auxiliary_files(type):
|
35 |
+
if type == 'Pretrained Inception':
|
36 |
+
word2Index = np.load(aux_pre / "word2Index.npy", allow_pickle=True).item()
|
37 |
+
index2Word = np.load(aux_pre / "index2Word.npy", allow_pickle=True).item()
|
38 |
+
variable_params = np.load(aux_pre / "variable_params.npy", allow_pickle=True).item()
|
39 |
+
return word2Index, index2Word, variable_params
|
40 |
+
if type == 'Retrained Inception':
|
41 |
+
word2Index = np.load(aux_re / "word2Index.npy", allow_pickle=True).item()
|
42 |
+
index2Word = np.load(aux_re / "index2Word.npy", allow_pickle=True).item()
|
43 |
+
variable_params = np.load(aux_re / "variable_params.npy", allow_pickle=True).item()
|
44 |
+
return word2Index, index2Word, variable_params
|
45 |
+
|
46 |
+
@st.cache(allow_output_mutation=True, show_spinner=False)
|
47 |
+
def fetch_model(type):
|
48 |
+
with st.spinner(text="Fetching Model"):
|
49 |
+
if type == 'Pretrained Inception':
|
50 |
+
model_pre = tf.keras.models.load_model(model_pre_path)
|
51 |
+
model_inc = get_pretrained_inceptionV3()
|
52 |
+
return model_inc, model_pre
|
53 |
+
if type == 'Retrained Inception':
|
54 |
+
model_re = tf.keras.models.load_model(model_re_path)
|
55 |
+
model_inc = tf.keras.models.load_model(model_inception_path)
|
56 |
+
return model_inc, model_re
|
57 |
+
|
58 |
+
def preprocess_image_inception(image):
|
59 |
if image.mode != "RGB":
|
60 |
image = image.convert(mode="RGB")
|
61 |
|
62 |
+
x = np.array(image)
|
63 |
+
x = np.expand_dims(x, axis = 0)
|
64 |
+
x = preprocess_input(x)
|
65 |
+
x = x.reshape(1, 299, 299, 3)
|
66 |
+
|
67 |
+
return x
|
68 |
+
|
69 |
+
def extract_features(model, image):
|
70 |
+
features = model.predict(image, verbose = 0)
|
71 |
+
return features
|
72 |
+
|
73 |
+
def generate_caption(model, features, max_len, word2Index, index2Word, beam_index = 3):
|
74 |
+
caption = beam_search(model, features, max_len, word2Index, index2Word, beam_index)
|
75 |
+
return caption
|
76 |
+
|
77 |
+
def beam_search(model, features, max_len, word2Index, index2Word, beam_index):
|
78 |
+
start = [word2Index["startseq"]]
|
79 |
+
start_word = [[start, 1]]
|
80 |
+
|
81 |
+
final_preds = []
|
82 |
+
live_seqs = beam_index
|
83 |
+
features = np.tile(features, (beam_index,1))
|
84 |
+
count = 0
|
85 |
+
while len(start_word) > 0:
|
86 |
+
#print(count)
|
87 |
+
count+=1
|
88 |
+
temp = []
|
89 |
+
padded_seqs = []
|
90 |
+
#Get padded seqs for each of the starting seqs so far, misnamed as start_word
|
91 |
+
for s in start_word:
|
92 |
+
par_caps = pad_sequences([s[0]], maxlen=max_len, padding='post')
|
93 |
+
padded_seqs.append(par_caps)
|
94 |
+
|
95 |
+
#Formatting input so that it can be used for a prediction
|
96 |
+
padded_seqs = np.array(padded_seqs).reshape(len(start_word), max_len)
|
97 |
+
|
98 |
+
preds = model.predict([features[:len(start_word)],padded_seqs], verbose=0)
|
99 |
+
|
100 |
+
#Getting the best branches for each of the start seqs that we had
|
101 |
+
for index, pred in enumerate(preds):
|
102 |
+
word_preds = np.argsort(pred)[-live_seqs:]
|
103 |
+
for w in word_preds:
|
104 |
+
next_cap, prob = start_word[index][0][:], start_word[index][1]
|
105 |
+
next_cap.append(w)
|
106 |
+
prob *= pred[w]
|
107 |
+
temp.append([next_cap, prob])
|
108 |
+
|
109 |
+
start_word = temp
|
110 |
+
# Sorting according to the probabilities
|
111 |
+
start_word = sorted(start_word, reverse=False, key=lambda l: l[1])
|
112 |
+
# Getting the top words from all branches
|
113 |
+
start_word = start_word[-live_seqs:]
|
114 |
+
|
115 |
+
for pair in start_word:
|
116 |
+
if index2Word[pair[0][-1]] == 'endseq':
|
117 |
+
final_preds.append([pair[0][:-1], pair[1]])
|
118 |
+
start_word = start_word[:-1]
|
119 |
+
live_seqs -= 1
|
120 |
+
if len(pair[0]) == max_len:
|
121 |
+
final_preds.append(pair)
|
122 |
+
start_word = start_word[:-1]
|
123 |
+
live_seqs -= 1
|
124 |
+
|
125 |
+
# Between all the finished sequences (either max len or predicted endseq), decide which is best
|
126 |
+
max_prob = 0
|
127 |
+
for index, pred in enumerate(final_preds):
|
128 |
+
if pred[1] > max_prob:
|
129 |
+
best_index = index
|
130 |
+
max_prob = pred[1]
|
131 |
+
|
132 |
+
# Convert to readable text
|
133 |
+
final_pred = final_preds[best_index]
|
134 |
+
final_caption = [index2Word[i] for i in final_pred[0]]
|
135 |
+
final_caption = ' '.join(final_caption[1:])
|
136 |
+
return final_caption
|
137 |
+
|
138 |
+
# # create target model directory
|
139 |
+
# model_dir = './models/'
|
140 |
+
# os.makedirs(model_dir, exist_ok=True)
|
141 |
+
#
|
142 |
+
# files_to_download = [
|
143 |
+
# "config.json",
|
144 |
+
# "flax_model.msgpack",
|
145 |
+
# "merges.txt",
|
146 |
+
# "special_tokens_map.json",
|
147 |
+
# "tokenizer.json",
|
148 |
+
# "tokenizer_config.json",
|
149 |
+
# "vocab.json",
|
150 |
+
# "preprocessor_config.json",
|
151 |
+
# ]
|
152 |
|
153 |
def _compile():
|
154 |
|
155 |
+
image_path = 'samples/ROCO_00929.jpg'
|
156 |
image = Image.open(image_path)
|
157 |
+
#predict(image)
|
158 |
image.close()
|
159 |
|
160 |
|
|
|
162 |
|
163 |
|
164 |
sample_dir = './samples/'
|
165 |
+
sample_image_ids = tuple(["None"] + [int(f.replace('ROCO_', '').replace('.jpg', '')) for f in os.listdir(sample_dir) if f.startswith('ROCO_')])
|
166 |
|
167 |
+
with open(os.path.join(sample_dir, "Roco-img-ids.json"), "r", encoding="UTF-8") as fp:
|
168 |
+
roco_image_ids = json.load(fp)
|
169 |
|
170 |
|
171 |
def get_random_image_id():
|
172 |
|
173 |
+
image_id = random.sample(roco_image_ids, k=1)[0]
|
174 |
return image_id
|
samples/ROCO_00001.jpg
ADDED
![]() |
samples/ROCO_00006.jpg
ADDED
![]() |
samples/ROCO_00016.jpg
ADDED
![]() |
samples/ROCO_00025.jpg
ADDED
![]() |
samples/ROCO_00031.jpg
ADDED
![]() |
samples/ROCO_00036.jpg
ADDED
![]() |
samples/ROCO_00061.jpg
ADDED
![]() |
samples/ROCO_00084.jpg
ADDED
![]() |
samples/ROCO_00138.jpg
ADDED
![]() |
samples/ROCO_00153.jpg
ADDED
![]() |
samples/ROCO_00176.jpg
ADDED
![]() |
samples/ROCO_00185.jpg
ADDED
![]() |
samples/ROCO_00190.jpg
ADDED
![]() |
samples/ROCO_00206.jpg
ADDED
![]() |
samples/ROCO_00218.jpg
ADDED
![]() |
samples/ROCO_00251.jpg
ADDED
![]() |
samples/ROCO_00258.jpg
ADDED
![]() |
samples/ROCO_00261.jpg
ADDED
![]() |
samples/ROCO_00264.jpg
ADDED
![]() |
samples/ROCO_00271.jpg
ADDED
![]() |
samples/ROCO_00300.jpg
ADDED
![]() |
samples/ROCO_00302.jpg
ADDED
![]() |
samples/ROCO_00303.jpg
ADDED
![]() |
samples/ROCO_00307.jpg
ADDED
![]() |
samples/ROCO_00316.jpg
ADDED
![]() |
samples/ROCO_00319.jpg
ADDED
![]() |
samples/ROCO_00328.jpg
ADDED
![]() |
samples/ROCO_00332.jpg
ADDED
![]() |
samples/ROCO_00333.jpg
ADDED
![]() |