NicolasVana committed
Commit 32162b0 · Parent: 9c96e70

Upload 120 files


Adding our own model

This view is limited to 50 files because the commit contains too many changes.
Files changed (50)
  1. .gitattributes +3 -0
  2. Inception/PretrainedInceptionLSTM/Model/keras_metadata.pb +3 -0
  3. Inception/PretrainedInceptionLSTM/Model/saved_model.pb +3 -0
  4. Inception/PretrainedInceptionLSTM/Model/variables/variables.data-00000-of-00001 +3 -0
  5. Inception/PretrainedInceptionLSTM/Model/variables/variables.index +0 -0
  6. Inception/PretrainedInceptionLSTM/index2Word.npy +0 -0
  7. Inception/PretrainedInceptionLSTM/variable_params.npy +0 -0
  8. Inception/PretrainedInceptionLSTM/word2Index.npy +0 -0
  9. Inception/RetrainedInceptionFeatureExtraction/Model/keras_metadata.pb +3 -0
  10. Inception/RetrainedInceptionFeatureExtraction/Model/saved_model.pb +3 -0
  11. Inception/RetrainedInceptionFeatureExtraction/Model/variables/variables.data-00000-of-00001 +3 -0
  12. Inception/RetrainedInceptionFeatureExtraction/Model/variables/variables.index +0 -0
  13. Inception/RetrainedInceptionLSTM/Model/keras_metadata.pb +3 -0
  14. Inception/RetrainedInceptionLSTM/Model/saved_model.pb +3 -0
  15. Inception/RetrainedInceptionLSTM/Model/variables/variables.data-00000-of-00001 +3 -0
  16. Inception/RetrainedInceptionLSTM/Model/variables/variables.index +0 -0
  17. Inception/RetrainedInceptionLSTM/index2Word.npy +0 -0
  18. Inception/RetrainedInceptionLSTM/variable_params.npy +0 -0
  19. Inception/RetrainedInceptionLSTM/word2Index.npy +0 -0
  20. app.py +27 -32
  21. model.py +149 -57
  22. samples/ROCO_00001.jpg +0 -0
  23. samples/ROCO_00006.jpg +0 -0
  24. samples/ROCO_00016.jpg +0 -0
  25. samples/ROCO_00025.jpg +0 -0
  26. samples/ROCO_00031.jpg +0 -0
  27. samples/ROCO_00036.jpg +0 -0
  28. samples/ROCO_00061.jpg +0 -0
  29. samples/ROCO_00084.jpg +0 -0
  30. samples/ROCO_00138.jpg +0 -0
  31. samples/ROCO_00153.jpg +0 -0
  32. samples/ROCO_00176.jpg +0 -0
  33. samples/ROCO_00185.jpg +0 -0
  34. samples/ROCO_00190.jpg +0 -0
  35. samples/ROCO_00206.jpg +0 -0
  36. samples/ROCO_00218.jpg +0 -0
  37. samples/ROCO_00251.jpg +0 -0
  38. samples/ROCO_00258.jpg +0 -0
  39. samples/ROCO_00261.jpg +0 -0
  40. samples/ROCO_00264.jpg +0 -0
  41. samples/ROCO_00271.jpg +0 -0
  42. samples/ROCO_00300.jpg +0 -0
  43. samples/ROCO_00302.jpg +0 -0
  44. samples/ROCO_00303.jpg +0 -0
  45. samples/ROCO_00307.jpg +0 -0
  46. samples/ROCO_00316.jpg +0 -0
  47. samples/ROCO_00319.jpg +0 -0
  48. samples/ROCO_00328.jpg +0 -0
  49. samples/ROCO_00332.jpg +0 -0
  50. samples/ROCO_00333.jpg +0 -0
.gitattributes CHANGED
@@ -14,3 +14,6 @@
 *.pb filter=lfs diff=lfs merge=lfs -text
 *.pt filter=lfs diff=lfs merge=lfs -text
 *.pth filter=lfs diff=lfs merge=lfs -text
+Inception/PretrainedInceptionLSTM/Model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+Inception/RetrainedInceptionFeatureExtraction/Model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
+Inception/RetrainedInceptionLSTM/Model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
Inception/PretrainedInceptionLSTM/Model/keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:90fe3518b5f0e26908c460bc876abaef2017a5252faea2854e19e6bbc80c1abb
+size 19875
Inception/PretrainedInceptionLSTM/Model/saved_model.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:5ac9410ec5d75b446ba1913ce546556b276f4f7243c6b84692dfe71d04785eb1
+size 2728089
Inception/PretrainedInceptionLSTM/Model/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:607eba2255866ff15c9be9dbc271e24c643b9c5650b5b36bd22c6f1ad461c443
+size 23853510
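Note: each of the `.pb` and `variables.data` hunks in this commit follows the Git LFS pointer layout seen above, with a spec version line, an `oid sha256:` line, and a `size` line; the actual weights live in LFS storage. A minimal sketch (not part of the commit, helper name is ours) of reading that metadata, assuming the file on disk is still the raw pointer (e.g. the repo was cloned with `GIT_LFS_SKIP_SMUDGE=1` or without git-lfs installed):

```python
# Parse a Git LFS pointer stub into its three key/value fields.
# After a normal LFS checkout the file is the real binary, not a
# pointer, so this only applies to un-smudged clones.
def parse_lfs_pointer(path):
    meta = {}
    with open(path, "r", encoding="utf-8") as fp:
        for line in fp:
            key, _, value = line.strip().partition(" ")
            meta[key] = value
    return meta

# For the pointer above this would yield:
# {'version': 'https://git-lfs.github.com/spec/v1',
#  'oid': 'sha256:607eba2255866ff15c9be9dbc271e24c643b9c5650b5b36bd22c6f1ad461c443',
#  'size': '23853510'}
```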
Inception/PretrainedInceptionLSTM/Model/variables/variables.index ADDED
Binary file (2.07 kB)
 
Inception/PretrainedInceptionLSTM/index2Word.npy ADDED
Binary file (91.1 kB)
 
Inception/PretrainedInceptionLSTM/variable_params.npy ADDED
Binary file (327 Bytes)
 
Inception/PretrainedInceptionLSTM/word2Index.npy ADDED
Binary file (91.1 kB)
 
Inception/RetrainedInceptionFeatureExtraction/Model/keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9b304413d09ac695dc11a96b0305ffb4e41f34f145b90a536ed4c929c11c7306
+size 974015
Inception/RetrainedInceptionFeatureExtraction/Model/saved_model.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:57d9834d47ee681be13d8ecf60b93770a30feb9d655dea12b78c0f0f7e1c845a
+size 6312206
Inception/RetrainedInceptionFeatureExtraction/Model/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:12cf113be83ae0bc7024191ae51b1e41c2c016d5543c3711e0bab928904eaeab
+size 279976841
Inception/RetrainedInceptionFeatureExtraction/Model/variables/variables.index ADDED
Binary file (50.2 kB)
 
Inception/RetrainedInceptionLSTM/Model/keras_metadata.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e29ab07736ef18245cac5040bf1dd2100d21e8084ed51db859064026a1a0fba4
+size 19858
Inception/RetrainedInceptionLSTM/Model/saved_model.pb ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e40821682b6a5e4b88848c9ec60bd8400cf2a37065137871f59112d77d027c65
+size 2727709
Inception/RetrainedInceptionLSTM/Model/variables/variables.data-00000-of-00001 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4ecad5c20713acfd90563bc562f048e9cc302936b162e2e196f37d38922a0dca
+size 18577366
Inception/RetrainedInceptionLSTM/Model/variables/variables.index ADDED
Binary file (2.07 kB)
 
Inception/RetrainedInceptionLSTM/index2Word.npy ADDED
Binary file (91.1 kB)
 
Inception/RetrainedInceptionLSTM/variable_params.npy ADDED
Binary file (327 Bytes)
 
Inception/RetrainedInceptionLSTM/word2Index.npy ADDED
Binary file (91.1 kB)
 
app.py CHANGED
@@ -4,21 +4,19 @@ import io


 # Designing the interface
-st.title("🖼️ Image Captioning Demo 📝")
-st.write("[Yih-Dar SHIEH](https://huggingface.co/ydshieh)")
+st.title("Medical Image Captioning")

 st.sidebar.markdown(
     """
-    An image captioning model by combining ViT model with GPT2 model.
-    The encoder (ViT) and decoder (GPT2) are combined using Hugging Face transformers' [Vision-To-Text Encoder-Decoder
-    framework](https://huggingface.co/transformers/master/model_doc/visionencoderdecoder.html).
-    The pretrained weights of both models are loaded, with a set of randomly initialized cross-attention weights.
-    The model is trained on the COCO 2017 dataset for about 6900 steps (batch_size=256).
-    [Follow-up work of [Huggingface JAX/Flax event](https://discuss.huggingface.co/t/open-to-the-community-community-week-using-jax-flax-for-nlp-cv/).]\n
+    This project features 3 different medical image captioning models.
+    Two of them use the InceptionV3 architecture to do feature extraction and then generate the captions using an LSTM model.
+    The difference between these two is that the first one uses InceptionV3 trained on ImageNet data and outputs 2048 features.
+    The second one is based on a retrained version of InceptionV3 that uses the CUI data from the ROCO dataset to extract 745 features from the images.
+    The final model is transformer based on...
     """
 )

-with st.spinner('Loading and compiling ViT-GPT2 model ...'):
+with st.spinner('Loading objects ...'):
     from model import *

     random_image_id = get_random_image_id()
@@ -29,7 +27,17 @@ sample_image_id = st.sidebar.selectbox(
     sample_image_ids
 )

-if st.sidebar.button("Random COCO 2017 (val) images"):
+st.sidebar.title("Select a model Type")
+model_type = st.sidebar.selectbox(
+    "Please choose a model",
+    ['Pretrained Inception', 'Retrained Inception', 'Transformer']
+)
+
+inception, lstm = fetch_model(model_type)
+word2Index, index2Word, variable_params = fetch_auxiliary_files(model_type)
+max_len = variable_params['max_caption_len']
+
+if st.sidebar.button("Random ROCO (test) images"):
     random_image_id = get_random_image_id()
     sample_image_id = "None"

@@ -51,47 +59,34 @@ else:
     assert type(sample_image_id) == int
     image_id = sample_image_id

-sample_name = f"COCO_val2017_{str(image_id).zfill(12)}.jpg"
+sample_name = f"ROCO_{str(image_id).zfill(5)}.jpg"
 sample_path = os.path.join(sample_dir, sample_name)

 if bytes_data is not None:
     image = Image.open(bytes_data)
 elif os.path.isfile(sample_path):
     image = Image.open(sample_path)
-else:
-    url = f"http://images.cocodataset.org/val2017/{str(image_id).zfill(12)}.jpg"
-    image = Image.open(requests.get(url, stream=True).raw)

-width, height = image.size
+width, height = 299, 299
 resized = image.resize(size=(width, height))
-if height > 384:
-    width = int(width / height * 384)
-    height = 384
-    resized = resized.resize(size=(width, height))
-width, height = resized.size
-if width > 512:
-    width = 512
-    height = int(height / width * 512)
-    resized = resized.resize(size=(width, height))

 if bytes_data is None:
-    st.markdown(f"[{str(image_id).zfill(12)}.jpg](http://images.cocodataset.org/val2017/{str(image_id).zfill(12)}.jpg)")
+    st.markdown(f"ROCO_{str(image_id).zfill(5)}.jpg")
     show = st.image(resized)
     show.image(resized, '\n\nSelected Image')
-    resized.close()

 # For newline
 st.sidebar.write('\n')

 with st.spinner('Generating image caption ...'):
-
-    caption = predict(image)
-
-    caption_en = caption
     st.header(f'Predicted caption:\n\n')
-    st.subheader(caption_en)

-    st.sidebar.header("ViT-GPT2 predicts: ")
+    preprocessed_img = preprocess_image_inception(resized)
+    features = extract_features(inception, preprocessed_img)
+    caption = generate_caption(lstm, features, max_len, word2Index, index2Word)
+    st.subheader(caption)
+
+    st.sidebar.header("Model predicts: ")
     st.sidebar.write(f"{caption}")

     image.close()
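Taken together, the new app.py wiring is a four-step pipeline: load the selected Inception/LSTM pair, resize the input to InceptionV3's 299x299, extract features, then decode a caption. A minimal sketch of that same path outside Streamlit, assuming the repo's Inception/ model folders are present and model.py (diffed below) is importable; since fetch_model is wrapped in st.cache and st.spinner, running it outside a Streamlit session may emit warnings:

```python
# Sketch of the inference path app.py now wires up, run as a plain script.
from PIL import Image

from model import (fetch_model, fetch_auxiliary_files,
                   preprocess_image_inception, extract_features,
                   generate_caption)

model_type = 'Pretrained Inception'      # or 'Retrained Inception'
inception, lstm = fetch_model(model_type)
word2Index, index2Word, variable_params = fetch_auxiliary_files(model_type)
max_len = variable_params['max_caption_len']

image = Image.open('samples/ROCO_00001.jpg')
resized = image.resize(size=(299, 299))  # InceptionV3 input size

x = preprocess_image_inception(resized)  # (1, 299, 299, 3), scaled by preprocess_input
features = extract_features(inception, x)  # 2048-d (pretrained) or 745-d (retrained)
caption = generate_caption(lstm, features, max_len, word2Index, index2Word)
print(caption)
```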
model.py CHANGED
@@ -1,68 +1,160 @@
 import json
 import os, shutil
 import random
-
+import streamlit as st
+import os
+from pathlib import Path
+import numpy as np

 from PIL import Image
-import jax
-from transformers import FlaxVisionEncoderDecoderModel, ViTFeatureExtractor, AutoTokenizer
-from huggingface_hub import hf_hub_download
-
-
-# create target model directory
-model_dir = './models/'
-os.makedirs(model_dir, exist_ok=True)
-
-files_to_download = [
-    "config.json",
-    "flax_model.msgpack",
-    "merges.txt",
-    "special_tokens_map.json",
-    "tokenizer.json",
-    "tokenizer_config.json",
-    "vocab.json",
-    "preprocessor_config.json",
-]
-
-# copy files from checkpoint hub:
-for fn in files_to_download:
-    file_path = hf_hub_download("ydshieh/vit-gpt2-coco-en-ckpts", f"ckpt_epoch_3_step_6900/{fn}")
-    shutil.copyfile(file_path, os.path.join(model_dir, fn))
-
-model = FlaxVisionEncoderDecoderModel.from_pretrained(model_dir)
-feature_extractor = ViTFeatureExtractor.from_pretrained(model_dir)
-tokenizer = AutoTokenizer.from_pretrained(model_dir)
-
-max_length = 16
-num_beams = 4
-gen_kwargs = {"max_length": max_length, "num_beams": num_beams}
-
-
-@jax.jit
-def generate(pixel_values):
-    output_ids = model.generate(pixel_values, **gen_kwargs).sequences
-    return output_ids
-
-
-def predict(image):
-
+import tensorflow as tf
+from tensorflow.keras.applications.inception_v3 import preprocess_input
+from tensorflow.keras.preprocessing import image
+from tensorflow.keras.applications.inception_v3 import InceptionV3
+from tensorflow.keras.models import Model
+from tensorflow.keras.preprocessing.sequence import pad_sequences
+
+
+
+root = Path(os.getcwd())
+aux_pre = root / 'Inception' / 'PretrainedInceptionLSTM'
+aux_re = root / 'Inception' / 'RetrainedInceptionLSTM'
+
+model_re_path = root / 'Inception' / 'RetrainedInceptionLSTM' / 'Model'
+model_inception_path = root / 'Inception' / 'RetrainedInceptionFeatureExtraction' / 'Model'
+model_pre_path = root / 'Inception' / 'PretrainedInceptionLSTM' / 'Model'
+
+# Must create
+
+def get_pretrained_inceptionV3():
+    model = InceptionV3(weights='imagenet')
+    model2 = Model(model.input, model.layers[-2].output)
+    return model2
+
+def fetch_auxiliary_files(type):
+    if type == 'Pretrained Inception':
+        word2Index = np.load(aux_pre / "word2Index.npy", allow_pickle=True).item()
+        index2Word = np.load(aux_pre / "index2Word.npy", allow_pickle=True).item()
+        variable_params = np.load(aux_pre / "variable_params.npy", allow_pickle=True).item()
+        return word2Index, index2Word, variable_params
+    if type == 'Retrained Inception':
+        word2Index = np.load(aux_re / "word2Index.npy", allow_pickle=True).item()
+        index2Word = np.load(aux_re / "index2Word.npy", allow_pickle=True).item()
+        variable_params = np.load(aux_re / "variable_params.npy", allow_pickle=True).item()
+        return word2Index, index2Word, variable_params
+
+@st.cache(allow_output_mutation=True, show_spinner=False)
+def fetch_model(type):
+    with st.spinner(text="Fetching Model"):
+        if type == 'Pretrained Inception':
+            model_pre = tf.keras.models.load_model(model_pre_path)
+            model_inc = get_pretrained_inceptionV3()
+            return model_inc, model_pre
+        if type == 'Retrained Inception':
+            model_re = tf.keras.models.load_model(model_re_path)
+            model_inc = tf.keras.models.load_model(model_inception_path)
+            return model_inc, model_re
+
+def preprocess_image_inception(image):
     if image.mode != "RGB":
         image = image.convert(mode="RGB")

-    pixel_values = feature_extractor(images=image, return_tensors="np").pixel_values
-
-    output_ids = generate(pixel_values)
-    preds = tokenizer.batch_decode(output_ids, skip_special_tokens=True)
-    preds = [pred.strip() for pred in preds]
-
-    return preds[0]
-
+    x = np.array(image)
+    x = np.expand_dims(x, axis = 0)
+    x = preprocess_input(x)
+    x = x.reshape(1, 299, 299, 3)
+
+    return x
+
+def extract_features(model, image):
+    features = model.predict(image, verbose = 0)
+    return features
+
+def generate_caption(model, features, max_len, word2Index, index2Word, beam_index = 3):
+    caption = beam_search(model, features, max_len, word2Index, index2Word, beam_index)
+    return caption
+
+def beam_search(model, features, max_len, word2Index, index2Word, beam_index):
+    start = [word2Index["startseq"]]
+    start_word = [[start, 1]]
+
+    final_preds = []
+    live_seqs = beam_index
+    features = np.tile(features, (beam_index,1))
+    count = 0
+    while len(start_word) > 0:
+        #print(count)
+        count+=1
+        temp = []
+        padded_seqs = []
+        #Get padded seqs for each of the starting seqs so far, misnamed as start_word
+        for s in start_word:
+            par_caps = pad_sequences([s[0]], maxlen=max_len, padding='post')
+            padded_seqs.append(par_caps)
+
+        #Formatting input so that it can be used for a prediction
+        padded_seqs = np.array(padded_seqs).reshape(len(start_word), max_len)
+
+        preds = model.predict([features[:len(start_word)],padded_seqs], verbose=0)
+
+        #Getting the best branches for each of the start seqs that we had
+        for index, pred in enumerate(preds):
+            word_preds = np.argsort(pred)[-live_seqs:]
+            for w in word_preds:
+                next_cap, prob = start_word[index][0][:], start_word[index][1]
+                next_cap.append(w)
+                prob *= pred[w]
+                temp.append([next_cap, prob])
+
+        start_word = temp
+        # Sorting according to the probabilities
+        start_word = sorted(start_word, reverse=False, key=lambda l: l[1])
+        # Getting the top words from all branches
+        start_word = start_word[-live_seqs:]
+
+        for pair in start_word:
+            if index2Word[pair[0][-1]] == 'endseq':
+                final_preds.append([pair[0][:-1], pair[1]])
+                start_word = start_word[:-1]
+                live_seqs -= 1
+            if len(pair[0]) == max_len:
+                final_preds.append(pair)
+                start_word = start_word[:-1]
+                live_seqs -= 1
+
+    # Between all the finished sequences (either max len or predicted endseq), decide which is best
+    max_prob = 0
+    for index, pred in enumerate(final_preds):
+        if pred[1] > max_prob:
+            best_index = index
+            max_prob = pred[1]
+
+    # Convert to readable text
+    final_pred = final_preds[best_index]
+    final_caption = [index2Word[i] for i in final_pred[0]]
+    final_caption = ' '.join(final_caption[1:])
+    return final_caption
+
+# # create target model directory
+# model_dir = './models/'
+# os.makedirs(model_dir, exist_ok=True)
+#
+# files_to_download = [
+#     "config.json",
+#     "flax_model.msgpack",
+#     "merges.txt",
+#     "special_tokens_map.json",
+#     "tokenizer.json",
+#     "tokenizer_config.json",
+#     "vocab.json",
+#     "preprocessor_config.json",
+# ]

 def _compile():

-    image_path = 'samples/val_000000039769.jpg'
+    image_path = 'samples/ROCO_00929.jpg'
     image = Image.open(image_path)
-    predict(image)
+    #predict(image)
     image.close()


@@ -70,13 +162,13 @@ _compile()


 sample_dir = './samples/'
-sample_image_ids = tuple(["None"] + [int(f.replace('COCO_val2017_', '').replace('.jpg', '')) for f in os.listdir(sample_dir) if f.startswith('COCO_val2017_')])
+sample_image_ids = tuple(["None"] + [int(f.replace('ROCO_', '').replace('.jpg', '')) for f in os.listdir(sample_dir) if f.startswith('ROCO_')])

-with open(os.path.join(sample_dir, "coco-val2017-img-ids.json"), "r", encoding="UTF-8") as fp:
-    coco_2017_val_image_ids = json.load(fp)
+with open(os.path.join(sample_dir, "Roco-img-ids.json"), "r", encoding="UTF-8") as fp:
+    roco_image_ids = json.load(fp)


 def get_random_image_id():

-    image_id = random.sample(coco_2017_val_image_ids, k=1)[0]
+    image_id = random.sample(roco_image_ids, k=1)[0]
     return image_id
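The heart of the new model.py is beam_search: it keeps the beam_index most probable partial captions, scores them all in one batched model.predict call, and retires a beam (decrementing live_seqs) whenever it emits endseq or reaches max_len. A self-contained toy version of that pruning scheme, with a made-up vocabulary and a random stand-in for the LSTM, may make the control flow easier to follow:

```python
# Toy illustration (not part of the commit) of the pruning scheme in
# beam_search above. The vocabulary and the next-word distribution are
# made up; only the keep-top-k / retire-on-endseq logic is the point.
import numpy as np

vocab = ['startseq', 'lesion', 'seen', 'on', 'ct', 'endseq']
rng = np.random.default_rng(0)

def fake_predict(seq):
    """Stand-in for the LSTM: a random next-word distribution."""
    p = rng.random(len(vocab))
    return p / p.sum()

def toy_beam_search(beam_index=3, max_len=6):
    beams = [([0], 1.0)]                 # (token ids, joint probability)
    finished, live = [], beam_index
    while beams:
        expanded = []
        for seq, prob in beams:
            pred = fake_predict(seq)
            for w in np.argsort(pred)[-live:]:        # best `live` continuations
                expanded.append((seq + [int(w)], prob * pred[w]))
        beams = sorted(expanded, key=lambda b: b[1])[-live:]  # prune to top `live`
        still_open = []
        for seq, prob in beams:
            if vocab[seq[-1]] == 'endseq' or len(seq) == max_len:
                finished.append((seq, prob))          # retire this beam
                live -= 1
            else:
                still_open.append((seq, prob))
        beams = still_open
    best_seq, _ = max(finished, key=lambda b: b[1])
    return ' '.join(vocab[i] for i in best_seq[1:] if vocab[i] != 'endseq')

print(toy_beam_search())
```

Like the committed version, the toy compares raw joint probabilities across sequences of different lengths, which tends to favor shorter captions; length-normalizing the scores is a common refinement.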
samples/ROCO_00001.jpg ADDED
samples/ROCO_00006.jpg ADDED
samples/ROCO_00016.jpg ADDED
samples/ROCO_00025.jpg ADDED
samples/ROCO_00031.jpg ADDED
samples/ROCO_00036.jpg ADDED
samples/ROCO_00061.jpg ADDED
samples/ROCO_00084.jpg ADDED
samples/ROCO_00138.jpg ADDED
samples/ROCO_00153.jpg ADDED
samples/ROCO_00176.jpg ADDED
samples/ROCO_00185.jpg ADDED
samples/ROCO_00190.jpg ADDED
samples/ROCO_00206.jpg ADDED
samples/ROCO_00218.jpg ADDED
samples/ROCO_00251.jpg ADDED
samples/ROCO_00258.jpg ADDED
samples/ROCO_00261.jpg ADDED
samples/ROCO_00264.jpg ADDED
samples/ROCO_00271.jpg ADDED
samples/ROCO_00300.jpg ADDED
samples/ROCO_00302.jpg ADDED
samples/ROCO_00303.jpg ADDED
samples/ROCO_00307.jpg ADDED
samples/ROCO_00316.jpg ADDED
samples/ROCO_00319.jpg ADDED
samples/ROCO_00328.jpg ADDED
samples/ROCO_00332.jpg ADDED
samples/ROCO_00333.jpg ADDED