"""Image-captioning helpers for a Streamlit app.

Loads an Inception feature extractor together with a caption model, turns a
PIL image into Inception-ready input and decodes a caption with beam search.
Importing this module also reads the sample image listing under ``./samples/``.
"""
import json
import os
import random
import shutil
from pathlib import Path

import numpy as np
import streamlit as st
import tensorflow as tf
from PIL import Image
from tensorflow.keras.applications.inception_v3 import InceptionV3
from tensorflow.keras.applications.inception_v3 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.sequence import pad_sequences

# All artefacts are resolved relative to the directory the app is started from.
root = Path(os.getcwd())
aux_pre = root / 'Inception' / 'PretrainedInceptionLSTM'
aux_re = root / 'Inception' / 'RetrainedInceptionLSTM'
model_re_path = aux_re / 'Model'
model_inception_path = root / 'Inception' / 'RetrainedInceptionFeatureExtraction' / 'Model'
model_pre_path = aux_pre / 'Model'


# Must create
def get_pretrained_inceptionV3():
    """Return ImageNet-weighted InceptionV3 truncated at its second-to-last layer."""
    model = InceptionV3(weights='imagenet')
    model2 = Model(model.input, model.layers[-2].output)
    return model2


def _load_auxiliary_files(aux_dir):
    """Load the three pickled ``.npy`` dictionaries stored in *aux_dir*."""
    # NOTE(review): allow_pickle=True executes pickle on load; only point this
    # at trusted, locally produced files.
    word2Index = np.load(aux_dir / "word2Index.npy", allow_pickle=True).item()
    index2Word = np.load(aux_dir / "index2Word.npy", allow_pickle=True).item()
    variable_params = np.load(aux_dir / "variable_params.npy", allow_pickle=True).item()
    return word2Index, index2Word, variable_params


def fetch_auxiliary_files(type):
    """Return ``(word2Index, index2Word, variable_params)`` for a model variant.

    Args:
        type: ``'Pretrained Inception'`` or ``'Retrained Inception'``.

    Raises:
        ValueError: if *type* is neither of the two supported names (the
            previous behaviour was to fall through and return ``None``).
    """
    if type == 'Pretrained Inception':
        return _load_auxiliary_files(aux_pre)
    if type == 'Retrained Inception':
        return _load_auxiliary_files(aux_re)
    raise ValueError(f"Unknown model type: {type!r}")


@st.cache(allow_output_mutation=True, show_spinner=False)
def fetch_model(type):
    """Load and cache ``(feature_extractor, caption_model)`` for a model variant.

    Args:
        type: ``'Pretrained Inception'`` or ``'Retrained Inception'``.

    Returns:
        A 2-tuple: the Inception model used for feature extraction, then the
        caption model loaded from the matching ``Model`` directory.

    Raises:
        ValueError: if *type* is neither of the two supported names (the
            previous behaviour was to fall through and return ``None``).
    """
    with st.spinner(text="Fetching Model"):
        if type == 'Pretrained Inception':
            model_pre = tf.keras.models.load_model(model_pre_path)
            model_inc = get_pretrained_inceptionV3()
            return model_inc, model_pre
        if type == 'Retrained Inception':
            model_re = tf.keras.models.load_model(model_re_path)
            model_inc = tf.keras.models.load_model(model_inception_path)
            return model_inc, model_re
    raise ValueError(f"Unknown model type: {type!r}")


def preprocess_image_inception(image):
    """Convert a PIL image into a ``(1, 299, 299, 3)`` Inception input batch.

    The image is converted to RGB and, if necessary, resized to 299x299 (the
    original code required the caller to pass an image of exactly that size
    and failed in ``reshape`` otherwise).
    """
    if image.mode != "RGB":
        image = image.convert(mode="RGB")
    if image.size != (299, 299):
        image = image.resize((299, 299))
    x = np.array(image)
    x = np.expand_dims(x, axis=0)
    x = preprocess_input(x)
    return x


def extract_features(model, image):
    """Run *model* on the preprocessed *image* batch and return its output."""
    features = model.predict(image, verbose=0)
    return features


def generate_caption(model, features, max_len, word2Index, index2Word, beam_index=3):
    """Return a caption for *features* decoded with beam search.

    See :func:`beam_search` for the meaning of the arguments.
    """
    caption = beam_search(model, features, max_len, word2Index, index2Word, beam_index)
    return caption


def beam_search(model, features, max_len, word2Index, index2Word, beam_index):
    """Decode a caption with beam search.

    Args:
        model: caption model; called as ``model.predict([features, seqs])``
            and expected to return one score vector per input row.
        features: image features for a single image; tiled ``beam_index``
            times along axis 0 so every live hypothesis has a matching row.
        max_len: length the token sequences are padded to; a hypothesis is
            finalised once it reaches this many tokens.
        word2Index: mapping that contains the ``"startseq"`` token.
        index2Word: inverse mapping; must map the end token to ``'endseq'``.
        beam_index: beam width (number of hypotheses kept alive).

    Returns:
        The words of the most probable finished hypothesis joined by spaces,
        without the leading start token (and without ``endseq``).

    Raises:
        ValueError: if *beam_index* is smaller than 1.
    """
    if beam_index < 1:
        raise ValueError("beam_index must be at least 1")

    # Each hypothesis is a [token_list, probability] pair.
    beams = [[[word2Index["startseq"]], 1]]
    finished = []
    live_seqs = beam_index
    features = np.tile(features, (beam_index, 1))

    while beams:
        # One padded row per live hypothesis, shape (len(beams), max_len).
        padded_seqs = pad_sequences(
            [tokens for tokens, _ in beams], maxlen=max_len, padding='post'
        )
        preds = model.predict([features[:len(beams)], padded_seqs], verbose=0)

        # Expand every hypothesis with its `live_seqs` best next words.
        candidates = []
        for (tokens, prob), pred in zip(beams, preds):
            for w in np.argsort(pred)[-live_seqs:]:
                candidates.append([tokens + [w], prob * pred[w]])

        # Keep only the overall top `live_seqs` candidates (ascending sort,
        # so the best ones are at the end).
        candidates.sort(key=lambda cand: cand[1])
        candidates = candidates[-live_seqs:]

        # Retire exactly the hypotheses that finished. The previous code
        # dropped the *last* beam instead of the finished one, and could
        # record the same hypothesis twice when both conditions held.
        beams = []
        for tokens, prob in candidates:
            if index2Word[tokens[-1]] == 'endseq':
                finished.append([tokens[:-1], prob])
            elif len(tokens) >= max_len:
                finished.append([tokens, prob])
            else:
                beams.append([tokens, prob])
        # Every retired hypothesis frees one beam slot.
        live_seqs -= len(candidates) - len(beams)

    # Between all the finished sequences (either max len or predicted endseq),
    # decide which is best. max() keeps the first of equally probable entries
    # and, unlike the old manual scan, also works when every probability is 0.
    final_pred = max(finished, key=lambda item: item[1])

    # Convert to readable text, dropping the leading start token.
    final_caption = [index2Word[i] for i in final_pred[0]]
    final_caption = ' '.join(final_caption[1:])
    return final_caption


# # create target model directory
# model_dir = './models/'
# os.makedirs(model_dir, exist_ok=True)
#
# files_to_download = [
#     "config.json",
#     "flax_model.msgpack",
#     "merges.txt",
#     "special_tokens_map.json",
#     "tokenizer.json",
#     "tokenizer_config.json",
#     "vocab.json",
#     "preprocessor_config.json",
# ]


def _compile():
    """Open the bundled sample image once at import time.

    The actual warm-up prediction is currently commented out, so this only
    verifies that the sample image can be opened.
    """
    image_path = 'samples/ROCO_00929.jpg'
    # The context manager closes the file even if opening/processing fails.
    with Image.open(image_path) as image:
        # predict(image)
        pass


_compile()


sample_dir = './samples/'

# "None" followed by the numeric id of every ROCO_<id>.jpg file in sample_dir.
sample_image_ids = tuple(
    ["None"]
    + [
        int(f.replace('ROCO_', '').replace('.jpg', ''))
        for f in os.listdir(sample_dir)
        if f.startswith('ROCO_')
    ]
)

with open(os.path.join(sample_dir, "Roco-img-ids.json"), "r", encoding="UTF-8") as fp:
    roco_image_ids = json.load(fp)


def get_random_image_id():
    """Return one entry of ``roco_image_ids`` chosen at random."""
    image_id = random.sample(roco_image_ids, k=1)[0]
    return image_id