# -*- coding: utf-8 -*-
"""HW3.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1H-R9L74rpYOoQJOnTLLbUpcNpd9Tty_D

Downloads the LFW archive, extracts one ResNet50 feature vector per image
for a bounded sample of the dataset, and prints the nearest neighbours of
every sampled image in feature space.
"""

import os
import subprocess

import numpy as np
import tensorflow as tf
import tensorflow.keras.applications.resnet50 as resnet50
from PIL import Image
from sklearn.datasets import load_sample_image
from sklearn.neighbors import NearestNeighbors
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from tensorflow.keras.preprocessing.image import load_img, img_to_array

DATASET_URL = 'http://vis-www.cs.umass.edu/lfw/lfw.tgz'
ARCHIVE_PATH = '/content/lfw.tgz'
# Upper bound on how many entries of the dataset directory are visited.
MAX_ENTRIES = 100
# Number of neighbours requested per image (clamped to the sample count below).
N_NEIGHBORS = 10

directory = '/content/lfw'

# The notebook used the shell magics `!wget ...` and `!tar -xvf ...`, which
# are not valid Python in a plain script. Run the same commands through
# subprocess, and skip them when the dataset directory is already present.
if not os.path.isdir(directory):
    subprocess.run(['wget', DATASET_URL], check=True)
    subprocess.run(['tar', '-xvf', ARCHIVE_PATH], check=True)

model = resnet50.ResNet50(weights='imagenet', include_top=False, pooling='avg')
feature_dict = {}
image_files = []
target_size = (224, 224)


def preprocess_image(image_path, target_size):
    """Load one image and return the model's features for it.

    Args:
        image_path: Path of the image. It is joined onto ``directory``, so
            a relative path is resolved inside the dataset directory while
            an absolute path is used unchanged (``os.path.join`` drops the
            earlier components when a later one is absolute).
        target_size: ``(height, width)`` the image is resized to on load.

    Returns:
        The result of ``model.predict`` for a batch holding this single
        image; callers flatten it to get one feature vector.
    """
    img = load_img(os.path.join(directory, image_path), target_size=target_size)
    x = img_to_array(img)
    # The model expects a batch, so add a leading axis of size 1.
    x = tf.expand_dims(x, axis=0)
    x = preprocess_input(x)
    return model.predict(x)


# Only the first MAX_ENTRIES entries of the dataset directory are processed
# because the whole dataset costs too much CPU power and RAM. Note that this
# bounds directory entries, not images. Sorting makes the sample
# reproducible, since os.listdir returns entries in arbitrary order.
for entry in sorted(os.listdir(directory))[:MAX_ENTRIES]:
    entry_dir = os.path.join(directory, entry)
    if not os.path.isdir(entry_dir):
        continue
    for file_name in sorted(os.listdir(entry_dir)):
        image_path = os.path.join(entry_dir, file_name)
        feature_dict[image_path] = preprocess_image(image_path, target_size).flatten()

for image_path, features in feature_dict.items():
    print(image_path, features)

# Dicts preserve insertion order, so row k of feature_map belongs to
# image_paths[k]. Build the path list once instead of once per neighbour.
image_paths = list(feature_dict)
if not image_paths:
    raise ValueError('No images found under ' + directory)

feature_map = np.array(list(feature_dict.values()))

# kneighbors raises when asked for more neighbours than there are samples.
NearNeigh = NearestNeighbors(
    n_neighbors=min(N_NEIGHBORS, len(image_paths)),
    algorithm='auto',
).fit(feature_map)

# Query every image in a single call. Each image is itself part of the
# fitted data, so its own path is expected to appear among its neighbours.
distances, indices = NearNeigh.kneighbors(feature_map)
for image_path, neighbor_indices in zip(image_paths, indices):
    print('Similar images for', image_path)
    for rank, index in enumerate(neighbor_indices, start=1):
        print(rank, image_paths[index])