File size: 2,879 Bytes
d8e11b0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 |
#!/bin/bash
#
# Script to download and extract the ImageNet (ILSVRC2012) dataset
# Download the two ILSVRC2012 archives into the current directory:
#   ILSVRC2012_img_train.tar (about 138 GB)
#   ILSVRC2012_img_val.tar   (about 6.3 GB)
#
# -c resumes a partially downloaded archive instead of starting over, and
# avoids saving a second copy as <name>.1 when the file is already present.
# NOTE(review): --no-check-certificate disables TLS certificate verification
# for these downloads; it is kept from the original command -- drop it if the
# server certificate validates in your environment.
#
# Both archives must be in the current directory for the extraction steps that
# follow. If a download fails and the archive is not already present, stop
# here instead of running the extraction without its input.
for archive in ILSVRC2012_img_train.tar ILSVRC2012_img_val.tar; do
  if ! wget -c --no-check-certificate "https://image-net.org/data/ILSVRC/2012/${archive}"; then
    if [[ ! -s "${archive}" ]]; then
      echo "error: could not download ${archive} and it is not in the current directory" >&2
      exit 1
    fi
    echo "warning: download of ${archive} failed; using the copy already in the current directory" >&2
  fi
done
#
# Adapted from:
# https://github.com/facebook/fb.resnet.torch/blob/master/INSTALL.md
# https://gist.github.com/BIGBALLON/8a71d225eff18d88e469e6ea9b39cef4
#
# imagenet/train/
# ├── n01440764
# │   ├── n01440764_10026.JPEG
# │   ├── n01440764_10027.JPEG
# │   ├── ......
# ├── ......
# imagenet/val/
# ├── n01440764
# │   ├── ILSVRC2012_val_00000293.JPEG
# │   ├── ILSVRC2012_val_00002138.JPEG
# │   ├── ......
# ├── ......
#
#
# Make the imagenet directory and its train/ subdirectory
#
# -p creates imagenet/ and imagenet/train/ in one step and does not fail when
# either directory already exists.
mkdir -p imagenet/train || {
  echo "error: cannot create imagenet/train" >&2
  exit 1
}
#
# Extract the training data:
#
# Move the training archive into the directory where it will be unpacked.
mv ILSVRC2012_img_train.tar imagenet/train/ || {
  echo "error: cannot move ILSVRC2012_img_train.tar into imagenet/train/" >&2
  exit 1
}
# Everything below operates on the current directory and deletes .tar files
# found in it, so stop if the directory change did not happen.
cd imagenet/train || {
  echo "error: cannot change directory to imagenet/train" >&2
  exit 1
}
# Extract training set; remove the archive only after a successful extraction
if ! tar -xvf ILSVRC2012_img_train.tar; then
  echo "error: failed to extract ILSVRC2012_img_train.tar" >&2
  exit 1
fi
rm -f ILSVRC2012_img_train.tar
#
# At this stage imagenet/train is expected to contain 1000 .tar files, one for each category
#
# For each .tar file:
# 1. create directory with same name as .tar file (minus the .tar suffix)
# 2. extract contents of .tar file into that directory
# 3. remove .tar file -- only if steps 1 and 2 succeeded, so a failed
#    extraction never costs the archive
#
# find emits NUL-terminated paths and read uses -r with an empty IFS, so path
# names reach the loop unmodified. The loop reads from a process substitution
# (not a pipe) so the failure counter is still set after the loop.
class_archive_failures=0
while IFS= read -r -d '' class_archive; do
  class_dir="${class_archive%.tar}"
  if mkdir -p "${class_dir}" && tar -xvf "${class_archive}" -C "${class_dir}"; then
    rm -f "${class_archive}"
  else
    echo "warning: failed to extract ${class_archive}; archive kept" >&2
    class_archive_failures=$((class_archive_failures + 1))
  fi
done < <(find . -name "*.tar" -print0)
if ((class_archive_failures > 0)); then
  echo "warning: ${class_archive_failures} class archive(s) could not be extracted" >&2
fi
#
# This results in a training directory like so:
#
# imagenet/train/
# ├── n01440764
# │   ├── n01440764_10026.JPEG
# │   ├── n01440764_10027.JPEG
# │   ├── ......
# ├── ......
#
# Change back to original directory
cd ../.. || {
  echo "error: cannot change back to the original directory" >&2
  exit 1
}
#
# Extract the validation data and move images to subfolders:
#
# Create validation directory; move .tar file; change directory; extract
# validation .tar; remove compressed file.
# Each step is checked on its own: the commands further down act on the
# current directory, so they must not run unless we really are inside
# imagenet/val with the archive unpacked.
mkdir -p imagenet/val || {
  echo "error: cannot create imagenet/val" >&2
  exit 1
}
mv ILSVRC2012_img_val.tar imagenet/val/ || {
  echo "error: cannot move ILSVRC2012_img_val.tar into imagenet/val/" >&2
  exit 1
}
cd imagenet/val || {
  echo "error: cannot change directory to imagenet/val" >&2
  exit 1
}
if ! tar -xvf ILSVRC2012_img_val.tar; then
  echo "error: failed to extract ILSVRC2012_img_val.tar" >&2
  exit 1
fi
rm -f ILSVRC2012_img_val.tar
# Run the class-sorting helper. Per the original note it is based on soumith's
# valprep.sh and is expected to create all class directories and move the
# images into their corresponding directories (the helper itself is not part
# of this file). The upstream one-liner is kept here for reference:
# wget -qO- https://raw.githubusercontent.com/soumith/imagenetloader.torch/master/valprep.sh | bash
# ../../ is the directory the script was started from (two levels above
# imagenet/val); imagenetval.sh must be located there.
cp ../../imagenetval.sh ./ || {
  echo "error: cannot copy imagenetval.sh from the original directory" >&2
  exit 1
}
bash imagenetval.sh || {
  echo "error: imagenetval.sh failed" >&2
  exit 1
}
#
# This results in a validation directory like so:
#
# imagenet/val/
# ├── n01440764
# │   ├── ILSVRC2012_val_00000293.JPEG
# │   ├── ILSVRC2012_val_00002138.JPEG
# │   ├── ......
# ├── ......
#
#
# Check total files after extract
#
#######################################
# Print the number of *.JPEG files found under a directory and warn when it
# differs from the expected total.
# Arguments: $1 - directory to scan (searched recursively by find)
#            $2 - expected number of .JPEG files
# Outputs:   the count on stdout; a warning on stderr on mismatch
#######################################
count_jpegs() {
  local dir=$1
  local expected=$2
  local found
  found=$(find "${dir}" -name "*.JPEG" | wc -l)
  # Some wc implementations pad the number with blanks; strip them so the
  # comparison below is exact.
  found=${found//[[:space:]]/}
  printf '%s\n' "${found}"
  if [[ "${found}" != "${expected}" ]]; then
    echo "warning: ${dir} contains ${found} .JPEG files, expected ${expected}" >&2
  fi
}
# train/ and val/ are addressed relative to the parent directory, so do not
# count anything if the directory change fails.
cd .. || {
  echo "error: cannot change to the parent directory" >&2
  exit 1
}
count_jpegs train/ 1281167
# 1281167
count_jpegs val/ 50000
# 50000
|