Skip to content

Commit

Permalink
build datasets
Browse files Browse the repository at this point in the history
  • Loading branch information
yiyang7 committed Jun 14, 2018
1 parent 408adfe commit e7b9afc
Show file tree
Hide file tree
Showing 358 changed files with 117 additions and 146 deletions.
117 changes: 117 additions & 0 deletions build_dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
import argparse
import random
import os

from PIL import Image
from tqdm import tqdm

from skimage import io
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np
import matplotlib.pyplot as plt
from skimage import data, color
from skimage.transform import rescale, resize, downscale_local_mean
from skimage import util

# Default side lengths, in pixels. OUTPUT_SIZE is the default side of the
# square centre crop; INPUT_SIZE is the default for the `in_size` argument
# of blur_and_save.
INPUT_SIZE = 144
OUTPUT_SIZE = 144

# Command-line interface. The sizes are parsed as integers so that a
# non-numeric value is rejected by argparse with a usage message.
parser = argparse.ArgumentParser()
parser.add_argument('--data_dir', default='../img_align_celeba_test',
                    help="Directory containing the source .jpg images")
parser.add_argument('--output_dir', default='../data/cnn_faces',
                    help="Where to write the new data")
parser.add_argument('--input_size', type=int, default=INPUT_SIZE,
                    help="Size in pixels passed to blur_and_save as in_size")
parser.add_argument('--output_size', type=int, default=OUTPUT_SIZE,
                    help="Side length in pixels of the square centre crop")


def crop_and_save(filename, output_dir, out_size=OUTPUT_SIZE):
    """Centre-crop the image in `filename` and save it to `output_dir`.

    Args:
        filename: path of the image to read.
        output_dir: existing directory the crop is written to; the output
            keeps the base name of `filename`.
        out_size: side length in pixels of the square crop.

    The crop window is centred using the image's actual height and width
    (e.g. 218*178 -> 144*144). If the image is smaller than `out_size`
    along an axis, the whole extent of that axis is kept.
    """
    image = io.imread(filename)
    height, width = image.shape[:2]

    # Clamp at 0 so an undersized image never yields a negative start index,
    # which would silently slice from the far end of the array.
    vert_start = max((height - out_size) // 2, 0)
    horiz_start = max((width - out_size) // 2, 0)
    cropped = image[vert_start:vert_start + out_size,
                    horiz_start:horiz_start + out_size]

    # os.path.basename honours the platform's path separator, unlike
    # splitting on '/'.
    io.imsave(os.path.join(output_dir, os.path.basename(filename)), cropped)

def blur_and_save(filename, output_dir, in_size=INPUT_SIZE, out_size=OUTPUT_SIZE):
    """Blur the image in `filename` and save it to `output_dir`.

    The image is centre-cropped to `out_size` x `out_size`, shrunk by a
    factor of 4 along each axis, then resized back to `out_size` x
    `out_size`, which discards fine detail.

    Args:
        filename: path of the image to read.
        output_dir: existing directory the result is written to; the output
            keeps the base name of `filename`.
        in_size: accepted for interface compatibility; not used by this
            function.
        out_size: side length in pixels of the square crop and of the saved
            image.
    """
    image = io.imread(filename)
    height, width = image.shape[:2]

    # Clamp at 0 so an undersized image never yields a negative start index.
    vert_start = max((height - out_size) // 2, 0)
    horiz_start = max((width - out_size) // 2, 0)
    cropped = image[vert_start:vert_start + out_size,
                    horiz_start:horiz_start + out_size]  # e.g. 218*178 -> 144*144

    small = resize(cropped, (out_size // 4, out_size // 4))  # downscale by factor 4
    blur = resize(small, (out_size, out_size))  # rescale back to out_size * out_size

    # `resize` returns a floating-point image; convert to 8-bit explicitly
    # rather than relying on the image writer to accept float data.
    blur = util.img_as_ubyte(blur)

    # os.path.basename honours the platform's path separator, unlike
    # splitting on '/'.
    io.imsave(os.path.join(output_dir, os.path.basename(filename)), blur)

if __name__ == '__main__':
    # Script entry point: split the .jpg files found in --data_dir into
    # train/val/test and write a cropped ("clear") and a blurred copy of
    # each image under --output_dir.
    args = parser.parse_args()

    # Validate explicitly: an `assert` would be stripped under `python -O`.
    if not os.path.isdir(args.data_dir):
        parser.error("Couldn't find the dataset at {}".format(args.data_dir))

    # get args
    data_dir = args.data_dir
    INPUT_SIZE = int(args.input_size)
    OUTPUT_SIZE = int(args.output_size)

    # Get the filenames in data directory
    filenames = [os.path.join(data_dir, f)
                 for f in os.listdir(data_dir) if f.endswith('.jpg')]

    # Split the images into 98% train, 1% val, and 1% test
    # Make sure to always shuffle with a fixed seed so that the split is reproducible
    # (sorting first removes any dependence on the order os.listdir returns).
    random.seed(230)
    filenames.sort()
    random.shuffle(filenames)

    split1 = int(0.98 * len(filenames))
    split2 = (len(filenames) - split1) // 2 + split1
    train_filenames = filenames[:split1]
    val_filenames = filenames[split1:split2]
    test_filenames = filenames[split2:]

    print("train", len(train_filenames))
    print("val", len(val_filenames))
    print("test", len(test_filenames))

    filenames = {'train': train_filenames,
                 'val': val_filenames,
                 'test': test_filenames}

    # makedirs also creates missing parent directories (e.g. '../data'),
    # where os.mkdir would fail.
    if not os.path.exists(args.output_dir):
        os.makedirs(args.output_dir)
    else:
        print("Warning: output dir {} already exists".format(args.output_dir))

    # Preprocess train, val and test
    for split in ['train', 'val', 'test']:
        output_dir_split_clear = os.path.join(args.output_dir, '{}_clear'.format(split))
        output_dir_split_blur = os.path.join(args.output_dir, '{}_blur'.format(split))

        # Create the clear and blur directories for this split if needed.
        for split_dir in (output_dir_split_clear, output_dir_split_blur):
            if not os.path.exists(split_dir):
                os.makedirs(split_dir)
            else:
                print("Warning: dir {} already exists".format(split_dir))

        print("Processing {} data, saving to {} and {}".format(split, output_dir_split_clear, output_dir_split_blur))
        # clear image
        for filename in tqdm(filenames[split]):
            crop_and_save(filename, output_dir_split_clear, out_size=OUTPUT_SIZE)
        # blur image
        for filename in tqdm(filenames[split]):
            blur_and_save(filename, output_dir_split_blur, in_size=INPUT_SIZE, out_size=OUTPUT_SIZE)

    print("Done building dataset")

1 change: 0 additions & 1 deletion cnn/README.md

This file was deleted.

6 changes: 0 additions & 6 deletions cnn/Readme.txt

This file was deleted.

139 changes: 0 additions & 139 deletions cnn/build_dataset.py

This file was deleted.

Binary file removed cnn/data/test_faces/000878.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/001041.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/001076.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/002076.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/002300.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/002559.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/002571.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/002747.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/003185.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/003678.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/004734.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/005044.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/005216.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/005934.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/006227.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/007678.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/009783.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/010294.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/011538.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/011790.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/012147.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/012258.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/012267.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/013090.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/013544.jpg
Binary file not shown.
Binary file removed cnn/data/test_faces/013998.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/014867.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/016609.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/016647.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/017110.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/019154.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/019558.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/020208.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/020247.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/020605.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/020928.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/021743.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/022019.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/022383.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/022429.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/022477.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/023324.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/025642.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/025986.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/026213.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/026499.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/026662.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/026731.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/027059.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/028282.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/028297.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/028402.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/029559.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/030006.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/030766.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/031620.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/031767.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/031779.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/032311.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/032371.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/032782.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/033958.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/034191.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/034396.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/034526.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/034584.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/034636.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/037528.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/037958.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/038725.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/038840.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/039791.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/040261.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/040569.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/040744.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/040919.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/040995.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/041044.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/042286.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/042584.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/042795.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/043128.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/043618.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/044130.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/044216.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/044389.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/044937.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/045720.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/045891.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/046443.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/047776.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/047847.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/047888.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/048128.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/048183.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/048354.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/048416.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/048458.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/048752.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces/048784.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/000878.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/001041.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/001076.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/002076.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/002300.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/002559.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/002571.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/002747.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/003185.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/003678.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/004734.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/005044.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/005216.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/005934.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/006227.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/007678.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/009783.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/010294.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/011538.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/011790.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/012147.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/012258.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/012267.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/013090.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/013544.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/013998.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/014867.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/016609.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/016647.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/017110.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/019154.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/019558.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/020208.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/020247.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/020605.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/020928.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/021743.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/022019.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/022383.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/022429.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/022477.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/023324.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/025642.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/025986.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/026213.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/026499.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/026662.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/026731.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/027059.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/028282.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/028297.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/028402.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/029559.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/030006.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/030766.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/031620.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/031767.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/031779.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/032311.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/032371.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/032782.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/033958.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/034191.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/034396.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/034526.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/034584.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/034636.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/037528.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/037958.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/038725.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/038840.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/039791.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/040261.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/040569.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/040744.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/040919.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/040995.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/041044.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/042286.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/042584.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/042795.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/043128.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/043618.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/044130.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/044216.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/044389.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/044937.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/045720.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/045891.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/046443.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/047776.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/047847.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/047888.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/048128.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/048183.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/048354.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/048416.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/048458.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/048752.jpg
Diff not rendered.
Binary file removed cnn/data/test_faces_blur/048784.jpg
Diff not rendered.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Loading

0 comments on commit e7b9afc

Please sign in to comment.