From b2926e789de6848adcd5ee40334a5c6672f73f0f Mon Sep 17 00:00:00 2001 From: Yi Huang Date: Wed, 22 Sep 2021 00:30:09 -0400 Subject: [PATCH 1/8] extract script added --- .../.ipynb_checkpoints/preprocess-checkpoint | 231 ++++++++++++++++++ .../view_dataset-checkpoint | 160 ++++++++++++ scripts/README_extract.md | 48 ++++ scripts/extract_toy-sp.py | 223 +++++++++++++++++ toytools.egg-info/PKG-INFO | 11 + toytools.egg-info/SOURCES.txt | 21 ++ toytools.egg-info/dependency_links.txt | 1 + toytools.egg-info/requires.txt | 3 + toytools.egg-info/top_level.txt | 1 + .../.ipynb_checkpoints/collect-checkpoint.py | 166 +++++++++++++ toytools/__pycache__/__init__.cpython-39.pyc | Bin 0 -> 147 bytes toytools/__pycache__/collect.cpython-39.pyc | Bin 0 -> 4720 bytes toytools/__pycache__/plot.cpython-39.pyc | Bin 0 -> 2013 bytes toytools/__pycache__/transform.cpython-39.pyc | Bin 0 -> 2113 bytes .../__pycache__/__init__.cpython-39.pyc | Bin 0 -> 729 bytes .../datasets/__pycache__/funcs.cpython-39.pyc | Bin 0 -> 927 bytes .../generic_dataset.cpython-39.pyc | Bin 0 -> 801 bytes .../precropped_toyzero.cpython-39.pyc | Bin 0 -> 3311 bytes .../presimple_toyzero.cpython-39.pyc | Bin 0 -> 3985 bytes .../preunaligned_toyzero.cpython-39.pyc | Bin 0 -> 4304 bytes .../__pycache__/simple_toyzero.cpython-39.pyc | Bin 0 -> 4904 bytes .../__pycache__/torch_funcs.cpython-39.pyc | Bin 0 -> 1325 bytes 22 files changed, 865 insertions(+) create mode 100644 scripts/.ipynb_checkpoints/preprocess-checkpoint create mode 100644 scripts/.ipynb_checkpoints/view_dataset-checkpoint create mode 100644 scripts/README_extract.md create mode 100644 scripts/extract_toy-sp.py create mode 100644 toytools.egg-info/PKG-INFO create mode 100644 toytools.egg-info/SOURCES.txt create mode 100644 toytools.egg-info/dependency_links.txt create mode 100644 toytools.egg-info/requires.txt create mode 100644 toytools.egg-info/top_level.txt create mode 100644 toytools/.ipynb_checkpoints/collect-checkpoint.py create mode 100644 
toytools/__pycache__/__init__.cpython-39.pyc create mode 100644 toytools/__pycache__/collect.cpython-39.pyc create mode 100644 toytools/__pycache__/plot.cpython-39.pyc create mode 100644 toytools/__pycache__/transform.cpython-39.pyc create mode 100644 toytools/datasets/__pycache__/__init__.cpython-39.pyc create mode 100644 toytools/datasets/__pycache__/funcs.cpython-39.pyc create mode 100644 toytools/datasets/__pycache__/generic_dataset.cpython-39.pyc create mode 100644 toytools/datasets/__pycache__/precropped_toyzero.cpython-39.pyc create mode 100644 toytools/datasets/__pycache__/presimple_toyzero.cpython-39.pyc create mode 100644 toytools/datasets/__pycache__/preunaligned_toyzero.cpython-39.pyc create mode 100644 toytools/datasets/__pycache__/simple_toyzero.cpython-39.pyc create mode 100644 toytools/datasets/__pycache__/torch_funcs.cpython-39.pyc diff --git a/scripts/.ipynb_checkpoints/preprocess-checkpoint b/scripts/.ipynb_checkpoints/preprocess-checkpoint new file mode 100644 index 0000000..919ba11 --- /dev/null +++ b/scripts/.ipynb_checkpoints/preprocess-checkpoint @@ -0,0 +1,231 @@ +#!/usr/bin/env python +"""Preprocess toyzero dataset""" +# pylint: disable=missing-function-docstring + +import argparse +import logging +import multiprocessing +import os + +from collections import namedtuple + +import tqdm +import numpy as np +import pandas as pd + +from toytools.collect import ( + collect_toyzero_images, filter_parsed_images, parse_images, load_image +) +from toytools.transform import ( + get_background_value_fast, is_image_empty, try_find_region_with_signal +) + +COLUMNS = [ + 'image', 'event', 'apa', 'plane', 'x', 'y', 'width', 'height', 'bkg' +] + +Config = namedtuple( + 'Config', + [ + 'path', + 'label', + 'apas', + 'planes', + 'min_signal', + 'n_crops', + 'crop_retries', + 'crop_shape', + ] +) + +class CroppingWorker: + # pylint: disable=missing-class-docstring + + def __init__(self, config): + self._path = config.path + self._min_signal = 
config.min_signal + self._n_crops = config.n_crops + self._crop_shape = config.crop_shape + self._crop_retries = config.crop_retries + + def __call__(self, index_and_parsed_image): + """Return a list of preprocessed regions of a parsed image""" + index, parsed_image = index_and_parsed_image + + image = load_image(self._path, True, parsed_image[0]) + bkg_value = get_background_value_fast(image) + + if is_image_empty(image, bkg_value, self._min_signal): + logging.warning( + "Empty image: %s. Background: %d.", parsed_image[0], bkg_value + ) + return [] + + if self._crop_shape is None: + crop_region = (0, 0, image.shape[0], image.shape[1]) + return [ (*parsed_image, *crop_region, bkg_value) ] + + result = [] + prg = np.random.default_rng(index) + + for _ in range(self._n_crops): + cropped_image, crop_region = try_find_region_with_signal( + image, prg, bkg_value, self._min_signal, + self._crop_shape, self._crop_retries + ) + + if not is_image_empty(cropped_image, bkg_value, self._min_signal): + result.append((*parsed_image, *crop_region, bkg_value)) + + return result + +def parse_cmdargs(): + parser = argparse.ArgumentParser("Precompute cropped regions") + + parser.add_argument( + 'path', + help = 'Directory where toyzero dataset is located', + metavar = 'PATH', + type = str, + ) + + parser.add_argument( + 'label', + help = 'Label of the precomputed croppings (used as a file name).', + metavar = 'LABEL', + type = str, + ) + + parser.add_argument( + '--apa', + default = None, + dest = 'apas', + help = 'APAs to select', + type = int, + nargs = '*', + ) + + parser.add_argument( + '--plane', + default = None, + dest = 'planes', + help = 'Wire Planes to select', + choices = [ 'U', 'V', 'W', ], + nargs = '*', + ) + + parser.add_argument( + '-n', + default = 10, + dest = 'n_crops', + help = 'Number of cropped regions to extract from each image', + type = int, + ) + + parser.add_argument( + '--min-signal', + default = 10, + dest = 'min_signal', + help = 'Minimum number of 
signal pixels to consider image nonempty', + type = int, + ) + + parser.add_argument( + '-r', '--retries', + default = 100, + dest = 'crop_retries', + help = 'Number of attempts to extract a single cropped region', + type = int, + ) + + parser.add_argument( + '-s', '--shape', + default = None, + dest = 'crop_shape', + help = "Shape of the crop 'WIDTHxHEIGHT'", + type = lambda s : tuple(int(x) for x in s.split('x')) + ) + + return parser.parse_args() + +def create_config_from_cmdargs(cmdargs): + + def optional_map(fn, x): + if x is None: + return None + + return fn(x) + + return Config( + path = cmdargs.path, + label = cmdargs.label, + apas = optional_map(set, cmdargs.apas), + planes = optional_map(set, cmdargs.planes), + min_signal = cmdargs.min_signal, + n_crops = cmdargs.n_crops, + crop_retries = cmdargs.crop_retries, + crop_shape = cmdargs.crop_shape + ) + +def construct_dataset(parsed_images, config): + result = [] + + progbar = tqdm.tqdm( + desc = 'Cropping Images', + total = len(parsed_images), + dynamic_ncols = True + ) + + worker = CroppingWorker(config) + + with multiprocessing.Pool() as pool: + for samples in pool.imap_unordered(worker, enumerate(parsed_images)): + result += samples + progbar.update() + + progbar.close() + + return sorted(result) + +def get_save_name(config): + fname = config.label + + if config.apas is not None: + fname += '-' + ''.join(str(x) for x in config.apas) + + if config.planes is not None: + fname += '-' + ''.join(str(x) for x in config.planes) + + if config.crop_shape is not None: + fname += '-%dx%d' % config.crop_shape + + return fname + '.csv' + +def save_dataset(dataset, config): + fname = get_save_name(config) + + df = pd.DataFrame(dataset, columns = COLUMNS) + df.to_csv(os.path.join(config.path, fname), index_label = 'index') + +def main(): + cmdargs = parse_cmdargs() + config = create_config_from_cmdargs(cmdargs) + + print("Collecting Images...") + images = collect_toyzero_images(config.path) + parsed_images = 
parse_images(images) + + print("Filtering Images...") + parsed_images = filter_parsed_images( + parsed_images, config.apas, config.planes + ) + + print("Cropping Images...") + dataset = construct_dataset(parsed_images, config) + + print("Saving Result...") + save_dataset(dataset, config) + +if __name__ == '__main__': + main() + diff --git a/scripts/.ipynb_checkpoints/view_dataset-checkpoint b/scripts/.ipynb_checkpoints/view_dataset-checkpoint new file mode 100644 index 0000000..c46c714 --- /dev/null +++ b/scripts/.ipynb_checkpoints/view_dataset-checkpoint @@ -0,0 +1,160 @@ +#!/usr/bin/env python +"""Make plots of toyzero dataset images""" +# pylint: disable=missing-function-docstring + +import argparse +import json +import re +import tqdm + +import matplotlib.pyplot as plt + +from toytools.datasets import get_toyzero_dataset +from toytools.plot import ( + save_figure, default_image_plot, get_common_images_range +) + +def parse_cmdargs(): + parser = argparse.ArgumentParser("View toyzero dataset") + + parser.add_argument( + 'path', + help = 'Directory where toyzero dataset is located', + metavar = 'PATH', + type = str, + ) + + parser.add_argument( + '--dataset', + choices = [ + 'toyzero-simple', 'toyzero-presimple', 'toyzero-precropped', + 'toyzero-preunaligned' + ], + default = 'simple', + dest = 'dataset', + help = 'Type of the toyzero Dataset to use.', + type = str, + ) + + parser.add_argument( + '--data_args', + default = {}, + dest = 'data_args', + help = ( + "JSON dict of data_args parameters. " + "Example: '{ \"seed\": 1, \"val_size\": 0.4 }'" + ), + type = json.loads, + ) + + parser.add_argument( + '-i', + default = None, + dest = 'index', + help = \ + 'Index or range of indices [first:last) of the samples to plot', + type = str, + ) + + parser.add_argument( + '--plotdir', + default = None, + dest = 'plotdir', + help = ( + 'Directory where to save plots. If not specified then this ' + 'script will be run in the interactive mode.' 
+ ), + type = str, + ) + + parser.add_argument( + '--ext', + default = 'png', + dest = 'ext', + help = 'Plot extensions', + type = str, + nargs = '+', + ) + + return parser.parse_args() + +def plot_image_pair(img_fake, img_real, vertical = False): + + subplots_kwargs = { + 'sharex' : True, 'sharey' : True, 'constrained_layout' : True, + } + + if vertical: + f, axs = plt.subplots(2, 1, **subplots_kwargs) + else: + f, axs = plt.subplots(1, 2, **subplots_kwargs) + + color_range = get_common_images_range((img_fake, img_real)) + + aximg_fake = default_image_plot(axs[0], img_fake, *color_range) + _ = default_image_plot(axs[1], img_real, *color_range) + + axs[0].set_title('Fake') + axs[1].set_title('Real') + + if vertical: + f.colorbar(aximg_fake, ax = axs, location = 'right') + else: + f.colorbar(aximg_fake, ax = axs, location = 'bottom') + + return f, axs + +def plot_single_sample(dataset, index, plotdir, ext): + img_fake, img_real = dataset[index] + vertical = (img_fake.shape[1] > img_fake.shape[0]) + + f, _ax = plot_image_pair(img_fake, img_real, vertical) + f.suptitle("Index: %d" % index) + + if plotdir is None: + plt.show() + else: + save_figure(f, '%s/sample_%04d' % (plotdir, index), ext) + + plt.close(f) + +def parse_index_range(dataset, index): + if index is None: + return range(len(dataset)) + + if index.isnumeric(): + index = int(index) + return range(index, index + 1) + + range_re = re.compile(r'(\d*):(\d*)') + match = range_re.match(index) + + if not match: + raise ValueError("Failed to parse index range: '%s'" % index) + + first, last = [ (int(x) if x != '' else None) for x in match.groups() ] + index_slice = slice(first, last) + + return range(*index_slice.indices(len(dataset))) + +def main(): + cmdargs = parse_cmdargs() + dataset = get_toyzero_dataset( + cmdargs.dataset, cmdargs.path, **cmdargs.data_args + ) + + index_range = parse_index_range(dataset, cmdargs.index) + + progbar = tqdm.tqdm( + desc = 'Plotting', total = len(index_range), dynamic_ncols = 
True + ) + + for index in index_range: + plot_single_sample(dataset, index, cmdargs.plotdir, cmdargs.ext) + progbar.update() + + progbar.close() + +if __name__ == '__main__': + main() + diff --git a/scripts/README_extract.md b/scripts/README_extract.md new file mode 100644 index 0000000..b07af5f --- /dev/null +++ b/scripts/README_extract.md @@ -0,0 +1,48 @@ +# Get `toy-sp` (`toy1`) data: +Toy1 data can be downloaded from https://www.phy.bnl.gov/~bviren/tmp/ls4gan/data/toy1/. +1. To download both the image `.npz` files and the metadata: + > `wget -r --no-parent https://www.phy.bnl.gov/~bviren/tmp/ls4gan/data/toy1/` +1. To download only the image `.npz` files: + > `wget -r --no-parent https://www.phy.bnl.gov/~bviren/tmp/ls4gan/data/toy1/job_output_dir` +1. To get the images generated with one particular seed (example): + > `wget -r --no-parent https://www.phy.bnl.gov/~bviren/tmp/ls4gan/data/toy1/job_output_dir/toyzero-100-10-17698759_1048280_804.tar` + +When the download is done, the tar files that contain the images can be found in the following folder: +> `www.phy.bnl.gov/~bviren/tmp/ls4gan/data/toy1/job_output_dir` + +# Extract `toy-sp` (`toy1`) data: +We use `extract_toy-sp.py` to extract the tar files and get a merged dataset that contains all images (generated with different seeds). + +Usage: +1. `python extract_toy-sp.py -d www.phy.bnl.gov/~bviren/tmp/ls4gan/data/toy1/job_output_dir/ -s seedwise -m merged -v True`; + Note: Sampled pairs of images will be saved to `flora:/tmp/LS4GAN/toy-sp/sample_plots/` +1. `python extract_toy-sp.py -d www.phy.bnl.gov/~bviren/tmp/ls4gan/data/toy1/job_output_dir/ -m merged -v True`; + Note: If the seedwise folder is not specified, the folders for each seed will be saved at `flora:/tmp/LS4GAN/toy-sp/seedwise/`. +1. `python extract_toy-sp.py -d www.phy.bnl.gov/~bviren/tmp/ls4gan/data/toy1/job_output_dir/ -m merged`; + Note: This does not plot sampled pairs. 
+ +# Bulk data downloading and pre-generated window csvs: +Please find them in the `flora:/data/LS4GAN` folder on the `flora` machine. +1. **Merged data**: A gathering of all toy1 image `.npz` files Brett generated. +1. **Seed-wise data**: The seed-wise data contains the same set of toy1 images as the merged data, but grouped and saved by the seed. + + | file | md5sum | + | ------------------------------ | -------------------------------- | + | 2021-09-01_toy-sp_merged.tgz | bfbd560f65955d66d5d174f0f410eee8 | + | 2021-09-01_toy-sp_seedwise.tgz | b5dde35f59283313998f5ecea51b3340 | +1. **Pre-generated window csvs**: in the `flora:/data/LS4GAN/toy-sp_merged_windows` folder. + - Shared parameters for `toytools/scripts/preprocess`: + - `--plane`: U; + - `-n` (number of windows per image): 4; + - Shared parameters for `toytools/scripts/train_test_split`: + - `--test-size`: 4000; + - For window size `128x128`: + - `--min-signal`: 300, 500; + - For window size `512x512`: + - `--min-signal`: 500, 1000, 2000; + - Window file naming convention: + > `ms--.csv`; + > + > `ms--_train.csv`; + > + > `ms--_test.csv` diff --git a/scripts/extract_toy-sp.py b/scripts/extract_toy-sp.py new file mode 100644 index 0000000..d5dd64f --- /dev/null +++ b/scripts/extract_toy-sp.py @@ -0,0 +1,223 @@ +#!/usr/bin/env python +# coding: utf-8 + +# ================================================================================================================== +# ============================================== Extract toy-sp data ============================================== +# The script includes three parts: +# 1. Extract the downloaded tar files and save only the images. +# We have 1000 tar files, each generated with a random seed. +# This step also saves images extracted from the tar file with seed `` to `/seed-`. +# 2. Gather all images generated with different seeds and save them to`/merged`. +# We have to modify the image filenames since images generated with different seeds share the same set of names. 
+# We do so by plugging in the seed. After the modification, each file in the merged folder is named as: +# > `protodune----.npz` +# (@Brett, I removed the gauss in the name. Let me know if it is a bad idea.) +# 3. (Optional) Do a sanity check by plotting the fake and real images and their difference. +# ================================================================================================================== +# ================================================================================================================== + + +import pandas as pd +import numpy as np +from pathlib import Path +import tarfile +import os +import shutil +import sys +from tqdm import tqdm +import argparse + +import matplotlib.pyplot as plt + + + +# Extraction functions +def extract_files(tar, member_folder, file_extension, save_folder='.'): + """ + Extract all files with from folder of a tarfile object + Input: + - tar (tarfile instance): a tarfile instance to extract from. + - member_folder (str): a folder the contains the files you need. Must be a member in . + - file_extension (str): file extension for the files you need. + - save_folder (str): the folder to extract to. + """ + + Path(save_folder).mkdir(exist_ok=True, parents=True) + # locate all the files in the member_folder that have file_extension as extension. 
+ subdir_and_files = [ + tarinfo for tarinfo in tar.getmembers() + if tarinfo.name.startswith(member_folder) and + tarinfo.name.endswith(file_extension) + ] + + print(f'\tsave {len(subdir_and_files)} {file_extension} files to {save_folder}') + + # extract files and move them to the save_folder + tar.extractall(members=subdir_and_files, path=save_folder) + fnames = Path(f'{save_folder}/{member_folder}').glob(f'*.{file_extension}') + for fname in fnames: + shutil.move(fname, save_folder) + + # Remove the chain of parent folders (now empty) + path_base = os.path.normpath(member_folder).split(os.sep)[0] + shutil.rmtree(f'{save_folder}/{path_base}') + +def get_seed(fname): + return fname.split('-')[-1].split('_')[0] + +def extract_toy_sp(tarfname, folder_base): + """ + extract all the npz files (both fake and real) from a tar file named + Input: + - tarfname (str): the .tar file name. + - folder_base (str): the folder under where the seed- folders are located + """ + seed = get_seed(Path(tarfname).stem) + print(f'seed = {seed}') + save_folder = Path(f'{folder_base}/seed-{seed}') + if save_folder.exists(): + shutil.rmtree(save_folder) + + with tarfile.open(tarfname, 'r') as tar: + try: + fake = [tarinfo.name for tarinfo in tar.getmembers() if tarinfo.name.endswith('fake-fake')][0] + real = [tarinfo.name for tarinfo in tar.getmembers() if tarinfo.name.endswith('real-fake')][0] + extract_files(tar, fake, 'npz', save_folder/'fake') + extract_files(tar, real, 'npz', save_folder/'real') + print() + except: + print(f'There is something wrong with the tar file {tarfname}') + + +def parse_cmdargs(): + parser = argparse.ArgumentParser("Extract toy-sp (toy1) data Brett generated") + + parser.add_argument( + '--data_path', + '-d', + help = 'The loacation of the tar files', + dest = 'data_path', + type = str, + ) + + parser.add_argument( + '--seed_path', + '-s', + help = 'The location for the seed folders. 
Images generated with seed= are contained in /seed-.', + dest = 'seed_path', + type = str, + default = None + ) + + parser.add_argument( + '--merged_path', + '-m', + dest = 'merged_path', + help = 'The location of the merged dataset. The folder will contain two subfolders: fake and real.', + type = str + ) + + parser.add_argument( + '--visualize', + '-v', + help = 'whether to visualize a few pairs of fake and real images', + dest = 'visualize', + default = False, + type = bool, + ) + + return parser.parse_args() + + + +if __name__ == '__main__': + cmdargs = parse_cmdargs() + data_path = cmdargs.data_path + seed_path = cmdargs.seed_path + merged_path = cmdargs.merged_path + visualize = cmdargs.visualize + + # extract the image npz files from each tarfile and save them to folder according to their seed + # Each seed- folder looks like + # seed-/ + # - fake/ + # - real/ + + print('Extracting:') + assert Path(data_path).exists(), f"{data_path} does not exist" + + tarfnames = list(Path(data_path).glob('*.tar')) + assert len(tarfnames) > 0, f"{data_path} does not contain any tar files" + + if not seed_path: + seed_path = '/tmp/LS4GAN/toy-sp/seedwise/' + print('\033[96m' + f'Find seedwise data at {seed_path}' + '\033[0m') + + if not Path(seed_path).exists(): + Path(seed_path).mkdir(exist_ok=True, parents=True) + + for tarfname in tarfnames: + extract_toy_sp(tarfname, seed_path) + + + # Merge + # Modified the image filenames and then save copies to ``. + # The merged folder has two subfolders `fake` and `real`. 
+ merged_folder = Path(merged_path) + merged_folder.mkdir(exist_ok=True, parents=True) + merged_fake = merged_folder/'fake' + merged_real = merged_folder/'real' + merged_fake.mkdir(exist_ok=True, parents=True) + merged_real.mkdir(exist_ok=True, parents=True) + + print('Merging:') + for folder in Path(seed_path).glob('seed-*'): + seed = folder.stem.split('-')[-1] + print(f'seed = {seed}') + + npz_fnames = list((folder/'fake').glob('*npz')) + print(f'\tcopy {len(npz_fnames)} to the {merged_fake} folder') + for npz_fname in sorted(npz_fnames): + npz_fname_new = str(npz_fname.name).replace('gauss', f'{seed}-') + shutil.copy(npz_fname, merged_fake/npz_fname_new) + + npz_fnames = list((folder/'real').glob('*npz')) + print(f'\tcopy {len(npz_fnames)} to the {merged_real} folder') + for npz_fname in sorted(npz_fnames): + npz_fname_new = str(npz_fname.name).replace('gauss', f'{seed}-') + shutil.copy(npz_fname, merged_real/npz_fname_new) + print() + + + # Visualization + if not visualize: + exit(0) + + print('Generating sample plots') + def load_image(fname): + with np.load(fname) as f: + return f[f.files[0]] + + fnames_fake = sorted((merged_fake).glob('*npz')) + fnames_real = sorted((merged_real).glob('*npz')) + num_samples = 5 + image_save_folder = Path(f'/tmp/LS4GAN/toy-sp/sample_plots') + if image_save_folder.exists(): + shutil.rmtree(image_save_folder) + image_save_folder.mkdir(exist_ok=True, parents=True) + print('\033[96m' + f'Find sample images at {image_save_folder}' + '\033[0m') + indices = np.random.choice(range(len(fnames_fake)), num_samples, replace=False) + + for idx in tqdm(indices): + image_fname = image_save_folder/f'sample_{idx}.png' + fname_fake = fnames_fake[idx] + fname_real = fnames_real[idx] + + image_fake = load_image(fname_fake) + image_real = load_image(fname_real) + + fig, axes = plt.subplots(1, 3, figsize=(20, 5)) + axes[0].pcolormesh(image_fake) + axes[1].pcolormesh(image_real) + axes[2].pcolormesh(image_fake - image_real) + 
plt.savefig(image_fname, dpi=200, bbox_inches='tight') diff --git a/toytools.egg-info/PKG-INFO b/toytools.egg-info/PKG-INFO new file mode 100644 index 0000000..881f7e9 --- /dev/null +++ b/toytools.egg-info/PKG-INFO @@ -0,0 +1,11 @@ +Metadata-Version: 1.1 +Name: toytools +Version: 0.0.1 +Summary: A collection of functions to handle toyzero dataset +Home-page: UNKNOWN +Author: LS4GAN Group +Author-email: dtorbunov@bnl.gov +License: UNKNOWN +Description: UNKNOWN +Platform: UNKNOWN +Classifier: Programming Language :: Python :: 3 :: Only diff --git a/toytools.egg-info/SOURCES.txt b/toytools.egg-info/SOURCES.txt new file mode 100644 index 0000000..d8681ae --- /dev/null +++ b/toytools.egg-info/SOURCES.txt @@ -0,0 +1,21 @@ +README.md +setup.py +scripts/preprocess +scripts/view_dataset +toytools/__init__.py +toytools/collect.py +toytools/plot.py +toytools/transform.py +toytools.egg-info/PKG-INFO +toytools.egg-info/SOURCES.txt +toytools.egg-info/dependency_links.txt +toytools.egg-info/requires.txt +toytools.egg-info/top_level.txt +toytools/datasets/__init__.py +toytools/datasets/funcs.py +toytools/datasets/generic_dataset.py +toytools/datasets/precropped_toyzero.py +toytools/datasets/presimple_toyzero.py +toytools/datasets/preunaligned_toyzero.py +toytools/datasets/simple_toyzero.py +toytools/datasets/torch_funcs.py \ No newline at end of file diff --git a/toytools.egg-info/dependency_links.txt b/toytools.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/toytools.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/toytools.egg-info/requires.txt b/toytools.egg-info/requires.txt new file mode 100644 index 0000000..4021870 --- /dev/null +++ b/toytools.egg-info/requires.txt @@ -0,0 +1,3 @@ +numpy +pandas +matplotlib diff --git a/toytools.egg-info/top_level.txt b/toytools.egg-info/top_level.txt new file mode 100644 index 0000000..8a7b981 --- /dev/null +++ b/toytools.egg-info/top_level.txt @@ -0,0 +1 @@ +toytools diff --git 
a/toytools/.ipynb_checkpoints/collect-checkpoint.py b/toytools/.ipynb_checkpoints/collect-checkpoint.py new file mode 100644 index 0000000..1a0dfcd --- /dev/null +++ b/toytools/.ipynb_checkpoints/collect-checkpoint.py @@ -0,0 +1,166 @@ +"""Functions to load toyzero image dataset""" + +import os +import re + +from typing import List, Tuple, Optional, Set, Union +import numpy as np + +DIR_FAKE = 'fake' +DIR_REAL = 'real' + +def find_images_in_dir(path : str) -> List[str]: + """Return sorted list of '*.npz' files in `path`""" + result = [] + + for fname in os.listdir(path): + fullpath = os.path.join(path, fname) + + if not os.path.isfile(fullpath): + continue + + ext = os.path.splitext(fname)[1] + if ext != '.npz': + continue + + result.append(fname) + + result.sort() + return result + +def validate_toyzero_images(imgs_fake : List[str], imgs_real : List[str]): + """Verify that image names are the same in fake and real image lists""" + + if len(imgs_fake) != len(imgs_real): + raise RuntimeError( + "Number of real/fake images does not match: %d != %d" % ( + len(imgs_fake), len(imgs_real) + ) + ) + + for (img_a,img_b) in zip(imgs_fake, imgs_real): + if img_a != img_b: + raise RuntimeError( + "Fake/Real image name mismatch: %s != %s" % (img_a, img_b) + ) + +def collect_toyzero_images(root : str) -> List[str]: + """Return a list of `toyzero` image names found in `root`""" + + path_fake = os.path.join(root, DIR_FAKE) + path_real = os.path.join(root, DIR_REAL) + + imgs_fake = find_images_in_dir(path_fake) + imgs_real = find_images_in_dir(path_real) + + validate_toyzero_images(imgs_fake, imgs_real) + + return imgs_fake + +def parse_images(images : List[str]) -> List[Tuple[str, str, int, str]]: + """Parse image names to infer Event, APA and Wire Plane""" + result = [] + + pattern = re.compile(r'^(.+)-(\d+)-([UVW])\.npz$') + + for image in images: + match = pattern.match(image) + + if not match: + raise RuntimeError("Cannot parse image name: %s" % image) + + base = 
match.group(1) + apa = int(match.group(2)) + plane = match.group(3) + + result.append((image, base, apa, plane)) + + return result + +def filter_parsed_images( + parsed_images : List[Tuple[str, str, int, str]], + apas : Optional[Set[int]] = None, + planes : Optional[Set[str]] = None, +) -> List[Tuple[str, str, int, str]]: + """Filter parsed images list based on APAs and Wire Planes""" + + if apas is not None: + parsed_images = [ x for x in parsed_images if x[2] in apas ] + + if planes is not None: + parsed_images = [ x for x in parsed_images if x[3] in planes ] + + return parsed_images + +def filter_images( + images : List[str], + apas : Optional[Set[int]] = None, + planes : Optional[Set[str]] = None, +) -> List[str]: + """Filter images list based on APAs and Wire Planes""" + + if (planes is None) and (apas is None): + return images + + parsed_images = parse_images(images) + parsed_images = filter_parsed_images(parsed_images, apas, planes) + + return [ x[0] for x in parsed_images ] + +def load_image(root : str, is_fake : bool, name : str) -> np.ndarray: + """Load image `name`""" + + if is_fake: + subdir = DIR_FAKE + else: + subdir = DIR_REAL + + path = os.path.join(root, subdir, name) + + with np.load(path) as f: + return f[f.files[0]] + +def train_test_split( + n : int, + test_size : Union[int, float], + shuffle : bool, + prg : np.random.Generator, +) -> Tuple[np.ndarray, np.ndarray]: + """Split dataset of size `n` into training/test parts. + + Parameters + ---------- + n : int + Size of the dataset to split. + test_size : int or float + Fraction of the dataset that will be used as a test sample. + If `test_size` <= 1, then it is treated as a fraction, i.e. + (size of test sample) = `test_size` * (size of toyzero dataset) + Otherwise, (size of val sample) = `test_size`. + shuffle : bool + Whether to shuffle dataset + prg : np.random.Generator + RNG that will be used for shuffling the dataset. + This parameter has no effect if `shuffle` is False. 
+ + Returns + ------- + (train_indices, test_indices) : (np.ndarray, np.ndarray) + Indices of training and validation samples. + + """ + indices = np.arange(n) + + if shuffle: + prg.shuffle(indices) + + if test_size <= 1: + test_size = int(len(indices) * test_size) + + train_size = max(0, int(len(indices) - test_size)) + + train_indices = indices[:train_size] + test_indices = indices[train_size:] + + return (train_indices, test_indices) + diff --git a/toytools/__pycache__/__init__.cpython-39.pyc b/toytools/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b45c3fe0e5c317dfcde86efcbb8810eb6da60967 GIT binary patch literal 147 zcmYe~<>g`kg6M@diHtz{F^Gc7I5! zf%GYtFKczXd;VT0T1$dBl6Df?VeGhgZ*@Hp+d<1&_iWcm9O)(R$VdxyGcz6rGRe#@ zJMGZR%8%MK*9kLy*+cJF5n2tycbF$wp=gFX@PA4OZ>{Ow_2{Mv*Ic ztZUmW(RNv??Xf%Bvpd^7(Kh+6o@y!IMdG=3h|SosZ=b&~6SceNZ9fP-X$O(LLRzo9 zBXzq5jn&H;k7ZV(8Ezm%86%mo83$2j1(N1vW!Vmc#Cw%wmeX!~k((LxnYx(^0|MCz zfs!9NElB&FFjUy+nDU(_Ca~i7*Jn54mN&b#(Q%^ntFwzskNzNMA1+_JKff>w!z6JW z%0Xo<4nuD(nQ3ojV}201O=UCL45B7*c#K1#P-9M3JDJu&9#d25=q*aH8=1 zL-@^NDM@ax>E!QaR^}Bheug$svvL>s5H||t5{klY6O|qhxonSZ13$R$61gt01osfr z$jgZi2i9{pPmNu}r4bWoQr*}iY%Ntc(O*)Gq^D+Tr6nJuV5Tv}NLKgSq9Y_cTCr{J z>R{p^^5MaEUwTn;(VkzNSDyYjKnyH~PULl`{xUuD%lf71r!GC8eD(F?XZ5GVQ|G#; zZaWcqc>B=0aCRx^e0@i;{Z7ZDUdp)eveH`IY7>K*%Gt$f3}xoJh&$~}528ejkZf7} z94|3OgC&?N@kErt&2dVIY3ejqaZ+Tu({?hmOlBJUDsx11 zWVBnv#{WUly3;s=plg}_() zsO#bi)zu#Q(sjPCLv3pOWBM;*rc@bYsm#XUBx79s8hxNz<&1+x6&vn%1p>X#hF_qO zB6sX)VjRHdZo5%1#C$D1Z?+g0amWk$E*y1DT4kFTFN4n8swi*2B;lavx&h1`4MIS`6?H5 zrT}>&?5q?NIfFr*^$NWPk$tpBCXR`?6#ld4R z3+dJfk}WZhUZl({%7zk8CQTW1LAG+E#9hiKXX zL)%JWq>i|?E$iu_Kd)}6}M*YdpM|_Bo_SzmKN^k>GuH`eV8EsA{=19mqHbs1Hzmkk3?3QuwYqu8aH|x77mRo*%0Oso)Mv+(i80G6$rP_* zT%gA(>egG%E6~gDsDnr@AHPdOr9#CQkK zCXSp_a?OD|hvvxN!)>k{P;W@wkPv)toX}8-6JyQWH*aQkzxUqvp5w*EmWT1(>3aNm z=y|`;;^KhK`wUap5Yp>;l1ZMkg72U&t@^#Yhllq!LiqymzQKi zHji0vK`zUdoQJlUwsx1&ODV?`>7{GlfM4_E!jD2O%B5q!w=Bh$*Iqg3+>Med&(p+Y 
zRT))-XfQ6Ft|L=LIxFft8%&})uZ+pc;d7w$+S~@;mN;+y`cPca5Y&WeR?`PTu zd!sBF*$`G!6;_z4wtk)tj15#a92q%lmb#;zDD|YjJRj@ThL-jqvm#1A2-Y!$03 z9;^&0=NHZi!=^$8I491|37ivCG35h}=!OS3zf28MTogzzGF_=i!AC?pQI*zFG0sg^ z=V_D`@i5i@N+m%KFt;%Es}Mr~e7tnXjQ5O9*(v*hKS2sz2*C+n)>ae_Y)9KoCacU}H~3h8aG~h2%sQkb&%<7y&zo zbBgXkwQe@dr=6LA=p%Vc{tCquHG!h)i0P1!VYS_hH zyYS6kwUt+O>Q+dV9#wnohN57rRT2>i`m_!8q{yqGdWS4GNW7H3%V%!QDA}i~r;D1{ z;E5)0vSpgke0GH|b9EDTILZdruVXW^BHcF$)%p3Ly8kDg8(2`48+OFijVZt9Jzhql z_|!w-#SvQnlO@TvsDC1nl2Z>@^`wyg38%jW*E>JmiqSdi34x0S*>J2-$z}93R#`RH z(Eve-js76gb(&;@ES1sDB-#e9?M~OSjtvYoP3nz1JJo(aE0a;G!SYt7vr?P5Oj5L0 z)SjK!_)>rN2_yjv=xtwh;D%ZwaSOt-^rbMWk72%zdDb3VXpT$F=g!_{ii)8vy4}t8 zif#1!vP$}W8=6Ute;Y*|NKpiBP>zdwGArH|d?JVQY)8FIn=3C2=*zTa!?UNTb2B3p wRhLVV4L4n&tk6a|7#AQr4!BO$TrVm)&eILzs3wz-BsT$nfj8$`i><4F0|JWgt^fc4 literal 0 HcmV?d00001 diff --git a/toytools/__pycache__/transform.cpython-39.pyc b/toytools/__pycache__/transform.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9ef5acae5822d2e790cdca4d989ca261cd37adc5 GIT binary patch literal 2113 zcma)7-EJF26rP#=a~wNK(<*IR2`x~lRsvF?f`n9|B3f0TDxkDirf9YCjO|VAU3X@d z#?e|rq`l+~deL6?jtAih=9a5I0tgAdnRU`MDiX7rv*&nb&+m7Rvu?Lduzq-PGyAoID1Z|wDa?(^UY=>_~ek9g||?S+DgNJIm+NltP8 zUTbfuk=?1kN%+z;pSSt)6TjEy9o~hOWxm2!@$LwJuZwNJfv!_}EN{=8jblyL7GHZ7 z@pF6~+E%!CpClI!FMl)1h9cH7E7YKrlT7Dj5$m!Xt5=AKB-8{Cs_9s4_ou zJpL&gS0XM4ac*6D0LxwJZl`^mYn6D06;l&dMgFJ~Cd}k;k{u+z3ET%JbPALSo9n$O z=Lacnyd=`r@UJhnM&(3o%|=yL3_soae&@Sy)z-cHH|~D5y`{^Ui}@6DoRLqaGjm}m zbo$!W)L9B`?3NI=PTN#=z`gnzq|ZQ(m*oR&m#Ir&G_iTA=~LEFTbLt??6AW-i|hljz3~Z0wt)rj}kJzy{%n zA2KLpzxFuYC+d3b+qb->3CEq94%h~=^b4)ob9`HSFW5X-@ILsk4(hPc+UOei$f$pK zbtfx$IY9=M7~Aw^v8r|(-%p?>EwW!(a`<`E4lc&{+6U@doU2$(vhi5RSdTKa#z@4A z?_b|anCa{QqxnSKk+PJgl^0qJg;Yr-*PvLQvt%8l7agR4)=>1KS;H81aNV%k^|Qq} zud)dmFKyD^*tELT1_+7P=o(w243Ga0p0GnZOm5=j4ft7no)q8)1f+*lM0RR4WKAFX zb2j(pK9Wglv}^5OBu`oG;R!5f!Fiv&q}$H9By141+?VAvZ9I|}p;TI6o8WQI^~i)H zkq<}uY&dQAkqhP;ZYxYvLTHQntV88H;NJkw8w17$jB$biQ`3FVjTEnD`|wO&#u0#S 
zubZxG{In4QN8UWtKB5ng7;Xl2gi3iwp4y<~(d|bYki!9U1NP$wauyGA zjMFn9x9HVzzbtf?7r?a3!nV8s7n0Bf4wA;hxO5mYfdjXzOB3zVe?}|chYooK1Qh@< zrh+$_Gc-M$)hk-gQtRFVJ?Z0Ij}|5Q3>uULIBEMp#X4P~ZD1p0GQn>cYWBZ2Sv66b z^0J?%Ce*WOUJRujQzj^?$#mv`;3lz~TE;^kVlcTOV3zKRLdZ;GlG;9b9h`{@o=KU_ zoTXDf+y4#eCS~j9q~z6Dd}+so`Vhq8w#8auJ7{D2hjiud&c#-%75U(8SGKV){rNA^ CNcu7W literal 0 HcmV?d00001 diff --git a/toytools/datasets/__pycache__/__init__.cpython-39.pyc b/toytools/datasets/__pycache__/__init__.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3678ab829e14ef75a233fa930f5f06bd1a02e615 GIT binary patch literal 729 zcmZ`$O^?$s5Vg}JO+U9QRvvNF<2N&!nfEgJXw*k=KJI4uuK=MR&iVUvzFk_MtpEHVZ$AUPQe0AN88yvEtHhJutHU-TBh~JDqXBoDZcRZRc>>` ztx2s)*LlSx2ejF8tx^W5J5YkI?l9OXZXd0n(b1ApigX literal 0 HcmV?d00001 diff --git a/toytools/datasets/__pycache__/funcs.cpython-39.pyc b/toytools/datasets/__pycache__/funcs.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..8c462535110bc226e22ef130e8f996ddaea376b5 GIT binary patch literal 927 zcmZ{iL2uJA6vu5RZPT(%tgB$+o#upoqlgr;c-5C~A2R<=Vp1QUC0v?PvfN2pOx zz?tuW#D%YvD<{4JC!W)^lvOzL%k%%A*m=J^w!Pg$Fy4JTWFrTmpLVf62rQ0aY8yBT z5XYP(I3)qWHg^&ybpyA~-K3SagLa*_l1}Oc9^`G_`GSHj_r_@0EgzmU70W`?sYr66 z=tYr5CYG6|M$!oGMir5P#jF%cQqBz1!svrv7|tK!YA}w|JP~gf{?mmG`ppw1{$t$& z>nvkQJjn##cyS-xCrak|RWGuoF9)DOF3eXjbq-+;#a|M&Z_pfH;26!xcl;HPtxiC{ zXYhvZfd0ae4c!I(y>b54jmukebOU|HpD=F&x=wlLRG30#SJ}`HK!{VB(b#C3u~d{h z4Zgd&hE?t^*SQjnG+2|0C7iOqrbY`WB|Xb7Gx>4zTzk|bdRvc%EoE6L3rSf@ss*9y zowKA6&yTh z(vQLktK9O<*$?ZCNZlaKg|&k#Z3a#+mdfbC8k175R>&v)fNE)|YktiD8IReDk0M@m zU?$u&@~Xh9{Hh8lS~1f_=%3H=R2M2CJtP{txMHK>MW##~=uB{33<-z0Gj$6#u0Qd`<0YY>fyTRp+nV~8xUcDY^cV5hL zRjAS_`N7wg sGPLLoLxUS4O;6VnHspa9`k`NE@g>yf#us&gdSBs5)B8Pr`+&;+0^eG(Bme*a literal 0 HcmV?d00001 diff --git a/toytools/datasets/__pycache__/precropped_toyzero.cpython-39.pyc b/toytools/datasets/__pycache__/precropped_toyzero.cpython-39.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..e983143a965523a4cd21aa4aa7225a3c9806aa19 GIT binary patch literal 3311 zcma)8-)|eo5#GIDJW>>uDsJth0eT7Y5EcpLrbSU0h7rTIj5LO9SV9Y2kep9~Zp{qwADNA|I!up?w7RFX?Po2=2x}iJuLT}m%TT?&uIZN%-$=lOT z*x~FUOWo9a$x=^rPVI0b<@>DP`Uw`iCh|)|}*Pq9Emd08{x;#1-vaEI0PaH=3yy`s^g^*ctKi07lI-#}Jo7)W3 zEii(GJZ0Dq5AC$~9UJo0OIt77p_BS)`y~tAw3BY&-%GpcCjMLBF^7ddcCl66cp}9; zSDl`0K)y!}f z>v3=^P+FQ{JprN@V<831i*>sVHUWSl6oO1)?F6W$12Y>Kj*3XjI4fZDuq^YH(SMFb z0c$eQWnk0-qm~t;mAAAuwfkZi&vSh%*o|`~)(j&?|J!U6$gC{hZ1_OVYrC0xy|>Sn zAD#unJRSu}S!h}2Dkz@|8RvO$D8}*gtdv1HG$1vI)G*K{J|6~aMgSD8`7!AhX2LR5 zQjFj#48u+x?#AsPmbH&$O^IA2fH^?4Wn84?bPzlyIA3I{wi>)@5YsQGAs+E66jID& zna-2!Fc-lzE|A#4Tu~&RDIwBl!EBi^gCIBEx3SQ&dU&XO_B&`T%~=ZsRP z+08OnG#M%nC)h{uA_IiOGMPhCTw?Z`z>`CWc~QP-uah#+4MV$z zZZ6na@0_9BvVC^J_fU^qHJ=Gt`4LKGrlY88N6W)exe=9*%8Sg2#*o5Qb?8jh$W~S{ ztGqg6tBtfi*pVzol@rb62s30^>Z;pR+h`d7OI(hmi1TVIO3IwFztQQpDhJ15SUI$I zxUp7q!gf7C38?&)lhB4!?Uhd#99xKzJXR`-F4)gs?2OB)*f|={<6`vb&XcE)|E_i( z?f>QB-Mt+ou|^eBmrRq+YUi@tF0YKi>`3CyV|{e>I}qk~EsuM=hYo6Z&pLkpO%;By z#5keYFB?Uiu9ti0>IaYon=tvV<`e4-@trs+Kes5t#rB%as%6?+$z0t!w@ zasOE z4D%;#In*8Gk9BHsR@{JO_wA%7N zAW(!W;~n0ydMH6YdWUaY#~=PbnDyP=gWdWf>bL0%FfK-@@X-`6h+N=0=tj{u^Ehw5 zAd}NFi6T=7%~6zp#QO5DAk}75b`z23P%%f=kUx+Lxi8xwbbQn`5T82%?xOnlbMLzU zwr?Aaeph~o@0E+o9!1xr&Rfe`J!nqwT9Glpo5DktrrI)(gDi=fVlI71jo~$)jAI5; U|8~9EJCxgsbbany@7Z1J7iPS59RL6T literal 0 HcmV?d00001 diff --git a/toytools/datasets/__pycache__/presimple_toyzero.cpython-39.pyc b/toytools/datasets/__pycache__/presimple_toyzero.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..177353aca69f11b142f913dfbbb3c0fcd28a6779 GIT binary patch literal 3985 zcma)9OK%*<5$>Lu-JM-NL^E>yNPsk8__2nxlw|_}1X^|=Swf6}5=7>*u`?Xb^zP1j zc4pl@D}}`JL;{izIph)`I*~{Hfc%L3g}LSs_?%O6alYz#a7l_lhUnhup6;rzzN)Tj z`YS6PhUX8boBqoc#{Nlz#mB+mOEkTME@UB(S-<$@eanpPzJp&YZcLi}=A_kcar$n@ z?xfvsGfz+BD0{{8PN33h@%=}RJzbu!K9q@6QE=bSd@ZwpL@hqea-i?vMY28*8DR6i z9bS9R20Uzq?r|Fiw8PFZ>o>xsa2dbNa3x&DZ|ga8SlQ8RgiEKQ8L 
z6i!okK*?d0CI#nY39)g`Na2HEEn^vgN&(j@KMB)GPdtJw^(@kbR8b17ft7;KZ;uk; zLnQ(roCVPy7@znFf>_KnalWgi40pw}3WW;<8)p1ir)Ts$8%4p0He{6_Ws*e{5*NZD zgNJHt>E0OTh-VRq9i+hwG$&`e?#qEci?ci80dz^TNsnfO0nlFX+C7Ae@e;z2*r52X zmQqYsvA8D(DX~arIU_@(OhieR)__b*&GLag1iG+`b{LW$sUjnDR=l;s$M7lmJtR}i3acq6x>4$`s6P};8vQ*8@0Me zQ$p&;qF!D=hjM>9jrR=-N^1*>zUmEom{-B*X{52cSMSxmCnmh^Av|AWH>oz7!I7uQ zsyjF@DWVd%7D;%Z{J=0+p6<)xfIY_lIIe~)A9-ql`r1D2Q{FQNs#d!Bx%i|!OXNun z+a+ZhVT24y-8V$klV?VJWbgtH#E3#_S9~sZE2+EUV^OcUfVW%2SvYm|;;RIn>8?Zi zR`0eEwe>(z71{aM%fHd{k1qmup>)tYGCQ-;^dHe>?1-K4IX~r`&Fwk!ths~Uo;T1t z^CtR6)*QD^+_`hgF>23S=sVfccsW}cuby0+^SO13>JwUjw(w7ZpSiF@8V6p zN!>N-u2YAr8_S&t)xPVfU(#U3h(pYT5iT0}67gOQTIGv+pZ1#W1A6-fy06gmDmrFa z{Q6I>1LiEpI{4s9&P+4vyRr{A-|FT%qf0}mlxI;2jO>uV;A7{=KD5fQ zRgP`^Hqe@AEi`xF{J`ec+#Xtoj=4SBp>vGWexDt2%y8T%8;diz>^jG+`VL*~kj=RI z>vPN8JQw!P9Wdf`j;y~}-(zDz8d>?u%B|k5 zoFj+iOXjj%+(6D;oOd?qz8R#G>0J*y{*A^Cetk8iLzzXHoOoVuldh7nzKfSi!X>l> z;u2cII%aRP1+>U<_TG0x)x&yVZ5E~A45qq`54&jUQ*@Wq5GmVenzD}}W$h3?J;Avg zTc>&C#!el@;Gl6H*}-EYpYL4&1ck6elU3QCC{rJ{CmurC}VutTH|Zhb<4HZ`8r>< zI(*$ac<=uRl5Vpqs#~fJdEGYEE&8tmb*tlf6Wn=YdSCXu?`M8oe!(#h)4=mg;x<`G z-6V`(pvzavDiO#y#u7uO>E5FyhE}T8M1_Ixa_2)7)#~p*t8cn1uB~WIcSXI0pS*z! 
zAj*V^h~^ZREnh1-U-+-HtTa8;Ub^lZzZAK5z5Z5LGu5C?*UUXK3aS*ST9`HZ7Qogq Xkh_}&3wO!ono5<+@dwJ6LHz#!l_5G# literal 0 HcmV?d00001 diff --git a/toytools/datasets/__pycache__/preunaligned_toyzero.cpython-39.pyc b/toytools/datasets/__pycache__/preunaligned_toyzero.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..399fbc46faa7108d028c225b6fd2c4eeec147f06 GIT binary patch literal 4304 zcma)9OK;rP73L*59M0%r)lM5bXaP2TWzuBix0jWsiz4f;+C`G@Ts}0TC`LKpnfpA?@1AoRtgUqo zJb${_4*qt-F#biI#m7eHS9sMv2sb#3jY0X(2Bz*?0~`Nl+?qNAXWAaL8O>X7+QZM5+S4TVUPt%WFfIye4MJIw$Kp9fN=wAvw_8Ld~6IE zZ*%vo0|Pp|du9w;e1)&#-{EU~9sli*4ci#FP;#Sq?THkBNP;*TC4%p#N5?{@mDIq4 zm}rQS@6nM^~Wl;B<*sjT1adU*ag7KwKdDPP-v z;6=)VU9@9g&c$0`i}`GfVP2Z-J&)qplOmhT#G5CxAd-S>R;Ad!_ZWNR^GIQLlr1eU z7gzdK3Oos9Fcq1QN;7%8d31LcWMgm3Q<>CV()bAtJ%skaE8$y;8%zU+ zXNb8JqbNe%-^PrFk4Ac$4kIHFn;i+L6_+5J?8?HP+Pl)L=~y zC$OUg-!Kj5usgZ**+Vf5=5e;=?Lby(yc*AkL#TNrJ`Vs49S;DGU`+?3UP{iWg&vAw zN`9csk&%T!ew1WsqrN^~Gy$i2&v2}<6Y|4X-^Kn$tmsHb89_ zf#0#H0_G+2=^->7QKkyAR9?sau4V?M5(UMl5bywK3n);)+HRVNw;FMeXQXKmd(HAv zbqEh;v-n7BK}mE$&~N&qKE`D@eikY0?l*f?|9y?C2f*buc9Uo=89wqAS+(U2k=77( zx#WZ$8HCz{)j6&~T}MRcL!GN|FtxJ+q{Ic+H}Q%Rjlyx zIQudg6}CTLsKmal*q zomJCe^jl{RwanY>_ll#trLJ|Vx2n(Ji)1KJJe3x+{OwUE7+u+x9$*#T-|nbyQA-D=JDHm zluDcYO1YHskFL?1aO`sa-0gKG>0Vf|NF@0{-XgL_V|JLrL*LcS2zLYdO`7@^NUtrw zL+$Sq(F@kJ3}BE)xGNPa6vC=ASgjfwO_x1Fl>8BK(ButjeH-LGyy`ZHVVZ2?3)gm- z%WQL%t(&&~KkmIOB)XK;7!2ts*AKN#ygD#+o6g9^2n@~_!@XAaFs6-s!Akp$S#2Uw?wlfjN(f0-`j@=!0O<#;HW&|RIpuKoe?i7|l^IlE(=m}r|-+sZ8TY>u=r zX`KQqRo~1l-3J!(*2n}NFoOR!x43;~WDXKf`_z1BoU#*#x2_p;Xe@Wi6_C;9#+=Dd zt82D>=GJq$L%06j{hcK*lHz2<9vSc^$OvBi^XmXKJ2Bx=QkY&qePZFYhxV!YSM#a8 zR|dM*l4O6uJ}THz(RvQ^(^@g z7R$Rtenw;&H}dBgo8tWsUSmljA%k*-mPnY_5;snOhjV0&iFv^m7iqeb0R@th#!8S~nxu#>fpF8#|$SqxV(>(s+|A(ZW zQ{NJHC7lJ7Iuu~~U_t@z`u;TK^O)MJzW;O{#MKP)2Tw!a*HuU7I=PA6@*zmER#mxB z#4(m!hVciqgzS-R5XxIRTF8k#gEhbZuD|NuaxJ~4wESavX4syeSzpGm6hOTGuKq)^1$BE7vm_bbE~9pb}er~*#;JcyIp$WJ^?^corS;Q J@eS?&F9EA(nKA$X literal 0 HcmV?d00001 diff --git 
a/toytools/datasets/__pycache__/simple_toyzero.cpython-39.pyc b/toytools/datasets/__pycache__/simple_toyzero.cpython-39.pyc new file mode 100644 index 0000000000000000000000000000000000000000..bf0b1187278bf465d399533f11b50220dabc0de6 GIT binary patch literal 4904 zcmai1&5zs073WZtL}|71dhNJzzB)kD)Jk2gpF1PhSw+xCIOdAjwG%r zQl6n~ub@4xfw+ehNN?$B_vk+**B*M{wTHsz9t!l(^!J7z+I6<51U?RjGw;3my-#|} z%WVU{&knD7-<&awzth9fpM{6t;8LH11V+Fj!(q6akvXn8wXx+`jxMG!ERyUNjZ%z*A|~_GL0Dw!WFlU3U;-ffU0qiQOk*I&xJwjJ@b3gGxP?b#IAS z$k4yxrJfS$%K+0a2{<3uv0t-zQlDZVG**roFsBxr`O+A$pc&ZDT89CUGS2B(4*-0k3Wu!?)@OT#jpHqbeno!tz_lSn+sF)1|hRvU(qzu?6bPZLha z;I7f*IeI*ac_7AK9B?`yj}tzTVj>e?C>6%Te!I=_yXQ%7EP%4ok3Xw^`f=i6=T)v! zsUPzVn0hi2Qt)(ys}LIGNbyiX8Q)7q(AQfu@M`2u1o&JCG4GJjU*No+uH-K zCc+N~p$IM#=a|#h@-RX2B&LJroT`<4dzWu5L~P|UNzEH#;7z0S>OzWmI`Lvf1|N8S z3i($_lS+Y&>3Ca6o(#0yi3oV^9!iT9KPfG7w!1!Qtlq7%t*8Brkl$15+@lbW65( z$Z$_W06R$hDPWElvMMK8hpZgwN#w;s!DyQz{S1Ep#rHh*6JVruD3+2}YS6SP$f*nP z$(Y5OqtKinzKR4wF;;^_ zjv?SWbo0n4MHpVJ0J}R$5h7m1tK}X|kZ^A@iFUOXIz2B0-Ruwhn3w+OaYzByulB0` zeeFZn;e(6y73|fyk*bk_ajL5@eFE2mUPps-T_R*jjXWkGEKhZ1)=5X$qm8^RayFNO=*6@Zuemo=*m*+ThQmsGy~@t1Gt_wBEJnk#=a zt`(Qs!5}sEjRQ7khm09>Yi_va&;YGzY6T4A`rHI<%$uOic>~m**FaloduQpOGjANy zs^xhdv^%#!PtA>kl|us@jf2y44-C{Yb3-fETg~cfIuSCnU6(xBb+eXRro*gR)_EH* zgAipZv)o9;*^;}+@L8L(YVOBQ-KBT|H{^QC=$RQ$vPPb@vZWxe0IrOOU^J0KNXWwx zQcEjkr`)pnD4LFL-cPuNYxP>0h43C^7S%#0ZzOaTa#migD2`no#%UCiuooJ(DxEXC zl>CB04cCu6rCj%g@$Xw}qhu`Bc1Ke$9$sF%cmM9EYVFSEm0O=|tigR!)DX286xyjZ zrJIDZbLvlazqE}{&Q$(P45&4{j!V4* zlETD0Y@hA1)SNSNmz~-^IR{*aOg_7EN4Ei-vf$WD{Sl@8qBTKrpvqK6Z-6vT_S@}m z^y-pCWp$U}mgKRL&LLMpdZy&KWsRJT{0Y6#5EPq^M!G+3eku4|v?-$IeMx z(P8E1q^=L=GWHNA0efi4cTzejoz;B9*f(+ZT8hxwsqfPY1j|k%sC{L9Ztj_Tz+$H< zf2%{tjvG2~Y`xmXYH&D9;noT)pjDssH>rtT;DdAvpFxns!!>wJk;RHUqxw4BAN0(u zsYcVmAQD*(4!+TA%eSF(RvUXy?5T)%(!o_qi1gZsB{Zb}l{YvxYf)=rJy^r6U#F{)4`FVeQw_QPTRJ>#qv{hVHz zr%Y!_C&;`Hd&N%KIhQ_*C@JbRicZi)H{v}4Tctk*VYCi2|6|u@KRp?<6ZiWXXE323 zotNxI;i~=y@|^7$^1YPpmCm8h`bCFQ$aCX$<7@K~0)Anm7iMPmFJmXXe6;klPAd4KbBTC}4 
zx`7YqT~uvan>BTN_Tgno6=rb^sT1(8F!v{#zXo!mP>sqtywyUxd%)xX(RN%Ule8k2 zutp;t?v;eEX^JHDx_K%i#pHP+Z-8VCf0Uqm$TM7@rm1#F%Iva7ggjM8qx=PpNLbI7 z9Jj2FWG9~Lm~%RLQ%CtvJavkYjPbE7KcsC}h`djP)I({~=HoO$s-kOI7-fbDMF|#p zlC{}6v%}7!K6UZ8akrTLoo$_CU5r=F8UKGa?lp^kFYl{1WEY%!OVn18erT)Ot~*YG zX+-0W>pq%#QSk=3GDv*a)op?iU7WsIoT z(0zu@toM<-7yo~C-?8iV8M|h;B(3bNWOaP2pg$pVIV)vL(=Up#F8g`C*Onp|byllY zRRfhbl&-#~N{P}4q~`Q6Z&zBLq@f%inn zg4J)t=)1V=T2Z?o2oh4r!{RiwugCzh;r68$I^5wd;DsD`8*yI|hlFjw?wPIk2(ga* zK}O@TkdwVy@FQ82Iroo5E@TqdUcMeh+K;7(w7`tv|6Y9+1I_b=(2(L)*lGt3O)kh8 z9gu`ftzYy9rDRH{gjt-Nl52V#*v3^Q#5W#eNuFrNjLSGM2RDuq>A*Osnbzm8ksPXP zYC4R?X`~cm*W~YEe^g|me?BUseE6jQ?&R$&)qivP^yv9u1!_PgJ{?#RFO8ezTzr%3 zAkU5sw5~D+g-^7|80&&TRh-hv-m~LIl zSfnZ3-jaeGNCY8WIFLq#HS(xqtvQ}uS?^Q<3y1qPx;}sdbDz?I)%4wWThpnHTm*Eo zdm?lx^ZBsW1J_Ka{|QEy`$g_2Fu<4tR1aUT;s%a!&}N)*q(`QMO~tS`&H8XQGtJ;G zBve&-d&zLautO_uA?&L8tW7E(fp#3U<$d^?jmn`{>LgV?j3u$g#;uz77FG4rLcog4 VDyp|XtI7fPM@1u$NY|{5?mx4PNO}MO literal 0 HcmV?d00001 From 8a2dd54e92c7e817070f5225781a1c24b9e1cec8 Mon Sep 17 00:00:00 2001 From: Yi Huang Date: Wed, 22 Sep 2021 00:31:38 -0400 Subject: [PATCH 2/8] rm checkpoint folder --- .../.ipynb_checkpoints/preprocess-checkpoint | 231 ------------------ .../view_dataset-checkpoint | 160 ------------ 2 files changed, 391 deletions(-) delete mode 100644 scripts/.ipynb_checkpoints/preprocess-checkpoint delete mode 100644 scripts/.ipynb_checkpoints/view_dataset-checkpoint diff --git a/scripts/.ipynb_checkpoints/preprocess-checkpoint b/scripts/.ipynb_checkpoints/preprocess-checkpoint deleted file mode 100644 index 919ba11..0000000 --- a/scripts/.ipynb_checkpoints/preprocess-checkpoint +++ /dev/null @@ -1,231 +0,0 @@ -#!/usr/bin/env python -"""Preprocess toyzero dataset""" -# pylint: disable=missing-function-docstring - -import argparse -import logging -import multiprocessing -import os - -from collections import namedtuple - -import tqdm -import numpy as np -import pandas as pd - -from toytools.collect import ( - collect_toyzero_images, 
filter_parsed_images, parse_images, load_image -) -from toytools.transform import ( - get_background_value_fast, is_image_empty, try_find_region_with_signal -) - -COLUMNS = [ - 'image', 'event', 'apa', 'plane', 'x', 'y', 'width', 'height', 'bkg' -] - -Config = namedtuple( - 'Config', - [ - 'path', - 'label', - 'apas', - 'planes', - 'min_signal', - 'n_crops', - 'crop_retries', - 'crop_shape', - ] -) - -class CroppingWorker: - # pylint: disable=missing-class-docstring - - def __init__(self, config): - self._path = config.path - self._min_signal = config.min_signal - self._n_crops = config.n_crops - self._crop_shape = config.crop_shape - self._crop_retries = config.crop_retries - - def __call__(self, index_and_parsed_image): - """Return a list of preprocessed regions of a parsed image""" - index, parsed_image = index_and_parsed_image - - image = load_image(self._path, True, parsed_image[0]) - bkg_value = get_background_value_fast(image) - - if is_image_empty(image, bkg_value, self._min_signal): - logging.warning( - "Empty image: %s. 
Background: %d.", parsed_image[0], bkg_value - ) - return [] - - if self._crop_shape is None: - crop_region = (0, 0, image.shape[0], image.shape[1]) - return [ (*parsed_image, *crop_region, bkg_value) ] - - result = [] - prg = np.random.default_rng(index) - - for _ in range(self._n_crops): - cropped_image, crop_region = try_find_region_with_signal( - image, prg, bkg_value, self._min_signal, - self._crop_shape, self._crop_retries - ) - - if not is_image_empty(cropped_image, bkg_value, self._min_signal): - result.append((*parsed_image, *crop_region, bkg_value)) - - return result - -def parse_cmdargs(): - parser = argparse.ArgumentParser("Precompute cropped regions") - - parser.add_argument( - 'path', - help = 'Directory where toyzero dataset is located', - metavar = 'PATH', - type = str, - ) - - parser.add_argument( - 'label', - help = 'Label of the precomputed croppings (used as a file name).', - metavar = 'LABEL', - type = str, - ) - - parser.add_argument( - '--apa', - default = None, - dest = 'apas', - help = 'APAs to select', - type = int, - nargs = '*', - ) - - parser.add_argument( - '--plane', - default = None, - dest = 'planes', - help = 'Wire Planes to select', - choices = [ 'U', 'V', 'W', ], - nargs = '*', - ) - - parser.add_argument( - '-n', - default = 10, - dest = 'n_crops', - help = 'Number of cropped regions to extract from each image', - type = int, - ) - - parser.add_argument( - '--min-signal', - default = 10, - dest = 'min_signal', - help = 'Minimum number of signal pixels to consider image nonempty', - type = int, - ) - - parser.add_argument( - '-r', '--retries', - default = 100, - dest = 'crop_retries', - help = 'Number of attempts to extract a single cropped region', - type = int, - ) - - parser.add_argument( - '-s', '--shape', - default = None, - dest = 'crop_shape', - help = "Shape of the crop 'WIDTHxHEIGHT'", - type = lambda s : tuple(int(x) for x in s.split('x')) - ) - - return parser.parse_args() - -def create_config_from_cmdargs(cmdargs): - 
- def optional_map(fn, x): - if x is None: - return None - - return fn(x) - - return Config( - path = cmdargs.path, - label = cmdargs.label, - apas = optional_map(set, cmdargs.apas), - planes = optional_map(set, cmdargs.planes), - min_signal = cmdargs.min_signal, - n_crops = cmdargs.n_crops, - crop_retries = cmdargs.crop_retries, - crop_shape = cmdargs.crop_shape - ) - -def construct_dataset(parsed_images, config): - result = [] - - progbar = tqdm.tqdm( - desc = 'Cropping Images', - total = len(parsed_images), - dynamic_ncols = True - ) - - worker = CroppingWorker(config) - - with multiprocessing.Pool() as pool: - for samples in pool.imap_unordered(worker, enumerate(parsed_images)): - result += samples - progbar.update() - - progbar.close() - - return sorted(result) - -def get_save_name(config): - fname = config.label - - if config.apas is not None: - fname += '-' + ''.join(str(x) for x in config.apas) - - if config.planes is not None: - fname += '-' + ''.join(str(x) for x in config.planes) - - if config.crop_shape is not None: - fname += '-%dx%d' % config.crop_shape - - return fname + '.csv' - -def save_dataset(dataset, config): - fname = get_save_name(config) - - df = pd.DataFrame(dataset, columns = COLUMNS) - df.to_csv(os.path.join(config.path, fname), index_label = 'index') - -def main(): - cmdargs = parse_cmdargs() - config = create_config_from_cmdargs(cmdargs) - - print("Collecting Images...") - images = collect_toyzero_images(config.path) - parsed_images = parse_images(images) - - print("Filtering Images...") - parsed_images = filter_parsed_images( - parsed_images, config.apas, config.planes - ) - - print("Cropping Images...") - dataset = construct_dataset(parsed_images, config) - - print("Saving Result...") - save_dataset(dataset, config) - -if __name__ == '__main__': - main() - diff --git a/scripts/.ipynb_checkpoints/view_dataset-checkpoint b/scripts/.ipynb_checkpoints/view_dataset-checkpoint deleted file mode 100644 index c46c714..0000000 --- 
a/scripts/.ipynb_checkpoints/view_dataset-checkpoint +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python -"""Make plots of toyzero dataset images""" -# pylint: disable=missing-function-docstring - -import argparse -import json -import re -import tqdm - -import matplotlib.pyplot as plt - -from toytools.datasets import get_toyzero_dataset -from toytools.plot import ( - save_figure, default_image_plot, get_common_images_range -) - -def parse_cmdargs(): - parser = argparse.ArgumentParser("View toyzero dataset") - - parser.add_argument( - 'path', - help = 'Directory where toyzero dataset is located', - metavar = 'PATH', - type = str, - ) - - parser.add_argument( - '--dataset', - choices = [ - 'toyzero-simple', 'toyzero-presimple', 'toyzero-precropped', - 'toyzero-preunaligned' - ], - default = 'simple', - dest = 'dataset', - help = 'Type of the toyzero Dataset to use.', - type = str, - ) - - parser.add_argument( - '--data_args', - default = {}, - dest = 'data_args', - help = ( - "JSON dict of data_args parameters. " - "Example: '{ \"seed\": 1, \"val_size\": 0.4 }'" - ), - type = json.loads, - ) - - parser.add_argument( - '-i', - default = None, - dest = 'index', - help = \ - 'Index or range of indices [first:last) of the samples to plot', - type = str, - ) - - parser.add_argument( - '--plotdir', - default = None, - dest = 'plotdir', - help = ( - 'Directory where to save plots. If not specified then this ' - 'script will be run in the interactive mode.' 
- ), - type = str, - ) - - parser.add_argument( - '--ext', - default = 'png', - dest = 'ext', - help = 'Plot extensions', - type = str, - nargs = '+', - ) - - return parser.parse_args() - -def plot_image_pair(img_fake, img_real, vertical = False): - - subplots_kwargs = { - 'sharex' : True, 'sharey' : True, 'constrained_layout' : True, - } - - if vertical: - f, axs = plt.subplots(2, 1, **subplots_kwargs) - else: - f, axs = plt.subplots(1, 2, **subplots_kwargs) - - color_range = get_common_images_range((img_fake, img_real)) - - aximg_fake = default_image_plot(axs[0], img_fake, *color_range) - _ = default_image_plot(axs[1], img_real, *color_range) - - axs[0].set_title('Fake') - axs[1].set_title('Real') - - if vertical: - f.colorbar(aximg_fake, ax = axs, location = 'right') - else: - f.colorbar(aximg_fake, ax = axs, location = 'bottom') - - return f, axs - -def plot_single_sample(dataset, index, plotdir, ext): - img_fake, img_real = dataset[index] - vertical = (img_fake.shape[1] > img_fake.shape[0]) - - f, _ax = plot_image_pair(img_fake, img_real, vertical) - f.suptitle("Index: %d" % index) - - if plotdir is None: - plt.show() - else: - save_figure(f, '%s/sample_%04d' % (plotdir, index), ext) - - plt.close(f) - -def parse_index_range(dataset, index): - if index is None: - return range(len(dataset)) - - if index.isnumeric(): - index = int(index) - return range(index, index + 1) - - range_re = re.compile(r'(\d*):(\d*)') - match = range_re.match(index) - - if not match: - raise ValueError("Failed to parse index range: '%s'" % index) - - first, last = [ (int(x) if x != '' else None) for x in match.groups() ] - index_slice = slice(first, last) - - return range(*index_slice.indices(len(dataset))) - -def main(): - cmdargs = parse_cmdargs() - dataset = get_toyzero_dataset( - cmdargs.dataset, cmdargs.path, **cmdargs.data_args - ) - - index_range = parse_index_range(dataset, cmdargs.index) - - progbar = tqdm.tqdm( - desc = 'Plotting', total = len(index_range), dynamic_ncols = 
True - ) - - for index in index_range: - plot_single_sample(dataset, index, cmdargs.plotdir, cmdargs.ext) - progbar.update() - - progbar.close() - -if __name__ == '__main__': - main() - From 524a26cbf84ecc0b772c1328f21177fb16c55dae Mon Sep 17 00:00:00 2001 From: Yi Huang Date: Wed, 22 Sep 2021 00:33:54 -0400 Subject: [PATCH 3/8] extract_toy-sp.py --- scripts/extract_toy-sp.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/scripts/extract_toy-sp.py b/scripts/extract_toy-sp.py index d5dd64f..3443e5a 100644 --- a/scripts/extract_toy-sp.py +++ b/scripts/extract_toy-sp.py @@ -1,20 +1,6 @@ #!/usr/bin/env python # coding: utf-8 -# ================================================================================================================== -# ============================================== Extract toy-sp data ============================================== -# The script include three parts: -# 1. Extract the downloaded tar files and save only the images. -# We have 1000 tar files, each generated with a random seed. -# This step also save images extracted from the tar file with seed `` to `/seed-`. -# 2. Gather all images generated with different seeds and save them to`/merged`. -# We have to modified the image filename since images generated with different seed shared the same set of names. -# We do so by plugging in the seed. After the modification, each file in the merged folder is named as: -# > `protodune----.npz` -# (@Brett, I removed the gauss in the name. Let me know if it is a bad idea.) -# 3. (Optional) Do a sanity check by plotting the fake and real images and their difference. 
-# ================================================================================================================== -# ================================================================================================================== import pandas as pd From be5537e76f8f97a38b2a6bd63a5ac3ef44e1cc8e Mon Sep 17 00:00:00 2001 From: Yi Huang Date: Fri, 24 Sep 2021 00:24:17 -0400 Subject: [PATCH 4/8] remove unnecessary folders --- toytools.egg-info/PKG-INFO | 11 -- toytools.egg-info/SOURCES.txt | 21 --- toytools.egg-info/dependency_links.txt | 1 - toytools.egg-info/requires.txt | 3 - toytools.egg-info/top_level.txt | 1 - .../.ipynb_checkpoints/collect-checkpoint.py | 166 ------------------ toytools/__pycache__/__init__.cpython-39.pyc | Bin 147 -> 0 bytes toytools/__pycache__/collect.cpython-39.pyc | Bin 4720 -> 0 bytes toytools/__pycache__/plot.cpython-39.pyc | Bin 2013 -> 0 bytes toytools/__pycache__/transform.cpython-39.pyc | Bin 2113 -> 0 bytes .../test/batch_00227_protodune-orig-5-1-U.npz | Bin 0 -> 33030 bytes .../test/batch_00227_protodune-orig-5-1-U.png | Bin 0 -> 4266 bytes toytools/test/test_transform.py | 27 +++ 13 files changed, 27 insertions(+), 203 deletions(-) delete mode 100644 toytools.egg-info/PKG-INFO delete mode 100644 toytools.egg-info/SOURCES.txt delete mode 100644 toytools.egg-info/dependency_links.txt delete mode 100644 toytools.egg-info/requires.txt delete mode 100644 toytools.egg-info/top_level.txt delete mode 100644 toytools/.ipynb_checkpoints/collect-checkpoint.py delete mode 100644 toytools/__pycache__/__init__.cpython-39.pyc delete mode 100644 toytools/__pycache__/collect.cpython-39.pyc delete mode 100644 toytools/__pycache__/plot.cpython-39.pyc delete mode 100644 toytools/__pycache__/transform.cpython-39.pyc create mode 100644 toytools/test/batch_00227_protodune-orig-5-1-U.npz create mode 100644 toytools/test/batch_00227_protodune-orig-5-1-U.png create mode 100644 toytools/test/test_transform.py diff --git a/toytools.egg-info/PKG-INFO 
b/toytools.egg-info/PKG-INFO deleted file mode 100644 index 881f7e9..0000000 --- a/toytools.egg-info/PKG-INFO +++ /dev/null @@ -1,11 +0,0 @@ -Metadata-Version: 1.1 -Name: toytools -Version: 0.0.1 -Summary: A collection of functions to handle toyzero dataset -Home-page: UNKNOWN -Author: LS4GAN Group -Author-email: dtorbunov@bnl.gov -License: UNKNOWN -Description: UNKNOWN -Platform: UNKNOWN -Classifier: Programming Language :: Python :: 3 :: Only diff --git a/toytools.egg-info/SOURCES.txt b/toytools.egg-info/SOURCES.txt deleted file mode 100644 index d8681ae..0000000 --- a/toytools.egg-info/SOURCES.txt +++ /dev/null @@ -1,21 +0,0 @@ -README.md -setup.py -scripts/preprocess -scripts/view_dataset -toytools/__init__.py -toytools/collect.py -toytools/plot.py -toytools/transform.py -toytools.egg-info/PKG-INFO -toytools.egg-info/SOURCES.txt -toytools.egg-info/dependency_links.txt -toytools.egg-info/requires.txt -toytools.egg-info/top_level.txt -toytools/datasets/__init__.py -toytools/datasets/funcs.py -toytools/datasets/generic_dataset.py -toytools/datasets/precropped_toyzero.py -toytools/datasets/presimple_toyzero.py -toytools/datasets/preunaligned_toyzero.py -toytools/datasets/simple_toyzero.py -toytools/datasets/torch_funcs.py \ No newline at end of file diff --git a/toytools.egg-info/dependency_links.txt b/toytools.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/toytools.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/toytools.egg-info/requires.txt b/toytools.egg-info/requires.txt deleted file mode 100644 index 4021870..0000000 --- a/toytools.egg-info/requires.txt +++ /dev/null @@ -1,3 +0,0 @@ -numpy -pandas -matplotlib diff --git a/toytools.egg-info/top_level.txt b/toytools.egg-info/top_level.txt deleted file mode 100644 index 8a7b981..0000000 --- a/toytools.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -toytools diff --git a/toytools/.ipynb_checkpoints/collect-checkpoint.py 
b/toytools/.ipynb_checkpoints/collect-checkpoint.py deleted file mode 100644 index 1a0dfcd..0000000 --- a/toytools/.ipynb_checkpoints/collect-checkpoint.py +++ /dev/null @@ -1,166 +0,0 @@ -"""Functions to load toyzero image dataset""" - -import os -import re - -from typing import List, Tuple, Optional, Set, Union -import numpy as np - -DIR_FAKE = 'fake' -DIR_REAL = 'real' - -def find_images_in_dir(path : str) -> List[str]: - """Return sorted list of '*.npz' files in `path`""" - result = [] - - for fname in os.listdir(path): - fullpath = os.path.join(path, fname) - - if not os.path.isfile(fullpath): - continue - - ext = os.path.splitext(fname)[1] - if ext != '.npz': - continue - - result.append(fname) - - result.sort() - return result - -def validate_toyzero_images(imgs_fake : List[str], imgs_real : List[str]): - """Verify that image names are the same in fake and real image lists""" - - if len(imgs_fake) != len(imgs_real): - raise RuntimeError( - "Number of real/fake images does not match: %d != %d" % ( - len(imgs_fake), len(imgs_real) - ) - ) - - for (img_a,img_b) in zip(imgs_fake, imgs_real): - if img_a != img_b: - raise RuntimeError( - "Fake/Real image name mismatch: %s != %s" % (img_a, img_b) - ) - -def collect_toyzero_images(root : str) -> List[str]: - """Return a list of `toyzero` image names found in `root`""" - - path_fake = os.path.join(root, DIR_FAKE) - path_real = os.path.join(root, DIR_REAL) - - imgs_fake = find_images_in_dir(path_fake) - imgs_real = find_images_in_dir(path_real) - - validate_toyzero_images(imgs_fake, imgs_real) - - return imgs_fake - -def parse_images(images : List[str]) -> List[Tuple[str, str, int, str]]: - """Parse image names to infer Event, APA and Wire Plane""" - result = [] - - pattern = re.compile(r'^(.+)-(\d+)-([UVW])\.npz$') - - for image in images: - match = pattern.match(image) - - if not match: - raise RuntimeError("Cannot parse image name: %s" % image) - - base = match.group(1) - apa = int(match.group(2)) - plane = 
match.group(3) - - result.append((image, base, apa, plane)) - - return result - -def filter_parsed_images( - parsed_images : List[Tuple[str, str, int, str]], - apas : Optional[Set[int]] = None, - planes : Optional[Set[str]] = None, -) -> List[Tuple[str, str, int, str]]: - """Filter parsed images list based on APAs and Wire Planes""" - - if apas is not None: - parsed_images = [ x for x in parsed_images if x[2] in apas ] - - if planes is not None: - parsed_images = [ x for x in parsed_images if x[3] in planes ] - - return parsed_images - -def filter_images( - images : List[str], - apas : Optional[Set[int]] = None, - planes : Optional[Set[str]] = None, -) -> List[str]: - """Filter images list based on APAs and Wire Planes""" - - if (planes is None) and (apas is None): - return images - - parsed_images = parse_images(images) - parsed_images = filter_parsed_images(parsed_images, apas, planes) - - return [ x[0] for x in parsed_images ] - -def load_image(root : str, is_fake : bool, name : str) -> np.ndarray: - """Load image `name`""" - - if is_fake: - subdir = DIR_FAKE - else: - subdir = DIR_REAL - - path = os.path.join(root, subdir, name) - - with np.load(path) as f: - return f[f.files[0]] - -def train_test_split( - n : int, - test_size : Union[int, float], - shuffle : bool, - prg : np.random.Generator, -) -> Tuple[np.ndarray, np.ndarray]: - """Split dataset of size `n` into training/test parts. - - Parameters - ---------- - n : int - Size of the dataset to split. - test_size : int or float - Fraction of the dataset that will be used as a test sample. - If `test_size` <= 1, then it is treated as a fraction, i.e. - (size of test sample) = `test_size` * (size of toyzero dataset) - Otherwise, (size of val sample) = `test_size`. - shuffle : bool - Whether to shuffle dataset - prg : np.random.Generator - RNG that will be used for shuffling the dataset. - This parameter has no effect if `shuffle` is False. 
- - Returns - ------- - (train_indices, test_indices) : (np.ndarray, np.ndarray) - Indices of training and validation samples. - - """ - indices = np.arange(n) - - if shuffle: - prg.shuffle(indices) - - if test_size <= 1: - test_size = int(len(indices) * test_size) - - train_size = max(0, int(len(indices) - test_size)) - - train_indices = indices[:train_size] - test_indices = indices[train_size:] - - return (train_indices, test_indices) - diff --git a/toytools/__pycache__/__init__.cpython-39.pyc b/toytools/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index b45c3fe0e5c317dfcde86efcbb8810eb6da60967..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 147 zcmYe~<>g`kg6M@diHtz{F^Gc7I5! zf%GYtFKczXd;VT0T1$dBl6Df?VeGhgZ*@Hp+d<1&_iWcm9O)(R$VdxyGcz6rGRe#@ zJMGZR%8%MK*9kLy*+cJF5n2tycbF$wp=gFX@PA4OZ>{Ow_2{Mv*Ic ztZUmW(RNv??Xf%Bvpd^7(Kh+6o@y!IMdG=3h|SosZ=b&~6SceNZ9fP-X$O(LLRzo9 zBXzq5jn&H;k7ZV(8Ezm%86%mo83$2j1(N1vW!Vmc#Cw%wmeX!~k((LxnYx(^0|MCz zfs!9NElB&FFjUy+nDU(_Ca~i7*Jn54mN&b#(Q%^ntFwzskNzNMA1+_JKff>w!z6JW z%0Xo<4nuD(nQ3ojV}201O=UCL45B7*c#K1#P-9M3JDJu&9#d25=q*aH8=1 zL-@^NDM@ax>E!QaR^}Bheug$svvL>s5H||t5{klY6O|qhxonSZ13$R$61gt01osfr z$jgZi2i9{pPmNu}r4bWoQr*}iY%Ntc(O*)Gq^D+Tr6nJuV5Tv}NLKgSq9Y_cTCr{J z>R{p^^5MaEUwTn;(VkzNSDyYjKnyH~PULl`{xUuD%lf71r!GC8eD(F?XZ5GVQ|G#; zZaWcqc>B=0aCRx^e0@i;{Z7ZDUdp)eveH`IY7>K*%Gt$f3}xoJh&$~}528ejkZf7} z94|3OgC&?N@kErt&2dVIY3ejqaZ+Tu({?hmOlBJUDsx11 zWVBnv#{WUly3;s=plg}_() zsO#bi)zu#Q(sjPCLv3pOWBM;*rc@bYsm#XUBx79s8hxNz<&1+x6&vn%1p>X#hF_qO zB6sX)VjRHdZo5%1#C$D1Z?+g0amWk$E*y1DT4kFTFN4n8swi*2B;lavx&h1`4MIS`6?H5 zrT}>&?5q?NIfFr*^$NWPk$tpBCXR`?6#ld4R z3+dJfk}WZhUZl({%7zk8CQTW1LAG+E#9hiKXX zL)%JWq>i|?E$iu_Kd)}6}M*YdpM|_Bo_SzmKN^k>GuH`eV8EsA{=19mqHbs1Hzmkk3?3QuwYqu8aH|x77mRo*%0Oso)Mv+(i80G6$rP_* zT%gA(>egG%E6~gDsDnr@AHPdOr9#CQkK zCXSp_a?OD|hvvxN!)>k{P;W@wkPv)toX}8-6JyQWH*aQkzxUqvp5w*EmWT1(>3aNm z=y|`;;^KhK`wUap5Yp>;l1ZMkg72U&t@^#Yhllq!LiqymzQKi 
zHji0vK`zUdoQJlUwsx1&ODV?`>7{GlfM4_E!jD2O%B5q!w=Bh$*Iqg3+>Med&(p+Y zRT))-XfQ6Ft|L=LIxFft8%&})uZ+pc;d7w$+S~@;mN;+y`cPca5Y&WeR?`PTu zd!sBF*$`G!6;_z4wtk)tj15#a92q%lmb#;zDD|YjJRj@ThL-jqvm#1A2-Y!$03 z9;^&0=NHZi!=^$8I491|37ivCG35h}=!OS3zf28MTogzzGF_=i!AC?pQI*zFG0sg^ z=V_D`@i5i@N+m%KFt;%Es}Mr~e7tnXjQ5O9*(v*hKS2sz2*C+n)>ae_Y)9KoCacU}H~3h8aG~h2%sQkb&%<7y&zo zbBgXkwQe@dr=6LA=p%Vc{tCquHG!h)i0P1!VYS_hH zyYS6kwUt+O>Q+dV9#wnohN57rRT2>i`m_!8q{yqGdWS4GNW7H3%V%!QDA}i~r;D1{ z;E5)0vSpgke0GH|b9EDTILZdruVXW^BHcF$)%p3Ly8kDg8(2`48+OFijVZt9Jzhql z_|!w-#SvQnlO@TvsDC1nl2Z>@^`wyg38%jW*E>JmiqSdi34x0S*>J2-$z}93R#`RH z(Eve-js76gb(&;@ES1sDB-#e9?M~OSjtvYoP3nz1JJo(aE0a;G!SYt7vr?P5Oj5L0 z)SjK!_)>rN2_yjv=xtwh;D%ZwaSOt-^rbMWk72%zdDb3VXpT$F=g!_{ii)8vy4}t8 zif#1!vP$}W8=6Ute;Y*|NKpiBP>zdwGArH|d?JVQY)8FIn=3C2=*zTa!?UNTb2B3p wRhLVV4L4n&tk6a|7#AQr4!BO$TrVm)&eILzs3wz-BsT$nfj8$`i><4F0|JWgt^fc4 diff --git a/toytools/__pycache__/transform.cpython-39.pyc b/toytools/__pycache__/transform.cpython-39.pyc deleted file mode 100644 index 9ef5acae5822d2e790cdca4d989ca261cd37adc5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 2113 zcma)7-EJF26rP#=a~wNK(<*IR2`x~lRsvF?f`n9|B3f0TDxkDirf9YCjO|VAU3X@d z#?e|rq`l+~deL6?jtAih=9a5I0tgAdnRU`MDiX7rv*&nb&+m7Rvu?Lduzq-PGyAoID1Z|wDa?(^UY=>_~ek9g||?S+DgNJIm+NltP8 zUTbfuk=?1kN%+z;pSSt)6TjEy9o~hOWxm2!@$LwJuZwNJfv!_}EN{=8jblyL7GHZ7 z@pF6~+E%!CpClI!FMl)1h9cH7E7YKrlT7Dj5$m!Xt5=AKB-8{Cs_9s4_ou zJpL&gS0XM4ac*6D0LxwJZl`^mYn6D06;l&dMgFJ~Cd}k;k{u+z3ET%JbPALSo9n$O z=Lacnyd=`r@UJhnM&(3o%|=yL3_soae&@Sy)z-cHH|~D5y`{^Ui}@6DoRLqaGjm}m zbo$!W)L9B`?3NI=PTN#=z`gnzq|ZQ(m*oR&m#Ir&G_iTA=~LEFTbLt??6AW-i|hljz3~Z0wt)rj}kJzy{%n zA2KLpzxFuYC+d3b+qb->3CEq94%h~=^b4)ob9`HSFW5X-@ILsk4(hPc+UOei$f$pK zbtfx$IY9=M7~Aw^v8r|(-%p?>EwW!(a`<`E4lc&{+6U@doU2$(vhi5RSdTKa#z@4A z?_b|anCa{QqxnSKk+PJgl^0qJg;Yr-*PvLQvt%8l7agR4)=>1KS;H81aNV%k^|Qq} zud)dmFKyD^*tELT1_+7P=o(w243Ga0p0GnZOm5=j4ft7no)q8)1f+*lM0RR4WKAFX zb2j(pK9Wglv}^5OBu`oG;R!5f!Fiv&q}$H9By141+?VAvZ9I|}p;TI6o8WQI^~i)H 
zkq<}uY&dQAkqhP;ZYxYvLTHQntV88H;NJkw8w17$jB$biQ`3FVjTEnD`|wO&#u0#S zubZxG{In4QN8UWtKB5ng7;Xl2gi3iwp4y<~(d|bYki!9U1NP$wauyGA zjMFn9x9HVzzbtf?7r?a3!nV8s7n0Bf4wA;hxO5mYfdjXzOB3zVe?}|chYooK1Qh@< zrh+$_Gc-M$)hk-gQtRFVJ?Z0Ij}|5Q3>uULIBEMp#X4P~ZD1p0GQn>cYWBZ2Sv66b z^0J?%Ce*WOUJRujQzj^?$#mv`;3lz~TE;^kVlcTOV3zKRLdZ;GlG;9b9h`{@o=KU_ zoTXDf+y4#eCS~j9q~z6Dd}+so`Vhq8w#8auJ7{D2hjiud&c#-%75U(8SGKV){rNA^ CNcu7W diff --git a/toytools/test/batch_00227_protodune-orig-5-1-U.npz b/toytools/test/batch_00227_protodune-orig-5-1-U.npz new file mode 100644 index 0000000000000000000000000000000000000000..a64d0b6cdbb2f2e7341f1e0e88c6d81e2cd21415 GIT binary patch literal 33030 zcmeI)%SyvQ6oBDLFH(epi#$NOib5&ag^0LzQjA2kAViUObgkC0|0(& zrkPybq_x`4sF+}bs+dNGE-J3tz)=Y`Y+lV&{iDPbJ}XqKd}B$ zZQP9!zhS?huV%zGbml$&VPbt@N60TxTlLGVg#Pj8JzNX&t-`h??i?)_XA8vb3U=wu z+ZRX6Hh(KPqPU3GR4}2>48kF<#co4D9yr;!E`;2*fs zlLFb(e|SJPFmVNBSO4R1$6RFJz}+IAUayCt&eqn}vaVkrUJRrSd_JGWV!3i*vghpu zsEmw^{xcWowQy>AGF)Z66>C)x*N`mOiz;q28Eyx#EMMNODkJw%2*9!Otnu%qe zGjOapZq0Rd1fv>j8u3I8wVhgqm;+*ox?5pxH`4)*?=IX^N5 zv*DkKOS6fbP2|VkKeL>f<;*N+KG$phUp`Hak=EBDZIQ%7%EbxQwntP2)xlE7<>9s^ zgIf8k=5pN?cG|#sx@mN*{~-1(FJDuhwE$>vLv)T$etD_dHLm2XHu4xeBsnE%+%&G{ za4r0?}&h>q68!WX(v| zWmoRa61cGN(}m#&RN9}UG=t!%?uaBXnqALk9SpRygXtAnb1zUGKaimXnvxtP8k=uU zv%N9MqWc*{WfGH+*Bkdd#2q(&+<^h#8{IG>X@fQyiI>_@h;xB7IVWK5d-}E%FBntV z!|k#?t!eJGmTV&{sb>xOcN-3)2B(6jb0_Z{%_UAttudxydl&c(qlMh{ww+kLpxb-g zDOKTJdglszTJn<6IDnQq@uw&|Qgg;$!6?P5^&rNL;%%z04o=`o{3sc=n>Ti&8rac{87f0Cou1RNO4DW@K~_}Iv{PDPH!?>(gvF>x?BbLIoL;UX)+>Dp0i%Ufx((pOJbnr)2b9d8Bh5C#Q6_pRlYMkR+e7i&<9t?Gb$;>M(JQ;6m zK~-(?^sI>wMP!aVkBN&bCshuhnHC9ECA=)o@atMPNwEnF-F=@{+x`ptT1heAcYFi$ U&vRkFK`=pRP Date: Fri, 24 Sep 2021 00:25:21 -0400 Subject: [PATCH 5/8] remove unnecessary folders --- .../test/batch_00227_protodune-orig-5-1-U.npz | Bin 33030 -> 0 bytes .../test/batch_00227_protodune-orig-5-1-U.png | Bin 4266 -> 0 bytes toytools/test/test_transform.py | 27 ------------------ 3 files changed, 27 deletions(-) delete mode 100644 
toytools/test/batch_00227_protodune-orig-5-1-U.npz delete mode 100644 toytools/test/batch_00227_protodune-orig-5-1-U.png delete mode 100644 toytools/test/test_transform.py diff --git a/toytools/test/batch_00227_protodune-orig-5-1-U.npz b/toytools/test/batch_00227_protodune-orig-5-1-U.npz deleted file mode 100644 index a64d0b6cdbb2f2e7341f1e0e88c6d81e2cd21415..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 33030 zcmeI)%SyvQ6oBDLFH(epi#$NOib5&ag^0LzQjA2kAViUObgkC0|0(& zrkPybq_x`4sF+}bs+dNGE-J3tz)=Y`Y+lV&{iDPbJ}XqKd}B$ zZQP9!zhS?huV%zGbml$&VPbt@N60TxTlLGVg#Pj8JzNX&t-`h??i?)_XA8vb3U=wu z+ZRX6Hh(KPqPU3GR4}2>48kF<#co4D9yr;!E`;2*fs zlLFb(e|SJPFmVNBSO4R1$6RFJz}+IAUayCt&eqn}vaVkrUJRrSd_JGWV!3i*vghpu zsEmw^{xcWowQy>AGF)Z66>C)x*N`mOiz;q28Eyx#EMMNODkJw%2*9!Otnu%qe zGjOapZq0Rd1fv>j8u3I8wVhgqm;+*ox?5pxH`4)*?=IX^N5 zv*DkKOS6fbP2|VkKeL>f<;*N+KG$phUp`Hak=EBDZIQ%7%EbxQwntP2)xlE7<>9s^ zgIf8k=5pN?cG|#sx@mN*{~-1(FJDuhwE$>vLv)T$etD_dHLm2XHu4xeBsnE%+%&G{ za4r0?}&h>q68!WX(v| zWmoRa61cGN(}m#&RN9}UG=t!%?uaBXnqALk9SpRygXtAnb1zUGKaimXnvxtP8k=uU zv%N9MqWc*{WfGH+*Bkdd#2q(&+<^h#8{IG>X@fQyiI>_@h;xB7IVWK5d-}E%FBntV z!|k#?t!eJGmTV&{sb>xOcN-3)2B(6jb0_Z{%_UAttudxydl&c(qlMh{ww+kLpxb-g zDOKTJdglszTJn<6IDnQq@uw&|Qgg;$!6?P5^&rNL;%%z04o=`o{3sc=n>Ti&8rac{87f0Cou1RNO4DW@K~_}Iv{PDPH!?>(gvF>x?BbLIoL;UX)+>Dp0i%Ufx((pOJbnr)2b9d8Bh5C#Q6_pRlYMkR+e7i&<9t?Gb$;>M(JQ;6m zK~-(?^sI>wMP!aVkBN&bCshuhnHC9ECA=)o@atMPNwEnF-F=@{+x`ptT1heAcYFi$ U&vRkFK`=pRP Date: Fri, 24 Sep 2021 00:27:12 -0400 Subject: [PATCH 6/8] add a gitignore file --- .gitignore | 111 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0e0c7a3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,111 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +env/ +build/ +develop-eggs/ +dist/ +downloads/ 
+eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# dotenv +.env + +# virtualenv +.venv +venv/ +ENV/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ + +# IDE settings +.vscode/.envrc +.direnv +*~ +*.root +*.npz +*.dat +*.pdf From 2711612c21289c7d28cded44430db2e3e9c21dbb Mon Sep 17 00:00:00 2001 From: Yi Huang Date: Mon, 27 Sep 2021 08:49:54 -0400 Subject: [PATCH 7/8] better alignment --- scripts/extract_toy-sp.py | 354 +++++++++++++++++++------------------- 1 file changed, 177 insertions(+), 177 deletions(-) diff --git a/scripts/extract_toy-sp.py b/scripts/extract_toy-sp.py index 3443e5a..9b7eb91 100644 --- a/scripts/extract_toy-sp.py +++ b/scripts/extract_toy-sp.py @@ -19,191 +19,191 @@ # Extraction functions def extract_files(tar, member_folder, file_extension, save_folder='.'): - """ - Extract all files with from folder of a tarfile object - Input: - - tar (tarfile instance): a tarfile instance to extract from. - - member_folder (str): a folder the contains the files you need. Must be a member in . 
- - file_extension (str): file extension for the files you need. - - save_folder (str): the folder to extract to. - """ - - Path(save_folder).mkdir(exist_ok=True, parents=True) - # locate all the files in the member_folder that have file_extension as extension. - subdir_and_files = [ - tarinfo for tarinfo in tar.getmembers() - if tarinfo.name.startswith(member_folder) and - tarinfo.name.endswith(file_extension) - ] - - print(f'\tsave {len(subdir_and_files)} {file_extension} files to {save_folder}') - - # extract files and move them to the save_folder - tar.extractall(members=subdir_and_files, path=save_folder) - fnames = Path(f'{save_folder}/{member_folder}').glob(f'*.{file_extension}') - for fname in fnames: - shutil.move(fname, save_folder) - - # Remove the chain of parent folders (now empty) - path_base = os.path.normpath(member_folder).split(os.sep)[0] - shutil.rmtree(f'{save_folder}/{path_base}') + """ + Extract all files with from folder of a tarfile object + Input: + - tar (tarfile instance): a tarfile instance to extract from. + - member_folder (str): a folder the contains the files you need. Must be a member in . + - file_extension (str): file extension for the files you need. + - save_folder (str): the folder to extract to. + """ + + Path(save_folder).mkdir(exist_ok=True, parents=True) + # locate all the files in the member_folder that have file_extension as extension. 
+ subdir_and_files = [ + tarinfo for tarinfo in tar.getmembers() + if tarinfo.name.startswith(member_folder) and + tarinfo.name.endswith(file_extension) + ] + + print(f'\tsave {len(subdir_and_files)} {file_extension} files to {save_folder}') + + # extract files and move them to the save_folder + tar.extractall(members=subdir_and_files, path=save_folder) + fnames = Path(f'{save_folder}/{member_folder}').glob(f'*.{file_extension}') + for fname in fnames: + shutil.move(fname, save_folder) + + # Remove the chain of parent folders (now empty) + path_base = os.path.normpath(member_folder).split(os.sep)[0] + shutil.rmtree(f'{save_folder}/{path_base}') def get_seed(fname): - return fname.split('-')[-1].split('_')[0] + return fname.split('-')[-1].split('_')[0] def extract_toy_sp(tarfname, folder_base): - """ - extract all the npz files (both fake and real) from a tar file named - Input: - - tarfname (str): the .tar file name. - - folder_base (str): the folder under where the seed- folders are located - """ - seed = get_seed(Path(tarfname).stem) - print(f'seed = {seed}') - save_folder = Path(f'{folder_base}/seed-{seed}') - if save_folder.exists(): - shutil.rmtree(save_folder) - - with tarfile.open(tarfname, 'r') as tar: - try: - fake = [tarinfo.name for tarinfo in tar.getmembers() if tarinfo.name.endswith('fake-fake')][0] - real = [tarinfo.name for tarinfo in tar.getmembers() if tarinfo.name.endswith('real-fake')][0] - extract_files(tar, fake, 'npz', save_folder/'fake') - extract_files(tar, real, 'npz', save_folder/'real') - print() - except: - print(f'There is something wrong with the tar file {tarfname}') + """ + extract all the npz files (both fake and real) from a tar file named + Input: + - tarfname (str): the .tar file name. 
+ - folder_base (str): the folder under where the seed- folders are located + """ + seed = get_seed(Path(tarfname).stem) + print(f'seed = {seed}') + save_folder = Path(f'{folder_base}/seed-{seed}') + if save_folder.exists(): + shutil.rmtree(save_folder) + + with tarfile.open(tarfname, 'r') as tar: + try: + fake = [tarinfo.name for tarinfo in tar.getmembers() if tarinfo.name.endswith('fake-fake')][0] + real = [tarinfo.name for tarinfo in tar.getmembers() if tarinfo.name.endswith('real-fake')][0] + extract_files(tar, fake, 'npz', save_folder/'fake') + extract_files(tar, real, 'npz', save_folder/'real') + print() + except: + print(f'There is something wrong with the tar file {tarfname}') def parse_cmdargs(): - parser = argparse.ArgumentParser("Extract toy-sp (toy1) data Brett generated") - - parser.add_argument( - '--data_path', - '-d', - help = 'The loacation of the tar files', - dest = 'data_path', - type = str, - ) - - parser.add_argument( - '--seed_path', - '-s', - help = 'The location for the seed folders. Images generated with seed= are contained in /seed-.', - dest = 'seed_path', - type = str, - default = None - ) - - parser.add_argument( - '--merged_path', - '-m', - dest = 'merged_path', - help = 'The location of the merged dataset. The folder will contain two subfolders: fake and real.', - type = str - ) - - parser.add_argument( - '--visualize', - '-v', - help = 'whether to visualize a few pairs of fake and real images', - dest = 'visualize', - default = False, - type = bool, - ) - - return parser.parse_args() + parser = argparse.ArgumentParser("Extract toy-sp (toy1) data Brett generated") + + parser.add_argument( + '--data_path', + '-d', + help = 'The loacation of the tar files', + dest = 'data_path', + type = str, + ) + + parser.add_argument( + '--seed_path', + '-s', + help = 'The location for the seed folders. 
Images generated with seed= are contained in /seed-.', + dest = 'seed_path', + type = str, + default = None + ) + + parser.add_argument( + '--merged_path', + '-m', + dest = 'merged_path', + help = 'The location of the merged dataset. The folder will contain two subfolders: fake and real.', + type = str + ) + + parser.add_argument( + '--visualize', + '-v', + help = 'whether to visualize a few pairs of fake and real images', + dest = 'visualize', + default = False, + type = bool, + ) + + return parser.parse_args() if __name__ == '__main__': - cmdargs = parse_cmdargs() - data_path = cmdargs.data_path - seed_path = cmdargs.seed_path - merged_path = cmdargs.merged_path - visualize = cmdargs.visualize - - # extract the image npz files from each tarfile and save them to folder according to their seed - # Each seed- folder looks like - # seed-/ - # - fake/ - # - real/ - - print('Extracting:') - assert Path(data_path).exists(), f"{data_path} does not exist" - - tarfnames = list(Path(data_path).glob('*.tar')) - assert len(tarfnames) > 0, f"{data_path} does not contain any tar files" - - if not seed_path: - seed_path = '/tmp/LS4GAN/toy-sp/seedwise/' - print('\033[96m' + f'Find seedwise data at {seed_path}' + '\033[0m') - - if not Path(seed_path).exists(): - Path(seed_path).mkdir(exist_ok=True, parents=True) - - for tarfname in tarfnames: - extract_toy_sp(tarfname, seed_path) - - - # Merge - # Modified the image filenames and then save copies to ``. - # The merged folder has two subfolders `fake` and `real`. 
- merged_folder = Path(merged_path) - merged_folder.mkdir(exist_ok=True, parents=True) - merged_fake = merged_folder/'fake' - merged_real = merged_folder/'real' - merged_fake.mkdir(exist_ok=True, parents=True) - merged_real.mkdir(exist_ok=True, parents=True) - - print('Merging:') - for folder in Path(seed_path).glob('seed-*'): - seed = folder.stem.split('-')[-1] - print(f'seed = {seed}') - - npz_fnames = list((folder/'fake').glob('*npz')) - print(f'\tcopy {len(npz_fnames)} to the {merged_fake} folder') - for npz_fname in sorted(npz_fnames): - npz_fname_new = str(npz_fname.name).replace('gauss', f'{seed}-') - shutil.copy(npz_fname, merged_fake/npz_fname_new) - - npz_fnames = list((folder/'real').glob('*npz')) - print(f'\tcopy {len(npz_fnames)} to the {merged_real} folder') - for npz_fname in sorted(npz_fnames): - npz_fname_new = str(npz_fname.name).replace('gauss', f'{seed}-') - shutil.copy(npz_fname, merged_real/npz_fname_new) - print() - - - # Visualization - if not visualize: - exit(0) - - print('Generating sample plots') - def load_image(fname): - with np.load(fname) as f: - return f[f.files[0]] - - fnames_fake = sorted((merged_fake).glob('*npz')) - fnames_real = sorted((merged_real).glob('*npz')) - num_samples = 5 - image_save_folder = Path(f'/tmp/LS4GAN/toy-sp/sample_plots') - if image_save_folder.exists(): - shutil.rmtree(image_save_folder) - image_save_folder.mkdir(exist_ok=True, parents=True) - print('\033[96m' + f'Find sample images at {image_save_folder}' + '\033[0m') - indices = np.random.choice(range(len(fnames_fake)), num_samples, replace=False) - - for idx in tqdm(indices): - image_fname = image_save_folder/f'sample_{idx}.png' - fname_fake = fnames_fake[idx] - fname_real = fnames_real[idx] - - image_fake = load_image(fname_fake) - image_real = load_image(fname_real) - - fig, axes = plt.subplots(1, 3, figsize=(20, 5)) - axes[0].pcolormesh(image_fake) - axes[1].pcolormesh(image_real) - axes[2].pcolormesh(image_fake - image_real) - 
plt.savefig(image_fname, dpi=200, bbox_inches='tight') + cmdargs = parse_cmdargs() + data_path = cmdargs.data_path + seed_path = cmdargs.seed_path + merged_path = cmdargs.merged_path + visualize = cmdargs.visualize + + # extract the image npz files from each tarfile and save them to folder according to their seed + # Each seed- folder looks like + # seed-/ + # - fake/ + # - real/ + + print('Extracting:') + assert Path(data_path).exists(), f"{data_path} does not exist" + + tarfnames = list(Path(data_path).glob('*.tar')) + assert len(tarfnames) > 0, f"{data_path} does not contain any tar files" + + if not seed_path: + seed_path = '/tmp/LS4GAN/toy-sp/seedwise/' + print('\033[96m' + f'Find seedwise data at {seed_path}' + '\033[0m') + + if not Path(seed_path).exists(): + Path(seed_path).mkdir(exist_ok=True, parents=True) + + for tarfname in tarfnames: + extract_toy_sp(tarfname, seed_path) + + + # Merge + # Modified the image filenames and then save copies to ``. + # The merged folder has two subfolders `fake` and `real`. 
+ merged_folder = Path(merged_path) + merged_folder.mkdir(exist_ok=True, parents=True) + merged_fake = merged_folder/'fake' + merged_real = merged_folder/'real' + merged_fake.mkdir(exist_ok=True, parents=True) + merged_real.mkdir(exist_ok=True, parents=True) + + print('Merging:') + for folder in Path(seed_path).glob('seed-*'): + seed = folder.stem.split('-')[-1] + print(f'seed = {seed}') + + npz_fnames = list((folder/'fake').glob('*npz')) + print(f'\tcopy {len(npz_fnames)} to the {merged_fake} folder') + for npz_fname in sorted(npz_fnames): + npz_fname_new = str(npz_fname.name).replace('gauss', f'{seed}-') + shutil.copy(npz_fname, merged_fake/npz_fname_new) + + npz_fnames = list((folder/'real').glob('*npz')) + print(f'\tcopy {len(npz_fnames)} to the {merged_real} folder') + for npz_fname in sorted(npz_fnames): + npz_fname_new = str(npz_fname.name).replace('gauss', f'{seed}-') + shutil.copy(npz_fname, merged_real/npz_fname_new) + print() + + + # Visualization + if not visualize: + exit(0) + + print('Generating sample plots') + def load_image(fname): + with np.load(fname) as f: + return f[f.files[0]] + + fnames_fake = sorted((merged_fake).glob('*npz')) + fnames_real = sorted((merged_real).glob('*npz')) + num_samples = 5 + image_save_folder = Path(f'/tmp/LS4GAN/toy-sp/sample_plots') + if image_save_folder.exists(): + shutil.rmtree(image_save_folder) + image_save_folder.mkdir(exist_ok=True, parents=True) + print('\033[96m' + f'Find sample images at {image_save_folder}' + '\033[0m') + indices = np.random.choice(range(len(fnames_fake)), num_samples, replace=False) + + for idx in tqdm(indices): + image_fname = image_save_folder/f'sample_{idx}.png' + fname_fake = fnames_fake[idx] + fname_real = fnames_real[idx] + + image_fake = load_image(fname_fake) + image_real = load_image(fname_real) + + fig, axes = plt.subplots(1, 3, figsize=(20, 5)) + axes[0].pcolormesh(image_fake) + axes[1].pcolormesh(image_real) + axes[2].pcolormesh(image_fake - image_real) + 
plt.savefig(image_fname, dpi=200, bbox_inches='tight') From 676f9ede8562d399601d74f91060566699128e0d Mon Sep 17 00:00:00 2001 From: yren Date: Thu, 21 Oct 2021 02:19:05 -0400 Subject: [PATCH 8/8] presimple_toyzero in memory dataset --- .../__pycache__/__init__.cpython-39.pyc | Bin 729 -> 0 bytes .../datasets/__pycache__/funcs.cpython-39.pyc | Bin 927 -> 0 bytes .../generic_dataset.cpython-39.pyc | Bin 801 -> 0 bytes .../precropped_toyzero.cpython-39.pyc | Bin 3311 -> 0 bytes .../presimple_toyzero.cpython-39.pyc | Bin 3985 -> 0 bytes .../preunaligned_toyzero.cpython-39.pyc | Bin 4304 -> 0 bytes .../__pycache__/simple_toyzero.cpython-39.pyc | Bin 4904 -> 0 bytes .../__pycache__/torch_funcs.cpython-39.pyc | Bin 1325 -> 0 bytes toytools/datasets/presimple_toyzero.py | 36 +++++++++++++++--- 9 files changed, 30 insertions(+), 6 deletions(-) delete mode 100644 toytools/datasets/__pycache__/__init__.cpython-39.pyc delete mode 100644 toytools/datasets/__pycache__/funcs.cpython-39.pyc delete mode 100644 toytools/datasets/__pycache__/generic_dataset.cpython-39.pyc delete mode 100644 toytools/datasets/__pycache__/precropped_toyzero.cpython-39.pyc delete mode 100644 toytools/datasets/__pycache__/presimple_toyzero.cpython-39.pyc delete mode 100644 toytools/datasets/__pycache__/preunaligned_toyzero.cpython-39.pyc delete mode 100644 toytools/datasets/__pycache__/simple_toyzero.cpython-39.pyc delete mode 100644 toytools/datasets/__pycache__/torch_funcs.cpython-39.pyc diff --git a/toytools/datasets/__pycache__/__init__.cpython-39.pyc b/toytools/datasets/__pycache__/__init__.cpython-39.pyc deleted file mode 100644 index 3678ab829e14ef75a233fa930f5f06bd1a02e615..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 729 zcmZ`$O^?$s5Vg}JO+U9QRvvNF<2N&!nfEgJXw*k=KJI4uuK=MR&iVUvzFk_MtpEHVZ$AUPQe0AN88yvEtHhJutHU-TBh~JDqXBoDZcRZRc>>` ztx2s)*LlSx2ejF8tx^W5J5YkI?l9OXZXd0n(b1ApigX diff --git a/toytools/datasets/__pycache__/funcs.cpython-39.pyc 
b/toytools/datasets/__pycache__/funcs.cpython-39.pyc deleted file mode 100644 index 8c462535110bc226e22ef130e8f996ddaea376b5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 927 zcmZ{iL2uJA6vu5RZPT(%tgB$+o#upoqlgr;c-5C~A2R<=Vp1QUC0v?PvfN2pOx zz?tuW#D%YvD<{4JC!W)^lvOzL%k%%A*m=J^w!Pg$Fy4JTWFrTmpLVf62rQ0aY8yBT z5XYP(I3)qWHg^&ybpyA~-K3SagLa*_l1}Oc9^`G_`GSHj_r_@0EgzmU70W`?sYr66 z=tYr5CYG6|M$!oGMir5P#jF%cQqBz1!svrv7|tK!YA}w|JP~gf{?mmG`ppw1{$t$& z>nvkQJjn##cyS-xCrak|RWGuoF9)DOF3eXjbq-+;#a|M&Z_pfH;26!xcl;HPtxiC{ zXYhvZfd0ae4c!I(y>b54jmukebOU|HpD=F&x=wlLRG30#SJ}`HK!{VB(b#C3u~d{h z4Zgd&hE?t^*SQjnG+2|0C7iOqrbY`WB|Xb7Gx>4zTzk|bdRvc%EoE6L3rSf@ss*9y zowKA6&yTh z(vQLktK9O<*$?ZCNZlaKg|&k#Z3a#+mdfbC8k175R>&v)fNE)|YktiD8IReDk0M@m zU?$u&@~Xh9{Hh8lS~1f_=%3H=R2M2CJtP{txMHK>MW##~=uB{33<-z0Gj$6#u0Qd`<0YY>fyTRp+nV~8xUcDY^cV5hL zRjAS_`N7wg sGPLLoLxUS4O;6VnHspa9`k`NE@g>yf#us&gdSBs5)B8Pr`+&;+0^eG(Bme*a diff --git a/toytools/datasets/__pycache__/precropped_toyzero.cpython-39.pyc b/toytools/datasets/__pycache__/precropped_toyzero.cpython-39.pyc deleted file mode 100644 index e983143a965523a4cd21aa4aa7225a3c9806aa19..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3311 zcma)8-)|eo5#GIDJW>>uDsJth0eT7Y5EcpLrbSU0h7rTIj5LO9SV9Y2kep9~Zp{qwADNA|I!up?w7RFX?Po2=2x}iJuLT}m%TT?&uIZN%-$=lOT z*x~FUOWo9a$x=^rPVI0b<@>DP`Uw`iCh|)|}*Pq9Emd08{x;#1-vaEI0PaH=3yy`s^g^*ctKi07lI-#}Jo7)W3 zEii(GJZ0Dq5AC$~9UJo0OIt77p_BS)`y~tAw3BY&-%GpcCjMLBF^7ddcCl66cp}9; zSDl`0K)y!}f z>v3=^P+FQ{JprN@V<831i*>sVHUWSl6oO1)?F6W$12Y>Kj*3XjI4fZDuq^YH(SMFb z0c$eQWnk0-qm~t;mAAAuwfkZi&vSh%*o|`~)(j&?|J!U6$gC{hZ1_OVYrC0xy|>Sn zAD#unJRSu}S!h}2Dkz@|8RvO$D8}*gtdv1HG$1vI)G*K{J|6~aMgSD8`7!AhX2LR5 zQjFj#48u+x?#AsPmbH&$O^IA2fH^?4Wn84?bPzlyIA3I{wi>)@5YsQGAs+E66jID& zna-2!Fc-lzE|A#4Tu~&RDIwBl!EBi^gCIBEx3SQ&dU&XO_B&`T%~=ZsRP z+08OnG#M%nC)h{uA_IiOGMPhCTw?Z`z>`CWc~QP-uah#+4MV$z zZZ6na@0_9BvVC^J_fU^qHJ=Gt`4LKGrlY88N6W)exe=9*%8Sg2#*o5Qb?8jh$W~S{ 
ztGqg6tBtfi*pVzol@rb62s30^>Z;pR+h`d7OI(hmi1TVIO3IwFztQQpDhJ15SUI$I zxUp7q!gf7C38?&)lhB4!?Uhd#99xKzJXR`-F4)gs?2OB)*f|={<6`vb&XcE)|E_i( z?f>QB-Mt+ou|^eBmrRq+YUi@tF0YKi>`3CyV|{e>I}qk~EsuM=hYo6Z&pLkpO%;By z#5keYFB?Uiu9ti0>IaYon=tvV<`e4-@trs+Kes5t#rB%as%6?+$z0t!w@ zasOE z4D%;#In*8Gk9BHsR@{JO_wA%7N zAW(!W;~n0ydMH6YdWUaY#~=PbnDyP=gWdWf>bL0%FfK-@@X-`6h+N=0=tj{u^Ehw5 zAd}NFi6T=7%~6zp#QO5DAk}75b`z23P%%f=kUx+Lxi8xwbbQn`5T82%?xOnlbMLzU zwr?Aaeph~o@0E+o9!1xr&Rfe`J!nqwT9Glpo5DktrrI)(gDi=fVlI71jo~$)jAI5; U|8~9EJCxgsbbany@7Z1J7iPS59RL6T diff --git a/toytools/datasets/__pycache__/presimple_toyzero.cpython-39.pyc b/toytools/datasets/__pycache__/presimple_toyzero.cpython-39.pyc deleted file mode 100644 index 177353aca69f11b142f913dfbbb3c0fcd28a6779..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 3985 zcma)9OK%*<5$>Lu-JM-NL^E>yNPsk8__2nxlw|_}1X^|=Swf6}5=7>*u`?Xb^zP1j zc4pl@D}}`JL;{izIph)`I*~{Hfc%L3g}LSs_?%O6alYz#a7l_lhUnhup6;rzzN)Tj z`YS6PhUX8boBqoc#{Nlz#mB+mOEkTME@UB(S-<$@eanpPzJp&YZcLi}=A_kcar$n@ z?xfvsGfz+BD0{{8PN33h@%=}RJzbu!K9q@6QE=bSd@ZwpL@hqea-i?vMY28*8DR6i z9bS9R20Uzq?r|Fiw8PFZ>o>xsa2dbNa3x&DZ|ga8SlQ8RgiEKQ8L z6i!okK*?d0CI#nY39)g`Na2HEEn^vgN&(j@KMB)GPdtJw^(@kbR8b17ft7;KZ;uk; zLnQ(roCVPy7@znFf>_KnalWgi40pw}3WW;<8)p1ir)Ts$8%4p0He{6_Ws*e{5*NZD zgNJHt>E0OTh-VRq9i+hwG$&`e?#qEci?ci80dz^TNsnfO0nlFX+C7Ae@e;z2*r52X zmQqYsvA8D(DX~arIU_@(OhieR)__b*&GLag1iG+`b{LW$sUjnDR=l;s$M7lmJtR}i3acq6x>4$`s6P};8vQ*8@0Me zQ$p&;qF!D=hjM>9jrR=-N^1*>zUmEom{-B*X{52cSMSxmCnmh^Av|AWH>oz7!I7uQ zsyjF@DWVd%7D;%Z{J=0+p6<)xfIY_lIIe~)A9-ql`r1D2Q{FQNs#d!Bx%i|!OXNun z+a+ZhVT24y-8V$klV?VJWbgtH#E3#_S9~sZE2+EUV^OcUfVW%2SvYm|;;RIn>8?Zi zR`0eEwe>(z71{aM%fHd{k1qmup>)tYGCQ-;^dHe>?1-K4IX~r`&Fwk!ths~Uo;T1t z^CtR6)*QD^+_`hgF>23S=sVfccsW}cuby0+^SO13>JwUjw(w7ZpSiF@8V6p zN!>N-u2YAr8_S&t)xPVfU(#U3h(pYT5iT0}67gOQTIGv+pZ1#W1A6-fy06gmDmrFa z{Q6I>1LiEpI{4s9&P+4vyRr{A-|FT%qf0}mlxI;2jO>uV;A7{=KD5fQ zRgP`^Hqe@AEi`xF{J`ec+#Xtoj=4SBp>vGWexDt2%y8T%8;diz>^jG+`VL*~kj=RI 
z>vPN8JQw!P9Wdf`j;y~}-(zDz8d>?u%B|k5 zoFj+iOXjj%+(6D;oOd?qz8R#G>0J*y{*A^Cetk8iLzzXHoOoVuldh7nzKfSi!X>l> z;u2cII%aRP1+>U<_TG0x)x&yVZ5E~A45qq`54&jUQ*@Wq5GmVenzD}}W$h3?J;Avg zTc>&C#!el@;Gl6H*}-EYpYL4&1ck6elU3QCC{rJ{CmurC}VutTH|Zhb<4HZ`8r>< zI(*$ac<=uRl5Vpqs#~fJdEGYEE&8tmb*tlf6Wn=YdSCXu?`M8oe!(#h)4=mg;x<`G z-6V`(pvzavDiO#y#u7uO>E5FyhE}T8M1_Ixa_2)7)#~p*t8cn1uB~WIcSXI0pS*z! zAj*V^h~^ZREnh1-U-+-HtTa8;Ub^lZzZAK5z5Z5LGu5C?*UUXK3aS*ST9`HZ7Qogq Xkh_}&3wO!ono5<+@dwJ6LHz#!l_5G# diff --git a/toytools/datasets/__pycache__/preunaligned_toyzero.cpython-39.pyc b/toytools/datasets/__pycache__/preunaligned_toyzero.cpython-39.pyc deleted file mode 100644 index 399fbc46faa7108d028c225b6fd2c4eeec147f06..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4304 zcma)9OK;rP73L*59M0%r)lM5bXaP2TWzuBix0jWsiz4f;+C`G@Ts}0TC`LKpnfpA?@1AoRtgUqo zJb${_4*qt-F#biI#m7eHS9sMv2sb#3jY0X(2Bz*?0~`Nl+?qNAXWAaL8O>X7+QZM5+S4TVUPt%WFfIye4MJIw$Kp9fN=wAvw_8Ld~6IE zZ*%vo0|Pp|du9w;e1)&#-{EU~9sli*4ci#FP;#Sq?THkBNP;*TC4%p#N5?{@mDIq4 zm}rQS@6nM^~Wl;B<*sjT1adU*ag7KwKdDPP-v z;6=)VU9@9g&c$0`i}`GfVP2Z-J&)qplOmhT#G5CxAd-S>R;Ad!_ZWNR^GIQLlr1eU z7gzdK3Oos9Fcq1QN;7%8d31LcWMgm3Q<>CV()bAtJ%skaE8$y;8%zU+ zXNb8JqbNe%-^PrFk4Ac$4kIHFn;i+L6_+5J?8?HP+Pl)L=~y zC$OUg-!Kj5usgZ**+Vf5=5e;=?Lby(yc*AkL#TNrJ`Vs49S;DGU`+?3UP{iWg&vAw zN`9csk&%T!ew1WsqrN^~Gy$i2&v2}<6Y|4X-^Kn$tmsHb89_ zf#0#H0_G+2=^->7QKkyAR9?sau4V?M5(UMl5bywK3n);)+HRVNw;FMeXQXKmd(HAv zbqEh;v-n7BK}mE$&~N&qKE`D@eikY0?l*f?|9y?C2f*buc9Uo=89wqAS+(U2k=77( zx#WZ$8HCz{)j6&~T}MRcL!GN|FtxJ+q{Ic+H}Q%Rjlyx zIQudg6}CTLsKmal*q zomJCe^jl{RwanY>_ll#trLJ|Vx2n(Ji)1KJJe3x+{OwUE7+u+x9$*#T-|nbyQA-D=JDHm zluDcYO1YHskFL?1aO`sa-0gKG>0Vf|NF@0{-XgL_V|JLrL*LcS2zLYdO`7@^NUtrw zL+$Sq(F@kJ3}BE)xGNPa6vC=ASgjfwO_x1Fl>8BK(ButjeH-LGyy`ZHVVZ2?3)gm- z%WQL%t(&&~KkmIOB)XK;7!2ts*AKN#ygD#+o6g9^2n@~_!@XAaFs6-s!Akp$S#2Uw?wlfjN(f0-`j@=!0O<#;HW&|RIpuKoe?i7|l^IlE(=m}r|-+sZ8TY>u=r zX`KQqRo~1l-3J!(*2n}NFoOR!x43;~WDXKf`_z1BoU#*#x2_p;Xe@Wi6_C;9#+=Dd 
zt82D>=GJq$L%06j{hcK*lHz2<9vSc^$OvBi^XmXKJ2Bx=QkY&qePZFYhxV!YSM#a8 zR|dM*l4O6uJ}THz(RvQ^(^@g z7R$Rtenw;&H}dBgo8tWsUSmljA%k*-mPnY_5;snOhjV0&iFv^m7iqeb0R@th#!8S~nxu#>fpF8#|$SqxV(>(s+|A(ZW zQ{NJHC7lJ7Iuu~~U_t@z`u;TK^O)MJzW;O{#MKP)2Tw!a*HuU7I=PA6@*zmER#mxB z#4(m!hVciqgzS-R5XxIRTF8k#gEhbZuD|NuaxJ~4wESavX4syeSzpGm6hOTGuKq)^1$BE7vm_bbE~9pb}er~*#;JcyIp$WJ^?^corS;Q J@eS?&F9EA(nKA$X diff --git a/toytools/datasets/__pycache__/simple_toyzero.cpython-39.pyc b/toytools/datasets/__pycache__/simple_toyzero.cpython-39.pyc deleted file mode 100644 index bf0b1187278bf465d399533f11b50220dabc0de6..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 4904 zcmai1&5zs073WZtL}|71dhNJzzB)kD)Jk2gpF1PhSw+xCIOdAjwG%r zQl6n~ub@4xfw+ehNN?$B_vk+**B*M{wTHsz9t!l(^!J7z+I6<51U?RjGw;3my-#|} z%WVU{&knD7-<&awzth9fpM{6t;8LH11V+Fj!(q6akvXn8wXx+`jxMG!ERyUNjZ%z*A|~_GL0Dw!WFlU3U;-ffU0qiQOk*I&xJwjJ@b3gGxP?b#IAS z$k4yxrJfS$%K+0a2{<3uv0t-zQlDZVG**roFsBxr`O+A$pc&ZDT89CUGS2B(4*-0k3Wu!?)@OT#jpHqbeno!tz_lSn+sF)1|hRvU(qzu?6bPZLha z;I7f*IeI*ac_7AK9B?`yj}tzTVj>e?C>6%Te!I=_yXQ%7EP%4ok3Xw^`f=i6=T)v! 
zsUPzVn0hi2Qt)(ys}LIGNbyiX8Q)7q(AQfu@M`2u1o&JCG4GJjU*No+uH-K zCc+N~p$IM#=a|#h@-RX2B&LJroT`<4dzWu5L~P|UNzEH#;7z0S>OzWmI`Lvf1|N8S z3i($_lS+Y&>3Ca6o(#0yi3oV^9!iT9KPfG7w!1!Qtlq7%t*8Brkl$15+@lbW65( z$Z$_W06R$hDPWElvMMK8hpZgwN#w;s!DyQz{S1Ep#rHh*6JVruD3+2}YS6SP$f*nP z$(Y5OqtKinzKR4wF;;^_ zjv?SWbo0n4MHpVJ0J}R$5h7m1tK}X|kZ^A@iFUOXIz2B0-Ruwhn3w+OaYzByulB0` zeeFZn;e(6y73|fyk*bk_ajL5@eFE2mUPps-T_R*jjXWkGEKhZ1)=5X$qm8^RayFNO=*6@Zuemo=*m*+ThQmsGy~@t1Gt_wBEJnk#=a zt`(Qs!5}sEjRQ7khm09>Yi_va&;YGzY6T4A`rHI<%$uOic>~m**FaloduQpOGjANy zs^xhdv^%#!PtA>kl|us@jf2y44-C{Yb3-fETg~cfIuSCnU6(xBb+eXRro*gR)_EH* zgAipZv)o9;*^;}+@L8L(YVOBQ-KBT|H{^QC=$RQ$vPPb@vZWxe0IrOOU^J0KNXWwx zQcEjkr`)pnD4LFL-cPuNYxP>0h43C^7S%#0ZzOaTa#migD2`no#%UCiuooJ(DxEXC zl>CB04cCu6rCj%g@$Xw}qhu`Bc1Ke$9$sF%cmM9EYVFSEm0O=|tigR!)DX286xyjZ zrJIDZbLvlazqE}{&Q$(P45&4{j!V4* zlETD0Y@hA1)SNSNmz~-^IR{*aOg_7EN4Ei-vf$WD{Sl@8qBTKrpvqK6Z-6vT_S@}m z^y-pCWp$U}mgKRL&LLMpdZy&KWsRJT{0Y6#5EPq^M!G+3eku4|v?-$IeMx z(P8E1q^=L=GWHNA0efi4cTzejoz;B9*f(+ZT8hxwsqfPY1j|k%sC{L9Ztj_Tz+$H< zf2%{tjvG2~Y`xmXYH&D9;noT)pjDssH>rtT;DdAvpFxns!!>wJk;RHUqxw4BAN0(u zsYcVmAQD*(4!+TA%eSF(RvUXy?5T)%(!o_qi1gZsB{Zb}l{YvxYf)=rJy^r6U#F{)4`FVeQw_QPTRJ>#qv{hVHz zr%Y!_C&;`Hd&N%KIhQ_*C@JbRicZi)H{v}4Tctk*VYCi2|6|u@KRp?<6ZiWXXE323 zotNxI;i~=y@|^7$^1YPpmCm8h`bCFQ$aCX$<7@K~0)Anm7iMPmFJmXXe6;klPAd4KbBTC}4 zx`7YqT~uvan>BTN_Tgno6=rb^sT1(8F!v{#zXo!mP>sqtywyUxd%)xX(RN%Ule8k2 zutp;t?v;eEX^JHDx_K%i#pHP+Z-8VCf0Uqm$TM7@rm1#F%Iva7ggjM8qx=PpNLbI7 z9Jj2FWG9~Lm~%RLQ%CtvJavkYjPbE7KcsC}h`djP)I({~=HoO$s-kOI7-fbDMF|#p zlC{}6v%}7!K6UZ8akrTLoo$_CU5r=F8UKGa?lp^kFYl{1WEY%!OVn18erT)Ot~*YG zX+-0W>pq%#QSk=3GDv*a)op?iU7WsIoT z(0zu@toM<-7yo~C-?8iV8M|h;B(3bNWOaP2pg$pVIV)vL(=Up#F8g`C*Onp|byllY zRRfhbl&-#~N{P}4q~`Q6Z&zBLq@f%inn zg4J)t=)1V=T2Z?o2oh4r!{RiwugCzh;r68$I^5wd;DsD`8*yI|hlFjw?wPIk2(ga* zK}O@TkdwVy@FQ82Iroo5E@TqdUcMeh+K;7(w7`tv|6Y9+1I_b=(2(L)*lGt3O)kh8 z9gu`ftzYy9rDRH{gjt-Nl52V#*v3^Q#5W#eNuFrNjLSGM2RDuq>A*Osnbzm8ksPXP zYC4R?X`~cm*W~YEe^g|me?BUseE6jQ?&R$&)qivP^yv9u1!_PgJ{?#RFO8ezTzr%3 
zAkU5sw5~D+g-^7|80&&TRh-hv-m~LIl zSfnZ3-jaeGNCY8WIFLq#HS(xqtvQ}uS?^Q<3y1qPx;}sdbDz?I)%4wWThpnHTm*Eo zdm?lx^ZBsW1J_Ka{|QEy`$g_2Fu<4tR1aUT;s%a!&}N)*q(`QMO~tS`&H8XQGtJ;G zBve&-d&zLautO_uA?&L8tW7E(fp#3U<$d^?jmn`{>LgV?j3u$g#;uz77FG4rLcog4 VDyp|XtI7fPM@1u$NY|{5?mx4PNO}MO diff --git a/toytools/datasets/presimple_toyzero.py b/toytools/datasets/presimple_toyzero.py index 5432e4e..84149e2 100644 --- a/toytools/datasets/presimple_toyzero.py +++ b/toytools/datasets/presimple_toyzero.py @@ -1,5 +1,7 @@ # pylint: disable=missing-module-docstring import os +import ctypes +import multiprocessing as mp import numpy as np import pandas as pd @@ -53,6 +55,7 @@ def __init__( shuffle = True, transform = None, val_size = 0.2, + is_in_mem = False, ): super().__init__(path) @@ -62,9 +65,12 @@ def __init__( self._transform = transform self._prg = np.random.default_rng(seed) self._val_size = val_size + self._is_in_mem = is_in_mem self._df = pd.read_csv(os.path.join(path, fname), index_col = 'index') - self._split_dataset() + self._df = self._split_dataset() + if is_in_mem: + self._shared_data = self._preload_data() def _split_dataset(self): """Split dataset into training/validation parts.""" @@ -77,12 +83,20 @@ def _split_dataset(self): else: indices = val_indices - self._df = self._df.iloc[indices] + return self._df.iloc[indices] - def __len__(self): - return len(self._df) + def _preload_data(self): + data_sz = len(self._df) + img, _ = self._load_image_pair(0) + h, w = img.shape + shared_alloc = mp.Array(ctypes.c_float, data_sz * 2 * h * w) + shared_data = np.ctypeslib.as_array(shared_alloc.get_obj()) + shared_data = shared_data.reshape(data_sz, 2, h, w) + for i in range(data_sz): + shared_data[i][0], shared_data[i][1] = self._load_image_pair(i) + return shared_data - def __getitem__(self, index): + def _load_image_pair(self, index): sample = self._df.iloc[index] image_fake = load_image(self._path, True, sample.image) @@ -96,8 +110,18 @@ def __getitem__(self, index): images = [ (x - 
sample.bkg) for x in images ] images = [ x.astype(np.float32) for x in images ] + return images + + def __len__(self): + return len(self._df) + + def __getitem__(self, index): + if self._is_in_mem: + images = [self._shared_data[index][0], self._shared_data[index][1]] + else: + images = self._load_image_pair(index) + if self._transform is not None: images = [ self._transform(x) for x in images ] return images -