Commit

add additional files for data augmentation
Packophys authored and Packophys committed Aug 25, 2023
1 parent c19453d commit 482678e
Showing 2 changed files with 79 additions and 0 deletions.
41 changes: 41 additions & 0 deletions src/plasticorigins/training/data/DA_for_GenData.py
@@ -0,0 +1,41 @@
from pathlib import Path
import argparse
from argparse import Namespace
import os


#############
# COPY AND RENAME THE OLD TRAIN AND VAL TXT FILES
# FILL THE NEW TRAIN TXT by executing:
# python src/plasticorigins/training/data/DA_for_GenData.py --data-dir /datadrive/data/data_20062022 --artificial-data /datadrive/data/artificial_data
# RENAME WITH THE _DA SUFFIX AFTERWARDS
#############


def main(args: Namespace) -> None:
    """Write the file names of the artificial (data-augmented) images to a new train.txt.

    Args:
        args (Namespace): command-line arguments giving the main data folder and the artificial data folder
    """

    data_dir = Path(args.data_dir)

    # list the artificial (data-augmented) images; the original data should already have been processed
    artificial_data_dir = Path(args.artificial_data)
    artificial_train_files = [
        Path(path).as_posix() for path in os.listdir(artificial_data_dir / "images")
    ]

    # write the artificial image file names to a new train.txt
    # (the original train.txt should already have been copied and renamed, see the note above)
    with open(data_dir / "train.txt", "w") as f:
        for path in artificial_train_files:
            f.write(path + "\n")


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Build dataset")
    parser.add_argument("--data-dir", type=str, help="path to main data folder")
    parser.add_argument("--artificial-data", type=str, help="path to artificial data folder")

    args = parser.parse_args()
    main(args)
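
A quick sanity check one might run after the copy/run/rename workflow described in the header comment (a minimal sketch, assuming the same /datadrive paths used in that comment): the new train.txt should contain one line per artificial image.

from pathlib import Path

data_dir = Path("/datadrive/data/data_20062022")          # same --data-dir as in the comment
artificial_dir = Path("/datadrive/data/artificial_data")  # same --artificial-data as in the comment

lines = (data_dir / "train.txt").read_text().splitlines()
n_images = len(list((artificial_dir / "images").iterdir()))
assert len(lines) == n_images  # one line written per artificial image
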
38 changes: 38 additions & 0 deletions src/plasticorigins/training/data/remove_img.py
@@ -0,0 +1,38 @@
from pathlib import Path
import argparse
from argparse import Namespace
import os
from tqdm import tqdm


#############
# remove the data-augmented images by executing:
# python src/plasticorigins/training/data/remove_img.py --artificial-data /datadrive/data/artificial_data/
#############


def main(args: Namespace) -> None:
    """Remove the data-augmented images from the artificial data folder, keeping only the original images.

    Args:
        args (Namespace): command-line arguments giving the artificial data folder
    """

    # list every image in the artificial data folder
    artificial_data_dir = Path(args.artificial_data)
    images_dir = artificial_data_dir / "images"
    artificial_train_files = [(images_dir / path).as_posix() for path in os.listdir(images_dir)]

    # original images are identified by their file name having fewer than three
    # underscore-separated parts; augmented copies carry extra "_" suffixes
    original_data = [
        (images_dir / path).as_posix()
        for path in os.listdir(images_dir)
        if len(path.split("_")) < 3
    ]
    data_to_remove = list(set(artificial_train_files) - set(original_data))

    # remove the data augmentation files
    for file in tqdm(data_to_remove):
        os.remove(file)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Remove data-augmented images")
    parser.add_argument("--artificial-data", type=str, help="path to artificial data folder")

    args = parser.parse_args()
    main(args)
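
For reference, a minimal illustration of the file-name rule the script relies on (the example names are hypothetical): a name with fewer than three underscore-separated parts is treated as an original image and kept, anything longer is treated as an augmented copy and removed.

print(len("img_0001.jpg".split("_")) < 3)        # True  -> kept as original
print(len("img_0001_flip.jpg".split("_")) < 3)   # False -> removed as augmented copy
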
