diff --git a/cubi_tk/sodar/ingest.py b/cubi_tk/sodar/ingest.py index 2ed09d59..3d43d0e1 100644 --- a/cubi_tk/sodar/ingest.py +++ b/cubi_tk/sodar/ingest.py @@ -81,6 +81,13 @@ def setup_argparse(cls, parser: argparse.ArgumentParser) -> None: action="store_true", help="Recursively match files in subdirectories. Creates iRODS sub-collections to match directory structure.", ) + parser.add_argument( + "-e", + "--exclude", + nargs="+", + type=str, + help="Exclude files by defining one or multiple glob-style patterns.", + ) parser.add_argument( "-K", "--remote-checksums", @@ -276,11 +283,15 @@ def build_file_list(self): if src.is_dir(): paths = abspath.glob("**/*" if self.args.recursive else "*") + excludes = self.args.exclude for p in paths: + if excludes and any(p.match(e) for e in excludes): + continue if p.is_file() and not p.suffix.lower() == ".md5": output_paths.append({"spath": p, "ipath": p.relative_to(abspath)}) else: - output_paths.append({"spath": src, "ipath": Path(src.name)}) + if not any(src.match(e) for e in (self.args.exclude or [])): + output_paths.append({"spath": src, "ipath": Path(src.name)}) return output_paths def build_jobs(self, source_paths) -> typing.Set[TransferJob]: diff --git a/tests/test_sodar_ingest.py b/tests/test_sodar_ingest.py index da2ab234..bc5d05d2 100644 --- a/tests/test_sodar_ingest.py +++ b/tests/test_sodar_ingest.py @@ -75,8 +75,9 @@ class DummyArgs(object): fs.create_symlink("/loop_src2", "/loop_src") args = DummyArgs() - args.sources = ["broken_link", "not_here", "loop_src", "testdir"] + args.sources = ["broken_link", "not_here", "loop_src", "testdir", "testdir", "file5"] args.recursive = True + args.exclude = ["file4", "file5"] dummy = MagicMock() args_mock = PropertyMock(return_value=args) type(dummy).args = args_mock @@ -86,6 +87,8 @@ class DummyArgs(object): fs.create_file("/testdir/file1.md5") fs.create_file("/testdir/subdir/file2") fs.create_file("/file3") + fs.create_file("/testdir/file4") + fs.create_file("/file5") 
fs.create_symlink("/testdir/file3", "/file3") paths = SodarIngest.build_file_list(dummy) @@ -120,6 +123,8 @@ class DummyArgs(object): "ipath": Path("subdir/file2"), } not in paths assert {"spath": Path("/testdir/file3"), "ipath": Path("file3")} in paths + assert {"spath": Path("/testdir/file4"), "ipath": Path("file4")} not in paths + assert {"spath": Path("file5"), "ipath": Path("file5")} not in paths @patch("cubi_tk.sodar.ingest.sorted")