From eb3b0a59a7becb0293f67ca164b5607ffe434045 Mon Sep 17 00:00:00 2001 From: John SJ Anderson Date: Wed, 18 Dec 2024 13:51:12 -0800 Subject: [PATCH] Standardize csvtk and tsv-utils usage [#23] * Wrap tsv-utils usage in `csv2tsv --csv-delim $'\t'` / `csvtk fix-quotes --tabs` * Remove `csvtk fix-quotes` at start of pipeline in "format_ncbi_dataset_report" rule * Remove '-l' flag in "format_ncbi_dataset_report" rule --- ingest/rules/curate.smk | 5 +++-- ingest/rules/fetch_from_ncbi.smk | 8 +++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/ingest/rules/curate.smk b/ingest/rules/curate.smk index d871be2..8c3ae25 100644 --- a/ingest/rules/curate.smk +++ b/ingest/rules/curate.smk @@ -125,6 +125,7 @@ rule subset_curated_metadata_columns: metadata_fields=",".join(config["curate"]["metadata_columns"]), shell: r""" - tsv-select -H -f {params.metadata_fields} \ - {input.metadata} > {output.metadata} + csvtk cut -t -f {params.metadata_fields} \ + {input.metadata} \ + > {output.metadata} """ diff --git a/ingest/rules/fetch_from_ncbi.smk b/ingest/rules/fetch_from_ncbi.smk index 9718dcf..b8f595d 100644 --- a/ingest/rules/fetch_from_ncbi.smk +++ b/ingest/rules/fetch_from_ncbi.smk @@ -89,12 +89,10 @@ rule format_ncbi_dataset_report: --fields {params.ncbi_datasets_fields:q} \ --elide-header \ | csvtk fix-quotes -Ht \ - | csvtk add-header -t -l -n {params.ncbi_datasets_fields:q} \ + | csvtk add-header -t -n {params.ncbi_datasets_fields:q} \ | csvtk rename -t -f accession -n accession_version \ - | csvtk -t mutate -f accession_version -n accession -p "^(.+?)\." \ - | csvtk del-quotes -t \ - | tsv-select -H -f accession --rest last \ - > {output.ncbi_dataset_tsv} + | csvtk -t mutate -f accession_version -n accession -p "^(.+?)\." --at 1 \ + > {output.ncbi_dataset_tsv} """