From 8a9072e6c950c228f58999e77bec3b26370db293 Mon Sep 17 00:00:00 2001 From: Kevin Sayers Date: Thu, 3 Oct 2024 10:06:55 -0600 Subject: [PATCH 1/5] creating samplesheet command --- omics/cli/sample/__init__.py | 0 omics/cli/sample/samplesheet.py | 18 ++++++++++++++++++ 2 files changed, 18 insertions(+) create mode 100644 omics/cli/sample/__init__.py create mode 100644 omics/cli/sample/samplesheet.py diff --git a/omics/cli/sample/__init__.py b/omics/cli/sample/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/omics/cli/sample/samplesheet.py b/omics/cli/sample/samplesheet.py new file mode 100644 index 0000000..81bceae --- /dev/null +++ b/omics/cli/sample/samplesheet.py @@ -0,0 +1,18 @@ +import boto3 + +session = boto3.Session() +omics = session.client("omics") + +response = omics.list_read_sets( + maxResults=100, + sequenceStoreId="3543742895", + filter={ + "status": "ACTIVE" + } +) + + +for read in response['readSets']: + id = read['id'] + _ = omics.get_read_set_metadata(id=id, sequenceStoreId="3543742895") + print(_['files']['source1']['s3Access']['s3Uri']) \ No newline at end of file From c8a7406d390880cfd625915798a0018b983c0736 Mon Sep 17 00:00:00 2001 From: Kevin Sayers Date: Sat, 12 Oct 2024 22:30:34 -0600 Subject: [PATCH 2/5] minimal working sample sheet --- omics/cli/sample/samplesheet.py | 77 ++++++++++++++++++++++++++++----- 1 file changed, 66 insertions(+), 11 deletions(-) diff --git a/omics/cli/sample/samplesheet.py b/omics/cli/sample/samplesheet.py index 81bceae..8325a11 100644 --- a/omics/cli/sample/samplesheet.py +++ b/omics/cli/sample/samplesheet.py @@ -1,18 +1,73 @@ +#!/usr/bin/env python3 +""" +Command-line tool to create a sample sheet + +Usage: omics-samples [] + [--start=] + [--end=] + [--out=] + [--help] + +Options: + -s, --start= Show runs completed after specified date/time (UTC) + -e, --end= Show runs completed before specified date/time (UTC) + -h, --help Show help text + +Examples: + # Create a sample sheet with samples 
created after the specified date + omics-samples 1234567890 -s 2023-07-01 +""" import boto3 +import docopt +from dateutil import parser session = boto3.Session() omics = session.client("omics") +omics_client = boto3.client("omics") + + +def get_samples(sqnid, filter): + samples = [] + paginator = omics_client.get_paginator("list_read_sets") + + params = {"sequenceStoreId": sqnid, "filter": filter} + + for page in paginator.paginate(**params): + read_sets = page.get("readSets", []) + for read_set in read_sets: + data = omics_client.get_read_set_metadata(id=read_set["id"], sequenceStoreId=sqnid) + sample = data["sampleId"] + uri = data["files"]["source1"]["s3Access"]["s3Uri"] + samples.append(f"{sample},{uri}") + return samples + + +def get_filter(cli_opts) -> dict: + filter = {} + if opts["--start"]: + filter["createdAfter"] = parser.parse(opts["--start"]) + if opts["--end"]: + filter["createdBefore"] = parser.parse(opts["--end"]) + return filter + + +def write_samples(samples, outpath): + with open(outpath, "w") as out: + for sample in samples: + out.write(sample + "\n") + -response = omics.list_read_sets( - maxResults=100, - sequenceStoreId="3543742895", - filter={ - "status": "ACTIVE" - } -) +def main(): + sequence_store = opts[""] + filter = get_filter(opts) + samples = get_samples(sequence_store, filter) + if opts["--out"]: + write_samples(samples, opts["--out"]) + else: + for sample in samples: + print(sample) -for read in response['readSets']: - id = read['id'] - _ = omics.get_read_set_metadata(id=id, sequenceStoreId="3543742895") - print(_['files']['source1']['s3Access']['s3Uri']) \ No newline at end of file +if __name__ == "__main__": + opts = docopt.docopt(__doc__) + main() From e3a7fcbc10f190271a11aebc3807f6e0a506e419 Mon Sep 17 00:00:00 2001 From: Kevin Sayers Date: Sun, 13 Oct 2024 22:32:47 -0600 Subject: [PATCH 3/5] filter active readsets --- omics/cli/sample/samplesheet.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git 
a/omics/cli/sample/samplesheet.py b/omics/cli/sample/samplesheet.py index 8325a11..db61705 100644 --- a/omics/cli/sample/samplesheet.py +++ b/omics/cli/sample/samplesheet.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Command-line tool to create a sample sheet +Command-line tool to create a sample sheet from active readsets Usage: omics-samples [] [--start=] @@ -20,11 +20,11 @@ import boto3 import docopt from dateutil import parser +import logging -session = boto3.Session() -omics = session.client("omics") omics_client = boto3.client("omics") +logging.basicConfig(level=logging.INFO) def get_samples(sqnid, filter): samples = [] @@ -35,6 +35,7 @@ def get_samples(sqnid, filter): for page in paginator.paginate(**params): read_sets = page.get("readSets", []) for read_set in read_sets: + logging.info(f"Processing read set {read_set['id']}") data = omics_client.get_read_set_metadata(id=read_set["id"], sequenceStoreId=sqnid) sample = data["sampleId"] uri = data["files"]["source1"]["s3Access"]["s3Uri"] @@ -43,7 +44,9 @@ def get_samples(sqnid, filter): def get_filter(cli_opts) -> dict: - filter = {} + filter = { + "status": "ACTIVE" + } if opts["--start"]: filter["createdAfter"] = parser.parse(opts["--start"]) if opts["--end"]: From 4e5cdc6a24409260e08d61f1197e83f3bc7cc88f Mon Sep 17 00:00:00 2001 From: Kevin Sayers Date: Fri, 18 Oct 2024 22:33:52 -0600 Subject: [PATCH 4/5] adding profile, region, sampleid, and subjectid as options --- omics/cli/sample/samplesheet.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/omics/cli/sample/samplesheet.py b/omics/cli/sample/samplesheet.py index db61705..cc2fab2 100644 --- a/omics/cli/sample/samplesheet.py +++ b/omics/cli/sample/samplesheet.py @@ -6,6 +6,10 @@ [--start=] [--end=] [--out=] + [--profile=] + [--region=] + [--sampleId=] + [--subjectId=] [--help] Options: @@ -18,14 +22,19 @@ omics-samples 1234567890 -s 2023-07-01 """ import boto3 +from botocore.config import Config import docopt from 
dateutil import parser import logging -omics_client = boto3.client("omics") +opts = docopt.docopt(__doc__) +config = Config(retries={"max_attempts": 10, "mode": "standard"}) +session = boto3.session.Session(profile_name=opts["--profile"], region_name=opts["--region"]) +omics_client = session.client("omics", config=config) logging.basicConfig(level=logging.INFO) + def get_samples(sqnid, filter): samples = [] paginator = omics_client.get_paginator("list_read_sets") @@ -44,9 +53,7 @@ def get_samples(sqnid, filter): def get_filter(cli_opts) -> dict: - filter = { - "status": "ACTIVE" - } + filter = {"status": "ACTIVE"} if opts["--start"]: filter["createdAfter"] = parser.parse(opts["--start"]) if opts["--end"]: @@ -72,5 +79,4 @@ def main(): if __name__ == "__main__": - opts = docopt.docopt(__doc__) main() From 0d2272a3c15714253e1ed7ce83ca7358c1243169 Mon Sep 17 00:00:00 2001 From: Kevin Sayers Date: Fri, 18 Oct 2024 23:01:45 -0600 Subject: [PATCH 5/5] adding options, error handling, and CSV formatting --- omics/cli/sample/samplesheet.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/omics/cli/sample/samplesheet.py b/omics/cli/sample/samplesheet.py index cc2fab2..531e6bb 100644 --- a/omics/cli/sample/samplesheet.py +++ b/omics/cli/sample/samplesheet.py @@ -15,6 +15,11 @@ Options: -s, --start= Show runs completed after specified date/time (UTC) -e, --end= Show runs completed before specified date/time (UTC) + --sampleId= Select the sampleId + --subjectId= Select the subjectId + -o, --out= Write output to file + -p, --profile= AWS profile + -r, --region= AWS region -h, --help Show help text Examples: @@ -26,6 +31,7 @@ import docopt from dateutil import parser import logging +import sys opts = docopt.docopt(__doc__) config = Config(retries={"max_attempts": 10, "mode": "standard"}) @@ -48,7 +54,7 @@ def get_samples(sqnid, filter): data = omics_client.get_read_set_metadata(id=read_set["id"], sequenceStoreId=sqnid) sample = data["sampleId"] 
uri = data["files"]["source1"]["s3Access"]["s3Uri"] - samples.append(f"{sample},{uri}") + samples.append(f"{sqnid},{sample},{uri}") return samples @@ -58,11 +64,17 @@ def get_filter(cli_opts) -> dict: filter["createdAfter"] = parser.parse(opts["--start"]) if opts["--end"]: filter["createdBefore"] = parser.parse(opts["--end"]) + if opts["--sampleId"]: + filter["sampleId"] = opts["--sampleId"] + if opts["--subjectId"]: + filter["subjectId"] = opts["--subjectId"] return filter def write_samples(samples, outpath): + headers = ["sequenceStoreId", "sampleId", "s3Uri"] with open(outpath, "w") as out: + out.write(",".join(headers) + "\n") for sample in samples: out.write(sample + "\n") @@ -79,4 +91,8 @@ def main(): if __name__ == "__main__": + if opts["--sampleId"]: + if not opts["--subjectId"]: + logging.error("If using --sampleId you must also specify --subjectId") + sys.exit(1) main()