From 5a7e4d7b82a26d7c3ac236ce2d9ab959d89f5871 Mon Sep 17 00:00:00 2001
From: Thomas Sibley
Date: Wed, 25 May 2022 13:50:27 -0700
Subject: [PATCH 1/2] Revise mem_mb definitions to always result in an integer > 0
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously they would result in mem_mb=0 when the input size in MB was
less than 1, as the int() would truncate that to 0, which was then
propagated to the final result by multiplication with the constant
scaling factor.

This only impacted smaller builds, such as our CI, but good to fix
anyway.

Use ceil() instead of int() to ensure that we always result in at least
mem_mb=1 (although almost every process we run is going to have a
larger memory footprint than that in reality, so there's still a bit of
inaccuracy…).
---
 .../snakemake_rules/export_for_nextstrain.smk |  3 ++-
 workflow/snakemake_rules/main_workflow.smk    | 16 +++++++++-------
 2 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/workflow/snakemake_rules/export_for_nextstrain.smk b/workflow/snakemake_rules/export_for_nextstrain.smk
index 8655a84bc..ae1eab1c4 100644
--- a/workflow/snakemake_rules/export_for_nextstrain.smk
+++ b/workflow/snakemake_rules/export_for_nextstrain.smk
@@ -22,6 +22,7 @@
 import re
 import requests
 import json
+from math import ceil
 from workflow.lib.persistent_dict import PersistentDict, NoSuchEntryError

 ruleorder: dated_json > finalize
@@ -80,7 +81,7 @@ rule export_all_regions:
         # Memory use scales primarily with the size of the metadata file.
         # Compared to other rules, this rule loads metadata as a pandas
         # DataFrame instead of a dictionary, so it uses much less memory.
-        mem_mb=lambda wildcards, input: 5 * int(input.metadata.size / 1024 / 1024)
+        mem_mb=lambda wildcards, input: ceil(5 * (input.metadata.size / 1024 / 1024))
     conda: config["conda_environment"]
     shell:
         """
diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk
index d93d1e7c9..fbcbabc76 100644
--- a/workflow/snakemake_rules/main_workflow.smk
+++ b/workflow/snakemake_rules/main_workflow.smk
@@ -1,3 +1,5 @@
+from math import ceil
+
 rule sanitize_metadata:
     input:
         metadata=lambda wildcards: _get_path_for_input("metadata", wildcards.origin)
@@ -803,7 +805,7 @@ rule tree:
         # Multiple sequence alignments can use up to 40 times their disk size in
         # memory, especially for larger alignments.
         # Note that Snakemake >5.10.0 supports input.size_mb to avoid converting from bytes to MB.
-        mem_mb=lambda wildcards, input: 40 * int(input.size / 1024 / 1024)
+        mem_mb=lambda wildcards, input: ceil(40 * (input.size / 1024 / 1024))
     conda: config["conda_environment"]
     shell:
         """
@@ -839,7 +841,7 @@ rule refine:
         # Multiple sequence alignments can use up to 15 times their disk size in
         # memory.
         # Note that Snakemake >5.10.0 supports input.size_mb to avoid converting from bytes to MB.
-        mem_mb=lambda wildcards, input: 15 * int(input.size / 1024 / 1024)
+        mem_mb=lambda wildcards, input: ceil(15 * (input.size / 1024 / 1024))
     params:
         root = config["refine"]["root"],
         clock_rate = config["refine"]["clock_rate"],
@@ -893,7 +895,7 @@ rule ancestral:
         # Multiple sequence alignments can use up to 15 times their disk size in
         # memory.
         # Note that Snakemake >5.10.0 supports input.size_mb to avoid converting from bytes to MB.
-        mem_mb=lambda wildcards, input: 15 * int(input.size / 1024 / 1024)
+        mem_mb=lambda wildcards, input: ceil(15 * (input.size / 1024 / 1024))
     conda: config["conda_environment"]
     shell:
         """
@@ -924,7 +926,7 @@ rule translate:
         # Multiple sequence alignments can use up to 15 times their disk size in
         # memory.
         # Note that Snakemake >5.10.0 supports input.size_mb to avoid converting from bytes to MB.
-        mem_mb=lambda wildcards, input: 15 * int(input.size / 1024 / 1024)
+        mem_mb=lambda wildcards, input: ceil(15 * (input.size / 1024 / 1024))
     conda: config["conda_environment"]
     shell:
         """
@@ -1055,7 +1057,7 @@ rule clades:
         "benchmarks/clades_{build_name}.txt"
     resources:
         # Memory use scales primarily with size of the node data.
-        mem_mb=lambda wildcards, input: 3 * int(input.size / 1024 / 1024)
+        mem_mb=lambda wildcards, input: ceil(3 * (input.size / 1024 / 1024))
     conda: config["conda_environment"]
     shell:
         """
@@ -1081,7 +1083,7 @@ rule emerging_lineages:
         "benchmarks/emerging_lineages_{build_name}.txt"
     resources:
         # Memory use scales primarily with size of the node data.
-        mem_mb=lambda wildcards, input: 3 * int(input.size / 1024 / 1024)
+        mem_mb=lambda wildcards, input: ceil(3 * (input.size / 1024 / 1024))
     conda: config["conda_environment"]
     shell:
         """
@@ -1125,7 +1127,7 @@ rule colors:
         # Memory use scales primarily with the size of the metadata file.
         # Compared to other rules, this rule loads metadata as a pandas
         # DataFrame instead of a dictionary, so it uses much less memory.
-        mem_mb=lambda wildcards, input: 5 * int(input.metadata.size / 1024 / 1024)
+        mem_mb=lambda wildcards, input: ceil(5 * (input.metadata.size / 1024 / 1024))
     conda: config["conda_environment"]
     shell:
         """

From acb198eefe99ec21d164d31ec96e7b89731f74b0 Mon Sep 17 00:00:00 2001
From: Thomas Sibley
Date: Wed, 25 May 2022 14:00:17 -0700
Subject: [PATCH 2/2] Pass the tree rule's mem_mb through to IQ-TREE

While most of our mem_mb definitions are only heuristics for
Snakemake's scheduler and the commands themselves aren't limited by or
aware of the mem_mb defined, IQ-TREE *does* support memory limits.

Resolves .
---
 workflow/snakemake_rules/main_workflow.smk | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/workflow/snakemake_rules/main_workflow.smk b/workflow/snakemake_rules/main_workflow.smk
index fbcbabc76..13ef8e3f7 100644
--- a/workflow/snakemake_rules/main_workflow.smk
+++ b/workflow/snakemake_rules/main_workflow.smk
@@ -811,7 +811,7 @@ rule tree:
         """
         augur tree \
             --alignment {input.alignment} \
-            --tree-builder-args {params.args} \
+            --tree-builder-args {params.args}' --mem {resources.mem_mb}M' \
             {params.exclude_sites} \
             --output {output.tree} \
             --nthreads {threads} 2>&1 | tee {log}
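
Note on the arithmetic behind the first patch: the sketch below (plain
Python, with a made-up ~200 KB input size rather than anything from a real
build) shows why the old int()-based expressions bottomed out at mem_mb=0
for inputs under 1 MB, while the ceil()-based form always yields at least 1.

    from math import ceil

    size_bytes = 200 * 1024                              # hypothetical small input (~0.2 MB)

    old_mem_mb = 15 * int(size_bytes / 1024 / 1024)      # int(0.195...) == 0, so 15 * 0 == 0
    new_mem_mb = ceil(15 * (size_bytes / 1024 / 1024))   # ceil(2.929...) == 3

    print(old_mem_mb, new_mem_mb)                        # prints: 0 3

The revised form also scales the fractional MB value before rounding up, so
it stays slightly closer to the intended estimate for larger inputs as well.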
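
Note on the second patch: the quoting in the new --tree-builder-args line can
look odd at first glance. A rough illustration of how the fragment expands,
using hypothetical values for params.args and resources.mem_mb (the real
values come from the build config and from Snakemake at run time):

    # Hypothetical values; in the workflow these come from config and Snakemake.
    params_args = "'-ninit 10 -n 4'"   # tree-builder-args as a pre-quoted string
    mem_mb = 1500                      # resources.mem_mb computed for this run

    fragment = "--tree-builder-args {args}' --mem {mem}M'".format(args=params_args, mem=mem_mb)
    print(fragment)
    # --tree-builder-args '-ninit 10 -n 4'' --mem 1500M'
    #
    # The shell joins the two adjacent single-quoted strings into one word, so
    # augur tree receives "-ninit 10 -n 4 --mem 1500M" as its tree-builder
    # arguments and the --mem limit is passed on to IQ-TREE.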