From f7425bf376fb71b27a5d65259e3aab941d178ff4 Mon Sep 17 00:00:00 2001
From: Lucas <lpatel@ucsd.edu>
Date: Mon, 3 Feb 2025 13:46:33 -0800
Subject: [PATCH 1/2] docs: updated README with more examples

---
 README.md | 91 ++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 70 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index 248822c..db4d6e4 100644
--- a/README.md
+++ b/README.md
@@ -19,7 +19,7 @@ available from that resource.
 ## Installation
 
 We currently recommend creating a separate conda environment, and installing
-into that
+into that.
 
 ```bash
 $ conda create -n micov -c conda-forge polars matplotlib scipy click tqdm numba duckdb pyarrow
@@ -45,37 +45,86 @@ $ micov qiita-coverage \
     --samples-to-keep metadata-with-samples-of-interest.tsv
 ```
 
-The above command can be constrained to particular features as well.
+Exising SAM/BAM data can be compressed into a BED-like format. Genome lengths and taxonomy are optional, but useful for downstream analysis:
 
-If instead, the desire is to produce non-cumulative, cumulative and coverage
-maps, the command is slightly restructured. This command as well can be limited
-to specific features.
+```bash
+$ micov compress \
+    --data input.sam \
+    --output compressed.tsv \
+    --lengths genome-lengths.tsv \
+    --taxonomy taxonomy.tsv
+```
+
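+SAM/BAM data can also be piped in on standard input (for example, while decompressing an archived file), with the compressed result written to standard output: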
 
 ```bash
-$ micov per-sample-group \
-    --qiita-coverages /qmounts/qiita_data/BIOM/191463/coverages.tgz \
-    --qiita-coverages /qmounts/qiita_data/BIOM/191556/coverages.tgz \
-    --qiita-coverages /qmounts/qiita_data/BIOM/191575/coverages.tgz \
-    --qiita-coverages /qmounts/qiita_data/BIOM/191879/coverages.tgz \
-    --lengths genome-lengths-in-reference.map \
-    --sample-metadata metadata-with-samples-of-interest.tsv \
-    --sample-metadata-column cool_categorical_variable \
-    --output plots-example 
+$ xzcat some_data.sam.xz | micov compress > compressed_output.tsv
 ```
 
-Exising .SAM/.BAM can be compressed into a BED-like format by file or pipe. A
-pipe example is shown below:
+Generate a coverage visualization for a single sample:
 
 ```bash
-$ xzcat some_data.sam.xz | micov compress | compressed.tsv
+$ micov position-plot \
+    --positions sample_coverage.bed \
+    --output sample_coverage_plot.png \
+    --lengths genome-lengths.tsv
 ```
 
-Compressed BED-like representations can be aggregated into Qiita-like coverage
-files as well:
+Consolidate multiple coverage files into a Qiita-like archive:
 
 ```bash
 $ micov consolidate \
+    --paths file_with_list_of_coverages.txt \
+    --output consolidated_coverages.tgz \
+    --lengths genome-lengths.tsv
+```
+
+Convert Qiita coverage data to Parquet for efficient querying:
+
+```bash
+$ micov qiita-to-parquet \
+    --qiita-coverages /path/to/coverage1.tgz \
+    --qiita-coverages /path/to/coverage2.tgz \
+    --output coverage_data_base \
     --lengths genome-lengths.tsv \
-    --paths a-file-with-a-list-of-paths \
-    --output consolidated.tgz
+    --samples-to-keep sample_metadata.tsv
+```
+
+Generate per-sample-group analysis plots from precomputed Parquet coverage. Include `--plot` to generate visualizations and `--monte focused` to generate a null coverage curve:
+
+```bash
+$ micov per-sample-group \
+    --parquet-coverage coverage_data_base \
+    --sample-metadata sample_metadata.tsv \
+    --sample-metadata-column experimental_group \
+    --output per_sample_plots \
+    --features-to-keep features_list.tsv \
+    --plot \
+    --monte focused \
+    --monte-iters 100 \
+    --target-names target_names.tsv
+```
+
+Monte Carlo simulation can also be run as a separate command to generate a null coverage curve:
+
+```bash
+$ micov per-sample-monte \
+    --parquet-coverage coverage_data_base \
+    --sample-metadata sample_metadata.tsv \
+    --sample-metadata-column group_column \
+    --output monte_results \
+    --plot \
+    --iters 500 \
+    --target-names target_names.tsv
+```
+
+Analyze coverage distribution by binning the genome positions for a genome of interest:
+
+```bash
+$ micov binning \
+    --covered-positions all_samples_covered_positions.tsv \
+    --outdir binning_results \
+    --genome-id G000005825 \
+    --genome-length 4249288 \
+    --bin-num 1000
 ```

From b4b4e126efcafc7a7e62ff46f6c809e72a6c8fe1 Mon Sep 17 00:00:00 2001
From: Lucas <lpatel@ucsd.edu>
Date: Mon, 3 Feb 2025 17:51:19 -0800
Subject: [PATCH 2/2] fix: typos and filename consistency

---
 README.md | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index db4d6e4..9af41ff 100644
--- a/README.md
+++ b/README.md
@@ -45,12 +45,12 @@ $ micov qiita-coverage \
     --samples-to-keep metadata-with-samples-of-interest.tsv
 ```
 
-Exising SAM/BAM data can be compressed into a BED-like format. Genome lengths and taxonomy are optional, but useful for downstream analysis:
+Existing SAM/BAM data can be compressed into a BED-like format. Genome lengths and taxonomy are optional, but useful for downstream analysis:
 
 ```bash
 $ micov compress \
     --data input.sam \
-    --output compressed.tsv \
+    --output compressed_output.tsv \
     --lengths genome-lengths.tsv \
     --taxonomy taxonomy.tsv
 ```
@@ -65,7 +65,7 @@ Generate a coverage visualization for a single sample:
 
 ```bash
 $ micov position-plot \
-    --positions sample_coverage.bed \
+    --positions covered-positions.tsv \
     --output sample_coverage_plot.png \
     --lengths genome-lengths.tsv
 ```
@@ -74,8 +74,8 @@ Consolidate multiple coverage files into a Qiita-like archive:
 
 ```bash
 $ micov consolidate \
-    --paths file_with_list_of_coverages.txt \
-    --output consolidated_coverages.tgz \
+    --paths /path/to/coverage/files \
+    --output consolidated_coverages \
     --lengths genome-lengths.tsv
 ```