fix packaging issue; update metadata table docs and usage help

phac-nml · Oct 13, 2019 · f653c2f · f653c2f
1 parent 34d3a46
commit f653c2f
Show file tree

Hide file tree

Showing 4 changed files with 45 additions and 7 deletions.
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -3,5 +3,6 @@ include README.rst
 include MANIFEST.in
 include setup.py
 recursive-include *.py *.fasta
+exclude tests
 exclude ipynbs
 exclude venv
diff --git a/README.rst b/README.rst
@@ -269,13 +269,51 @@ Analysis of all FASTA/FASTQ files in a directory
 Metadata addition to analysis
 -----------------------------
 
-*Works with any of the analyses above
+Add subtype metadata to your analysis results with ``-M your-subtype-metadata.tsv``:
 
 .. code-block:: bash
 
-    hansel -s heidelberg -M <metadata.tsv> -vv --threads <n_cpu> -o results.tab -O match_results.tab -D /path/to/fastas_or_fastqs/
-
-``biohansel`` works best on TSV metadata files. If possible, use a tab separated metadata file or your analysis may fail. 
+    hansel -s heidelberg \
+      -M your-subtype-metadata.tsv \
+      -o results.tab \
+      -O match_results.tab \
+      -D ~/your-reads-directory/
+
+Your metadata table **must** contain a field with the field name ``subtype``, e.g.
+
+.. list-table::
+   :header-rows: 1
+
+   * - subtype
+     - host_association
+     - geoloc
+     - genotype_alternative
+   * - 1
+     - human
+     - Canada
+     - A
+   * - 2
+     - cow
+     - USA
+     - B
+
+``biohansel`` accepts metadata table files with the following formats and extensions:
+
+.. list-table:: 
+   :header-rows: 1
+
+   * - Format
+     - Extension
+     - Example Filename
+   * - Tab-delimited table/tab-separated values (TSV)
+     - ``.tsv``
+     - ``my-metadata-table.tsv``
+   * - Tab-delimited table/tab-separated values (TSV)
+     - ``.tab``
+     - ``my-metadata-table.tab``
+   * - Comma-separated values (CSV)
+     - ``.csv``
+     - ``my-metadata-table.csv``
 
 
 Development

diff --git a/bio_hansel/main.py b/bio_hansel/main.py
@@ -51,7 +51,7 @@ def init_parser():
     parser.add_argument('--scheme-name',
                         help='Custom user-specified SNP substyping scheme name')
     parser.add_argument('-M', '--scheme-metadata',
-                        help='Scheme subtype metadata table (.TSV format accepted; contain column called "subtype")')
+                        help='Scheme subtype metadata table (tab-delimited file with ".tsv" or ".tab" extension or CSV with ".csv" extension format accepted; MUST contain column called "subtype")')
     parser.add_argument('-p', '--paired-reads',
                         nargs=2,
                         metavar=('forward_reads', 'reverse_reads'),

diff --git a/setup.py b/setup.py
@@ -38,8 +38,7 @@
     long_description=readme,
     name='bio_hansel',
     package_data={'bio_hansel': ['data/*/*.fasta', 'data/*/*.tsv',]},
-    package_dir={'bio_hansel': 'bio_hansel'},
-    packages=find_packages(include=['bio_hansel']),
+    packages=find_packages(exclude=['test_*.py', 'tests']),
     setup_requires=setup_requirements,
     test_suite='tests',
     tests_require=test_requirements,