Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pango build for Nextclade #15

Draft
wants to merge 21 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
99c8fbf
feat: join pango designations onto metadata
corneliusroemer Nov 1, 2021
cfb73d5
feat: join pango designation lineage onto metadata
corneliusroemer Nov 4, 2021
ec4bab6
feat: make pango designation joins part of preprocess
corneliusroemer Nov 4, 2021
956fbf3
feat: create filter rules for pango build
corneliusroemer Nov 4, 2021
fa6da7d
fix: add distance map configs to default/parameters.yaml
corneliusroemer Nov 4, 2021
e8ba420
fix: distance_map -> distance_maps typo
corneliusroemer Nov 4, 2021
804c49c
feat: add pango-cluster to profiles
corneliusroemer Nov 4, 2021
eab2374
fix: use proper auspice_config for pango build
corneliusroemer Nov 4, 2021
6a00fe3
chore: dos2unix since our profiles were sadly dosed
corneliusroemer Nov 4, 2021
0c1b3b8
fix: config overwrites keys not appends
corneliusroemer Nov 4, 2021
ef786ba
feat: add designation coloring
corneliusroemer Nov 4, 2021
fab6f36
chore: merge in from master
corneliusroemer Nov 4, 2021
618930b
feat: sophisticated pango sampling
corneliusroemer Nov 8, 2021
0f70a73
fix: add download_pango_designations to localrules
corneliusroemer Nov 8, 2021
3eee392
fix: add new designation files to builds.yaml for profile/basel-combined
corneliusroemer Nov 9, 2021
42e4928
fix: add designation origin to basel-combined builds.yaml
corneliusroemer Nov 9, 2021
42ec77a
fix: bug in metadata download rule output
corneliusroemer Nov 9, 2021
e8190ae
fix: bug in metadata download rule output number 2
corneliusroemer Nov 9, 2021
1c49e22
fix: bug in metadata download rule output number 3
corneliusroemer Nov 9, 2021
620e79c
fix: add deploy url for pango builds
corneliusroemer Nov 10, 2021
e12ceac
Merge branch 'feat/pango-build' of https://github.com/neherlab/ncov-s…
corneliusroemer Nov 10, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ stats.json
log/*
!log/placeholder_for_sbatch_output
deployed/*
freezed


.vscode/*
.DS_Store
Expand Down
34 changes: 30 additions & 4 deletions defaults/parameters.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,28 @@ files:
annotation: "defaults/annotation.gff"
include: "defaults/include.txt"
color_schemes: "defaults/color_schemes.tsv"
clades: "defaults/clades.tsv"
ordering: "defaults/color_ordering.tsv"
lat_longs: "defaults/lat_longs.tsv"
auspice_config: "defaults/auspice_config.json"
description: "defaults/description.md"
mut_fit: "defaults/mutational_fitness_distance_map.json"
clades: "builds/clades.tsv"
ordering: "builds/color_ordering.tsv"
lat_longs: "builds/lat_longs.tsv"
mut_fit: "builds/mutational_fitness_distance_map.json"
pango_designations: "builds/pango_designations.csv"
metadata_designated: "builds/metadata_designated.tsv"

data_source:
clades: "https://raw.githubusercontent.com/nextstrain/ncov/master/defaults/clades.tsv"
lat_longs: "https://raw.githubusercontent.com/nextstrain/ncov/master/defaults/lat_longs.tsv"
color_ordering: "https://raw.githubusercontent.com/nextstrain/ncov/master/defaults/color_ordering.tsv"
mut_fit: "https://raw.githubusercontent.com/nextstrain/ncov/master/defaults/mutational_fitness_distance_map.json"
pango_designations: "https://raw.githubusercontent.com/cov-lineages/pango-designation/master/lineages.csv"

origins:
gisaid:
metadata: "s3://nextstrain-ncov-private/metadata.tsv.gz"
sequences: "s3://nextstrain-ncov-private/sequences.fasta.xz"
exclude: "https://raw.githubusercontent.com/nextstrain/ncov/master/defaults/exclude.txt"
filters: "--min-length 27000 --min-date 2019-12-01"

tree:
tree-builder-args: "'-ninit 10 -n 4 -czb'"
Expand Down Expand Up @@ -84,3 +100,13 @@ traits:
sampling_bias_correction: 2.5
columns: ["country"]

distances:
comparisons: ['root', 'root', 'root', 'root', 'root', 'root']
attributes: ['S1_mutations', 'DMS_convalescent_serum', 'DMS_Class_1', 'DMS_Class_2', 'DMS_Class_3', 'ACE2_binding_site_mutations']
maps:
- "defaults/distance_maps/S1.json"
- "defaults/distance_maps/convalescent_serum_mean_dms.json"
- "defaults/distance_maps/class_1_mean_dms.json"
- "defaults/distance_maps/class_2_mean_dms.json"
- "defaults/distance_maps/class_3_mean_dms.json"
- "defaults/distance_maps/ace2.json"
10 changes: 3 additions & 7 deletions profiles/basel-combined/builds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,19 +8,15 @@ files:
ordering: "builds/color_ordering.tsv"
lat_longs: "builds/lat_longs.tsv"
mut_fit: "builds/mutational_fitness_distance_map.json"
pango_designations: "builds/pango_designations.csv"
metadata_designated: "builds/metadata_designated.tsv"

data_source:
clades: "https://raw.githubusercontent.com/nextstrain/ncov/master/defaults/clades.tsv"
lat_longs: "https://raw.githubusercontent.com/nextstrain/ncov/master/defaults/lat_longs.tsv"
color_ordering: "https://raw.githubusercontent.com/nextstrain/ncov/master/defaults/color_ordering.tsv"
mut_fit: "https://raw.githubusercontent.com/nextstrain/ncov/master/defaults/mutational_fitness_distance_map.json"

origins:
gisaid:
metadata: "s3://nextstrain-ncov-private/metadata.tsv.gz"
sequences: "s3://nextstrain-ncov-private/sequences.fasta.xz"
exclude: "https://raw.githubusercontent.com/nextstrain/ncov/master/defaults/exclude.txt"
filters: "--min-length 27000 --min-date 2019-12-01"
pango_designations: "https://raw.githubusercontent.com/cov-lineages/pango-designation/master/lineages.csv"

build_dir: "builds-combined"
auspice_dir: "auspice-combined"
Expand Down
270 changes: 135 additions & 135 deletions profiles/basel-countries/auspice_config.json
Original file line number Diff line number Diff line change
@@ -1,135 +1,135 @@
{
"title": "Genomic epidemiology of SARS-CoV-2 in Europe",
"build_url": "https://github.com/neherlab/ncov-simple",
"maintainers": [
{ "name": "Cornelius Roemer", "url": "https://neherlab.org" },
{ "name": "Richard Neher", "url": "https://neherlab.org" }
],
"data_provenance": [
{
"name": "GISAID"
}
],
"colorings": [
{
"key": "country",
"title": "Country",
"type": "categorical"
},
{
"key": "division",
"title": "Admin Division",
"type": "categorical"
},
{
"key": "pango_lineage",
"title": "PANGO Lineage by GISAID",
"type": "categorical"
},
{
"key": "pango_default",
"title": "PANGO Lineage by Pangolin",
"type": "categorical"
},
{
"key": "pango_usher",
"title": "PANGO Lineage by Usher",
"type": "categorical"
},
{
"key": "S1_mutations",
"title": "S1 mutations",
"type": "continuous"
},
{
"key": "GISAID_clade",
"title": "GISAID Clade",
"type": "categorical"
},
{
"key": "subclade_membership",
"title": "Emerging clade",
"type": "categorical"
},
{
"key": "region",
"title": "Region",
"type": "categorical"
},
{
"key": "host",
"title": "Host",
"type": "categorical"
},
{
"key": "age",
"title": "Age",
"type": "continuous"
},
{
"key": "sex",
"title": "Sex",
"type": "categorical"
},
{
"key": "author",
"title": "Authors",
"type": "categorical"
},
{
"key": "originating_lab",
"title": "Originating Lab",
"type": "categorical"
},
{
"key": "submitting_lab",
"title": "Submitting Lab",
"type": "categorical"
},
{
"key": "recency",
"title": "Submission Date",
"type": "categorical"
},
{
"key": "country_exposure",
"title": "Country of exposure",
"type": "categorical"
},
{
"key": "division_exposure",
"title": "Division of exposure",
"type": "categorical"
},
{
"key": "region_exposure",
"title": "Region of exposure",
"type": "categorical"
}
],
"geo_resolutions": ["location", "division", "country", "region"],
"display_defaults": {
"color_by": "clade_membership",
"distance_measure": "num_date",
"geo_resolution": "country",
"map_triplicate": true,
"branch_label": "clade",
"transmission_lines": false
},
"filters": [
"recency",
"region",
"country",
"division",
"location",
"host",
"S1_mutations",
"pango_lineage",
"pango_default",
"pango_usher",
"clade_membership",
"emerging_lineage",
"author"
],
"panels": ["tree", "map", "entropy", "frequencies"]
}
{
"title": "Genomic epidemiology of SARS-CoV-2 in Europe",
"build_url": "https://github.com/neherlab/ncov-simple",
"maintainers": [
{ "name": "Cornelius Roemer", "url": "https://neherlab.org" },
{ "name": "Richard Neher", "url": "https://neherlab.org" }
],
"data_provenance": [
{
"name": "GISAID"
}
],
"colorings": [
{
"key": "country",
"title": "Country",
"type": "categorical"
},
{
"key": "division",
"title": "Admin Division",
"type": "categorical"
},
{
"key": "pango_lineage",
"title": "PANGO Lineage by GISAID",
"type": "categorical"
},
{
"key": "pango_default",
"title": "PANGO Lineage by Pangolin",
"type": "categorical"
},
{
"key": "pango_usher",
"title": "PANGO Lineage by Usher",
"type": "categorical"
},
{
"key": "S1_mutations",
"title": "S1 mutations",
"type": "continuous"
},
{
"key": "GISAID_clade",
"title": "GISAID Clade",
"type": "categorical"
},
{
"key": "subclade_membership",
"title": "Emerging clade",
"type": "categorical"
},
{
"key": "region",
"title": "Region",
"type": "categorical"
},
{
"key": "host",
"title": "Host",
"type": "categorical"
},
{
"key": "age",
"title": "Age",
"type": "continuous"
},
{
"key": "sex",
"title": "Sex",
"type": "categorical"
},
{
"key": "author",
"title": "Authors",
"type": "categorical"
},
{
"key": "originating_lab",
"title": "Originating Lab",
"type": "categorical"
},
{
"key": "submitting_lab",
"title": "Submitting Lab",
"type": "categorical"
},
{
"key": "recency",
"title": "Submission Date",
"type": "categorical"
},
{
"key": "country_exposure",
"title": "Country of exposure",
"type": "categorical"
},
{
"key": "division_exposure",
"title": "Division of exposure",
"type": "categorical"
},
{
"key": "region_exposure",
"title": "Region of exposure",
"type": "categorical"
}
],
"geo_resolutions": ["location", "division", "country", "region"],
"display_defaults": {
"color_by": "clade_membership",
"distance_measure": "num_date",
"geo_resolution": "country",
"map_triplicate": true,
"branch_label": "clade",
"transmission_lines": false
},
"filters": [
"recency",
"region",
"country",
"division",
"location",
"host",
"S1_mutations",
"pango_lineage",
"pango_default",
"pango_usher",
"clade_membership",
"emerging_lineage",
"author"
],
"panels": ["tree", "map", "entropy", "frequencies"]
}
Loading