Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP BCF 2.2 #1550

Draft
wants to merge 22 commits into
base: cn_vcf_header
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
bf8d8da
Commit with raw GATK SequenceDictionaryUtils and SequenceDictionaryUt…
cmnbroad Nov 8, 2021
f66252a
VCFHeader and VCFHeaderLine refactoring to enable support for VCF4.3/…
cmnbroad Nov 8, 2021
3d08ef8
Eliminate redundant modeling of VCFHeaderVersion in VCFHeader.
cmnbroad Nov 15, 2021
ca31a2b
Eliminate redundant modeling of file format lines in VCFMetaDataLines.
cmnbroad Nov 15, 2021
d233891
More code review comments.
cmnbroad Nov 15, 2021
e917800
Add VCF 4.3 writing
Apr 15, 2021
6a2c193
BCF 2.2 writing WIP
Apr 15, 2021
8c47db2
Make scripts/install-bcftools.sh executable
Nov 29, 2021
eede351
Add installing bcftools step in github workflow
Nov 29, 2021
ed64146
Update to bcftools 1.14, set env variable in github workflow
Nov 30, 2021
f6fcac6
Add tests for BCF2Dictionary, refactor BCF2WriterUnitTest
Nov 30, 2021
9b2d77c
Begin removing BCF 2.1, update tests files
Dec 6, 2021
b6559a9
Cleanup various TODOs, remove/fix failing tests
Dec 7, 2021
44d3f34
Fully remove BCF 2.1 encoder and decoder
Dec 7, 2021
f795133
Tag BCF lazy data with version, only use lazy data in BCF2Writer if v…
Dec 7, 2021
b5b2649
Fix BCF lazy data version checking and genotype key computation
Dec 8, 2021
65cd7f7
Match bcftools behavior when writing empty vectors
Dec 9, 2021
56a07db
Fix spotbugs warning
Dec 10, 2021
e917a3e
Clean up BCF2Encoder, better error in BCF2FieldEncoder
Dec 10, 2021
4af3939
Cleanup in BCF code
Dec 14, 2021
a991f7e
Add disabled test for missing Character and String VCF types
Dec 22, 2021
b54ae82
Change BCF2Dictionary interface to be immutable
Dec 23, 2021
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ jobs:
test:
env:
HTSJDK_SAMTOOLS_BIN: /usr/bin/samtools
HTSJDK_BCFTOOLS_BIN: /usr/bin/bcftools
runs-on: ubuntu-latest
strategy:
matrix:
Expand All @@ -36,6 +37,8 @@ jobs:
run: ./gradlew compileJava
- name: Install Samtools
run: scripts/install-samtools.sh
- name: Install Bcftools
run: scripts/install-bcftools.sh
- name: Start the htsget server
run: scripts/htsget-scripts/start-htsget-test-server.sh
- name: Run tests
Expand Down
2 changes: 2 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ cache:
env:
global:
- HTSJDK_SAMTOOLS_BIN=/usr/bin/samtools
- HTSJDK_BCFTOOLS_BIN=/usr/bin/bcftools
jdk:
- oraclejdk8
- openjdk8
Expand All @@ -32,6 +33,7 @@ matrix:

before_install:
- scripts/install-samtools.sh
- scripts/install-bcftools.sh
- scripts/htsget-scripts/start-htsget-test-server.sh

script:
Expand Down
5 changes: 5 additions & 0 deletions scripts/install-bcftools.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/sh
# Download, build, and install a bcftools release under /usr (installation uses sudo).
#
# The release to install is taken from BCFTOOLS_VERSION and defaults to 1.14, so the
# version appears in exactly one place instead of being repeated in the URL, the
# tarball name, and the source directory.
#
# -e: stop at the first failing command; -x: echo each command as it is executed.
set -ex

BCFTOOLS_VERSION="${BCFTOOLS_VERSION:-1.14}"
BCFTOOLS_DIR="bcftools-${BCFTOOLS_VERSION}"
BCFTOOLS_TARBALL="${BCFTOOLS_DIR}.tar.bz2"

wget "https://github.com/samtools/bcftools/releases/download/${BCFTOOLS_VERSION}/${BCFTOOLS_TARBALL}"
tar -xjvf "${BCFTOOLS_TARBALL}"

# One command per line: `set -e` is ignored for every command of an `&&` list except
# the last one, so separate lines keep each build step individually guarded even if
# more commands are appended to this script later.
cd "${BCFTOOLS_DIR}"
./configure --prefix=/usr
make
sudo make install
17 changes: 17 additions & 0 deletions src/main/java/htsjdk/samtools/Defaults.java
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package htsjdk.samtools;

import htsjdk.samtools.util.Log;
import htsjdk.variant.variantcontext.writer.VCFVersionUpgradePolicy;

import java.io.File;
import java.util.Collections;
Expand Down Expand Up @@ -110,6 +111,16 @@ public class Defaults {
*/
public static final boolean DISABLE_SNAPPY_COMPRESSOR;

/**
* Whether strict VCF version validation is enabled. Default = true.
*/
public static final boolean STRICT_VCF_VERSION_VALIDATION;

/**
* How to treat files from VCF versions older than the current version. Default = UPGRADE_OR_FALLBACK.
*/
public static final VCFVersionUpgradePolicy VCF_VERSION_TRANSITION_POLICY;


public static final String SAMJDK_PREFIX = "samjdk.";
static {
Expand All @@ -134,6 +145,11 @@ public class Defaults {
SAM_FLAG_FIELD_FORMAT = SamFlagField.valueOf(getStringProperty("sam_flag_field_format", SamFlagField.DECIMAL.name()));
SRA_LIBRARIES_DOWNLOAD = getBooleanProperty("sra_libraries_download", false);
DISABLE_SNAPPY_COMPRESSOR = getBooleanProperty(DISABLE_SNAPPY_PROPERTY_NAME, false);
STRICT_VCF_VERSION_VALIDATION = getBooleanProperty("strict_version_validation", true);
VCF_VERSION_TRANSITION_POLICY = VCFVersionUpgradePolicy.valueOf(getStringProperty(
"vcf_version_transition_policy",
VCFVersionUpgradePolicy.UPGRADE_OR_FALLBACK.name()
));
}

/**
Expand All @@ -157,6 +173,7 @@ public static SortedMap<String, Object> allDefaults(){
result.put("CUSTOM_READER_FACTORY", CUSTOM_READER_FACTORY);
result.put("SAM_FLAG_FIELD_FORMAT", SAM_FLAG_FIELD_FORMAT);
result.put("DISABLE_SNAPPY_COMPRESSOR", DISABLE_SNAPPY_COMPRESSOR);
result.put("VCF_VERSION_TRANSITION_POLICY", VCF_VERSION_TRANSITION_POLICY);
return Collections.unmodifiableSortedMap(result);
}

Expand Down
15 changes: 15 additions & 0 deletions src/main/java/htsjdk/samtools/SAMSequenceDictionary.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,13 @@ public SAMSequenceDictionary(final List<SAMSequenceRecord> list) {
setSequences(list);
}

//TODO: the returned list follows the internal list order; it does not honor
// the contigIndex stored on each individual sequence record
/**
 * Returns the sequences held by this dictionary.
 *
 * @return an unmodifiable view of this dictionary's sequences, in internal order (the order in
 *         which the sequences were added to this dictionary)
 */
public List<SAMSequenceRecord> getSequences() {
final List<SAMSequenceRecord> readOnlyView = Collections.unmodifiableList(mSequences);
return readOnlyView;
}
Expand All @@ -75,6 +82,14 @@ public void setSequences(final List<SAMSequenceRecord> list) {
list.forEach(this::addSequence);
}

/**
* Add a sequence to the dictionary.
* @param sequenceRecord the sequence record to add - note that this method mutates the contig
* index of the sequenceRecord to match the newly added record's relative
* order in the list
*/
//TODO: this method ignores (and actually mutates) the sequenceRecord's contig index to make it match
// the record's relative placement in the dictionary's internal list
public void addSequence(final SAMSequenceRecord sequenceRecord) {
if (mSequenceMap.containsKey(sequenceRecord.getSequenceName())) {
throw new IllegalArgumentException("Cannot add sequence that already exists in SAMSequenceDictionary: " +
Expand Down
Loading