-
Notifications
You must be signed in to change notification settings - Fork 244
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Support reference bundles. #1713
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -9,9 +9,18 @@ | |||||
import htsjdk.beta.plugin.hapref.HaploidReferenceCodec; | ||||||
import htsjdk.beta.plugin.hapref.HaploidReferenceDecoder; | ||||||
import htsjdk.beta.plugin.hapref.HaploidReferenceDecoderOptions; | ||||||
import htsjdk.io.HtsPath; | ||||||
import htsjdk.io.IOPath; | ||||||
import htsjdk.samtools.reference.ReferenceSequenceFileFactory; | ||||||
import htsjdk.samtools.util.GZIIndex; | ||||||
import htsjdk.samtools.util.IOUtil; | ||||||
import htsjdk.utils.ValidationUtils; | ||||||
|
||||||
import java.io.IOException; | ||||||
import java.nio.file.Files; | ||||||
import java.nio.file.Path; | ||||||
import java.util.function.Function; | ||||||
|
||||||
/** | ||||||
* Class with methods for resolving inputs and outputs to haploid reference encoders and decoders. | ||||||
* <p> | ||||||
|
@@ -66,9 +75,7 @@ public HaploidReferenceDecoder getHaploidReferenceDecoder( | |||||
ValidationUtils.nonNull(inputPath, "Input path"); | ||||||
ValidationUtils.nonNull(HaploidReferenceDecoderOptions, "Decoder options"); | ||||||
|
||||||
final Bundle referenceBundle = new BundleBuilder().addPrimary( | ||||||
new IOPathResource(inputPath, BundleResourceType.CT_HAPLOID_REFERENCE)).build(); | ||||||
|
||||||
final Bundle referenceBundle = referenceBundleFromFastaPath(inputPath, HtsPath::new); | ||||||
return getHaploidReferenceDecoder(referenceBundle, HaploidReferenceDecoderOptions); | ||||||
} | ||||||
|
||||||
|
@@ -110,4 +117,47 @@ public HaploidReferenceDecoder getHaploidReferenceDecoder( | |||||
return (HaploidReferenceDecoder) resolveForDecoding(inputBundle).getDecoder(inputBundle, HaploidReferenceDecoderOptions); | ||||||
} | ||||||
|
||||||
/** | ||||||
* Create a reference bundle given only a fasta path, including an index and a dictionary | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
* file if they are present and located in the same directory as the fasta. | ||||||
* | ||||||
* @param fastaPath location of the fasta | ||||||
* @param ioPathConstructor a constructor used to create IOPath-derived objects for the bundle | ||||||
* @return a reference Bundle | ||||||
* @param <T> the IOPath-derived type produced by ioPathConstructor | ||||||
*/ | ||||||
public static <T extends IOPath> Bundle referenceBundleFromFastaPath(final IOPath fastaPath, final Function<String, T> ioPathConstructor) { | ||||||
final BundleBuilder referenceBundleBuilder = new BundleBuilder(); | ||||||
referenceBundleBuilder.addPrimary(new IOPathResource(fastaPath, BundleResourceType.CT_HAPLOID_REFERENCE)); | ||||||
|
||||||
final Path dictPath = ReferenceSequenceFileFactory.getDefaultDictionaryForReferenceSequence(fastaPath.toPath()); | ||||||
if (Files.exists(dictPath)) { | ||||||
referenceBundleBuilder.addSecondary( | ||||||
new IOPathResource( | ||||||
ioPathConstructor.apply(dictPath.toUri().toString()), | ||||||
BundleResourceType.CT_REFERENCE_DICTIONARY)); | ||||||
} | ||||||
|
||||||
final Path idxPath = ReferenceSequenceFileFactory.getFastaIndexFileName(fastaPath.toPath()); | ||||||
if (Files.exists(idxPath)) { | ||||||
referenceBundleBuilder.addSecondary( | ||||||
new IOPathResource( | ||||||
ioPathConstructor.apply(idxPath.toUri().toString()), | ||||||
BundleResourceType.CT_REFERENCE_INDEX)); | ||||||
} | ||||||
|
||||||
try { | ||||||
if (IOUtil.isBlockCompressed(fastaPath.toPath(), true)) { | ||||||
final Path gziPath = GZIIndex.resolveIndexNameForBgzipFile(fastaPath.toPath()); | ||||||
referenceBundleBuilder.addSecondary( | ||||||
new IOPathResource( | ||||||
ioPathConstructor.apply(gziPath.toUri().toString()), | ||||||
BundleResourceType.CT_REFERENCE_INDEX_GZI)); | ||||||
} | ||||||
} catch (IOException e) { | ||||||
throw new HtsjdkException("Error while checking for block compression", e); | ||||||
} | ||||||
return referenceBundleBuilder.build(); | ||||||
} | ||||||
|
||||||
} |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -24,6 +24,7 @@ | |||||
|
||||||
package htsjdk.samtools.reference; | ||||||
|
||||||
import htsjdk.io.IOPath; | ||||||
import htsjdk.samtools.Defaults; | ||||||
import htsjdk.samtools.SAMException; | ||||||
import htsjdk.samtools.SAMSequenceDictionary; | ||||||
|
@@ -64,6 +65,21 @@ public FastaSequenceFile(final Path path, final boolean truncateNamesAtWhitespac | |||||
this.in = new FastLineReader(IOUtil.openFileForReading(path)); | ||||||
} | ||||||
|
||||||
/** | ||||||
* Constructs a FastaSequenceFile that reads from the specified fasta and dictionary file. Makes no | ||||||
* assumption that the fasta and dict files are in the same directory. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
* | ||||||
* @param fastaPath may not be null | ||||||
* @param dictPath may be null | ||||||
* @param truncateNamesAtWhitespace | ||||||
*/ | ||||||
public FastaSequenceFile(final IOPath fastaPath, final IOPath dictPath, final boolean truncateNamesAtWhitespace) { | ||||||
super(fastaPath.toPath(), fastaPath.toString(), dictPath == null ? null : loadSequenceDictionary(dictPath)); | ||||||
this.truncateNamesAtWhitespace = truncateNamesAtWhitespace; | ||||||
this.seekableStream = null; | ||||||
this.in = new FastLineReader(IOUtil.openFileForReading(fastaPath.toPath())); | ||||||
} | ||||||
|
||||||
/** | ||||||
* Constructs a FastaSequenceFile that reads from the specified stream (which must not be compressed, i.e. | ||||||
* the caller is responsible for decompressing the stream). | ||||||
|
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -24,18 +24,16 @@ | |||||
|
||||||
package htsjdk.samtools.reference; | ||||||
|
||||||
import htsjdk.io.IOPath; | ||||||
import htsjdk.samtools.SAMException; | ||||||
import htsjdk.samtools.SAMSequenceDictionary; | ||||||
import htsjdk.samtools.seekablestream.ReadableSeekableStreamByteChannel; | ||||||
import htsjdk.samtools.seekablestream.SeekableStream; | ||||||
import htsjdk.samtools.util.BlockCompressedInputStream; | ||||||
import htsjdk.samtools.util.IOUtil; | ||||||
|
||||||
import java.io.BufferedInputStream; | ||||||
import java.io.File; | ||||||
import java.io.FileNotFoundException; | ||||||
import java.io.IOException; | ||||||
import java.io.InputStream; | ||||||
import java.nio.ByteBuffer; | ||||||
import java.nio.channels.FileChannel; | ||||||
import java.nio.channels.SeekableByteChannel; | ||||||
|
@@ -89,6 +87,28 @@ public IndexedFastaSequenceFile(final Path path, final FastaSequenceIndex index) | |||||
} | ||||||
} | ||||||
|
||||||
/** | ||||||
*/ | ||||||
/** | ||||||
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened. | ||||||
* | ||||||
* @param path The file to open. | ||||||
* @param dictPath the dictionary path (may be null) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
* @param index Pre-built FastaSequenceIndex, for the case in which one does not exist on disk. may not be null. | ||||||
*/ | ||||||
public IndexedFastaSequenceFile(final IOPath path, final IOPath dictPath, final FastaSequenceIndex index) { | ||||||
super(path, dictPath, index); | ||||||
try { | ||||||
// reject block-compressed files (use BlockCompressedIndexedFastaSequenceFile) | ||||||
if (IOUtil.isBlockCompressed(path.toPath(), true)) { | ||||||
throw new SAMException("Indexed block-compressed FASTA file cannot be handled: " + path); | ||||||
} | ||||||
this.channel = Files.newByteChannel(path.toPath()); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I wonder if we should add getChannel methods to IOPath. We have getStream ones. |
||||||
} catch (IOException e) { | ||||||
throw new SAMException("FASTA file should be readable but is not: " + path, e); | ||||||
} | ||||||
} | ||||||
|
||||||
/** | ||||||
* Open the given indexed fasta sequence file. Throw an exception if the file cannot be opened. | ||||||
* @param path The file to open. | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
typo: q