-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Signed-off-by: Daniel Danis <[email protected]>
- Loading branch information
Showing
17 changed files
with
233 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
120 changes: 120 additions & 0 deletions
120
...in/java/org/monarchinitiative/squirls/cli/cmd/setup/PreprocessReferenceGenomeCommand.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
package org.monarchinitiative.squirls.cli.cmd.setup; | ||
|
||
import org.monarchinitiative.squirls.cli.Main; | ||
import org.monarchinitiative.squirls.io.download.UrlResourceDownloader; | ||
import org.slf4j.Logger; | ||
import org.slf4j.LoggerFactory; | ||
import picocli.CommandLine; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.BufferedWriter; | ||
import java.io.IOException; | ||
import java.net.URL; | ||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
import java.util.concurrent.Callable; | ||
import java.util.function.UnaryOperator; | ||
|
||
@CommandLine.Command(name = "ref-genome", | ||
header = "Download and preprocess reference genome", | ||
mixinStandardHelpOptions = true, | ||
version = Main.VERSION, | ||
usageHelpWidth = Main.WIDTH, | ||
footer = Main.FOOTER) | ||
public class PreprocessReferenceGenomeCommand implements Callable<Integer> { | ||
|
||
private static final Logger LOGGER = LoggerFactory.getLogger(PreprocessReferenceGenomeCommand.class); | ||
|
||
|
||
@CommandLine.Option(names = {"-d", "--data-directory"}, | ||
paramLabel = "path/to/datadir", | ||
required = true, | ||
description = "Path to Squirls data directory") | ||
public Path dataDirectory; | ||
|
||
@CommandLine.Option(names = {"-g", "--genome-assembly"}, | ||
required = true, | ||
description = "Genome assembly URL") | ||
public URL genomeUrl; | ||
|
||
@CommandLine.Option(names = {"-a", "--assembly-report"}, | ||
required = true, | ||
description = "Assembly report URL") | ||
public URL assemblyReportUrl; | ||
|
||
@CommandLine.Option(names = {"--overwrite"}, | ||
description = "Overwrite the genome files (default: ${DEFAULT-VALUE})") | ||
public boolean overwrite = false; | ||
|
||
@Override | ||
public Integer call() { | ||
if (!Files.isDirectory(dataDirectory)) { | ||
LOGGER.error("`-d | --data-directory` option must point to an existing directory"); | ||
return 1; | ||
} | ||
|
||
try { | ||
// First, reference genome | ||
LOGGER.info("Downloading reference genome ZIP"); | ||
downloadReferenceGenome(dataDirectory, genomeUrl, overwrite); | ||
|
||
// Then, assembly report. | ||
downloadAssemblyReport(dataDirectory, assemblyReportUrl, overwrite); | ||
return 0; | ||
} catch (Exception e) { | ||
LOGGER.error(e.getMessage(), e); | ||
return 1; | ||
} | ||
} | ||
|
||
/** | ||
* Download, decompress, and concatenate contigs into a single FASTA file. Then, index the FASTA file. | ||
* | ||
* @param buildDir path to directory where Squirls data files will be created | ||
* @param genomeUrl url pointing to reference genome FASTA file to be downloaded | ||
* @param overwrite overwrite existing FASTA file if true | ||
*/ | ||
private static void downloadReferenceGenome(Path buildDir, URL genomeUrl, boolean overwrite) { | ||
Path genomeFastaPath = buildDir.resolve("genome.fa"); | ||
GenomeAssemblyDownloader downloader = new GenomeAssemblyDownloader(genomeUrl, genomeFastaPath, overwrite); | ||
downloader.run(); // ! | ||
} | ||
|
||
private static void downloadAssemblyReport(Path dataDirectory, URL assemblyReportUrl, boolean overwrite) throws IOException { | ||
Path temporary = dataDirectory.resolve("assembly_report.tmp.txt"); | ||
UrlResourceDownloader downloader = new UrlResourceDownloader(assemblyReportUrl, temporary, overwrite); | ||
downloader.run(); // ! | ||
|
||
Path destination = dataDirectory.resolve("assembly_report.txt"); | ||
fixHg19MitochondrialLine(temporary, destination); | ||
Files.deleteIfExists(temporary); | ||
} | ||
|
||
private static void fixHg19MitochondrialLine(Path source, Path destination) throws IOException { | ||
try (BufferedReader reader = Files.newBufferedReader(source); | ||
BufferedWriter writer = Files.newBufferedWriter(destination)) { | ||
reader.lines() | ||
.map(fixIfNecessary()) | ||
.forEachOrdered(line -> { | ||
try { | ||
writer.write(line); | ||
writer.newLine(); | ||
} catch (IOException e) { | ||
throw new RuntimeException(e); | ||
} | ||
}); | ||
} | ||
} | ||
|
||
private static UnaryOperator<String> fixIfNecessary() { | ||
return line -> { | ||
if (line.equals("MT\tassembled-molecule\tMT\tMitochondrion\tJ01415.2\t=\tNC_012920.1\tnon-nuclear\t16569\tchrM")) { | ||
LOGGER.info("Fixing MT contig length (16569 -> 16571)"); | ||
return "MT\tassembled-molecule\tMT\tMitochondrion\tJ01415.2\t=\tNC_012920.1\tnon-nuclear\t16571\tchrM"; | ||
} | ||
return line; | ||
}; | ||
} | ||
|
||
|
||
} |
22 changes: 22 additions & 0 deletions
22
squirls-cli/src/main/java/org/monarchinitiative/squirls/cli/cmd/setup/SetupCommand.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
package org.monarchinitiative.squirls.cli.cmd.setup; | ||
|
||
import org.monarchinitiative.squirls.cli.Main; | ||
import picocli.CommandLine; | ||
|
||
import java.util.concurrent.Callable; | ||
|
||
@CommandLine.Command(name = "setup", | ||
aliases = {"S"}, | ||
header = "Setup Squirls resources", | ||
mixinStandardHelpOptions = true, | ||
version = Main.VERSION, | ||
usageHelpWidth = Main.WIDTH | ||
) | ||
public class SetupCommand implements Callable<Integer> { | ||
|
||
@Override | ||
public Integer call() { | ||
// work done in subcommands | ||
return 0; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
43 changes: 43 additions & 0 deletions
43
...ava/org/monarchinitiative/squirls/cli/cmd/setup/PreprocessReferenceGenomeCommandTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
package org.monarchinitiative.squirls.cli.cmd.setup; | ||
|
||
import org.junit.jupiter.api.Test; | ||
import org.junit.jupiter.api.io.TempDir; | ||
import org.monarchinitiative.squirls.cli.TestDataSourceConfig; | ||
|
||
import java.nio.file.Files; | ||
import java.nio.file.Path; | ||
|
||
import static org.hamcrest.MatcherAssert.assertThat; | ||
import static org.hamcrest.Matchers.equalTo; | ||
import static org.junit.jupiter.api.Assertions.assertFalse; | ||
import static org.junit.jupiter.api.Assertions.assertTrue; | ||
|
||
public class PreprocessReferenceGenomeCommandTest { | ||
|
||
private static final Path PARENT = TestDataSourceConfig.BASE_DIR.resolve("cmd").resolve("setup"); | ||
|
||
@TempDir | ||
public Path tempDataDirectory; | ||
|
||
private final PreprocessReferenceGenomeCommand cmd = new PreprocessReferenceGenomeCommand(); | ||
|
||
@Test | ||
public void call() throws Exception { | ||
cmd.dataDirectory = tempDataDirectory; | ||
cmd.genomeUrl = PARENT.resolve("shortHg19ChromFa.tar.gz").toUri().toURL(); | ||
cmd.assemblyReportUrl = PARENT.resolve("GCF_000001405.25_GRCh37.p13_assembly_report.short.txt").toUri().toURL(); | ||
|
||
assertFalse(Files.isRegularFile(tempDataDirectory.resolve("assembly_report.txt"))); | ||
assertFalse(Files.isRegularFile(tempDataDirectory.resolve("genome.fa"))); | ||
assertFalse(Files.isRegularFile(tempDataDirectory.resolve("genome.fa.fai"))); | ||
assertFalse(Files.isRegularFile(tempDataDirectory.resolve("genome.fa.dict"))); | ||
|
||
Integer result = cmd.call(); | ||
|
||
assertThat(result, equalTo(0)); | ||
assertTrue(Files.isRegularFile(tempDataDirectory.resolve("assembly_report.txt"))); | ||
assertTrue(Files.isRegularFile(tempDataDirectory.resolve("genome.fa"))); | ||
assertTrue(Files.isRegularFile(tempDataDirectory.resolve("genome.fa.fai"))); | ||
assertTrue(Files.isRegularFile(tempDataDirectory.resolve("genome.fa.dict"))); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
2 changes: 1 addition & 1 deletion
2
squirls-ingest/src/test/resources/org/monarchinitiative/squirls/ingest/data/funky.txt
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
This file is used to test download of an URL resource. | ||
This file is used to test Zip compression wrapper. |
Binary file removed
BIN
-1.04 MB
...gest/src/test/resources/org/monarchinitiative/squirls/ingest/data/shortHg19ChromFa.tar.gz
Binary file not shown.
Oops, something went wrong.