Skip to content

Commit

Permalink
Added run_haplotyping_germline function
Browse files Browse the repository at this point in the history
  • Loading branch information
nansari-pour authored May 14, 2021
1 parent d4c23db commit 49d40ed
Showing 1 changed file with 173 additions and 0 deletions.
173 changes: 173 additions & 0 deletions R/impute.R
Original file line number Diff line number Diff line change
Expand Up @@ -461,3 +461,176 @@ run_haplotyping = function(chrom, tumourname, normalname, ismale, imputeinfofile
chrom=chrom,
chr_names=chrom_names)
}

#' Construct haplotypes for a chromosome - germline WGS version
#'
#' This function takes preprocessed data and performs haplotype reconstruction.
#'
#' @param chrom The chromosome for which to reconstruct haplotypes
#' @param germlinename Identifier of the germline sample, used to match data files on disk
#' @param normalname Identifier of the reconstructed normal, used to match data files on disk
#' @param ismale Boolean, set to TRUE if the sample is male
#' @param imputeinfofile Full path to the imputeinfo reference file
#' @param problemloci Full path to the problematic loci reference file
#' @param impute_exe Path to the impute executable (can be found if its in $PATH)
#' @param min_normal_depth Minimal depth in the matched normal required for a SNP to be used
#' @param chrom_names A vector containing the names of chromosomes to be included
#' @param snp6_reference_info_file SNP6 only parameter Default: NA
#' @param heterozygousFilter SNP6 only parameter Default: NA
#' @param usebeagle Should use beagle5 instead of impute2 Default: FALSE
#' @param beaglejar Full path to Beagle java jar file Default: NA
#' @param beagleref Full path to Beagle reference file Default: NA
#' @param beagleplink Full path to Beagle plink file Default: NA
#' @param beaglemaxmem Integer Beagle max heap size in Gb Default: 10
#' @param beaglenthreads Integer number of threads used by beagle5 Default:1
#' @param beaglewindow Integer size of the genomic window for beagle5 (cM) Default:40
#' @param beagleoverlap Integer size of the overlap between windows beagle5 Default:4
#' @param javajre Path to the Java JRE executable (default java, i.e. in $PATH)
#' @author sd11, maxime.tarabichi, jdemeul, Naser Ansari-Pour (BDI, Oxford)
#' @export

run_haplotyping_germline = function(chrom, germlinename, normalname, ismale, imputeinfofile, problemloci, impute_exe, min_normal_depth, chrom_names,
externalhaplotypeprefix = NA,
use_previous_imputation=F,
snp6_reference_info_file=NA, heterozygousFilter=NA,
usebeagle=FALSE,
beaglejar=NA,
beagleref=NA,
beagleplink=NA,
beaglemaxmem=10,
beaglenthreads=1,
beaglewindow=40,
beagleoverlap=4,
javajre="java")
{

previoushaplotypefile <- list.files(pattern = paste0("_impute_output_chr", chrom, "_allHaplotypeInfo.txt"))[1]
if (use_previous_imputation & !is.na(previoushaplotypefile)) {

print(paste0("Previous imputation results found, copying info from", previoushaplotypefile, " to flip alleles"))
currenthaplotypefile <- paste(germlinename, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep="")
if (previoushaplotypefile != currenthaplotypefile) {
file.copy(from = previoushaplotypefile, to = paste(germlinename, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""))
}

} else {

if (file.exists(paste(germlinename, "_alleleFrequencies_chr", chrom, ".txt", sep=""))) {
generate.impute.input.wgs.germline(chrom=chrom,
germline.allele.counts.file=paste(germlinename,"_alleleFrequencies_chr", chrom, ".txt", sep=""),
normal.allele.counts.file=paste(normalname,"_alleleFrequencies_chr", chrom, ".txt", sep=""),
output.file=paste(germlinename, "_impute_input_chr", chrom, ".txt", sep=""),
imputeinfofile=imputeinfofile,
is.male=ismale,
problemLociFile=problemloci,
useLociFile=NA)
} else {
stop("Germline calling is currently on WGS data only - SNP array data is not sufficiently dense to detect all germline CNVs")
}

if(usebeagle){
## Convert input files for beagle5
imputeinputfile <- paste(germlinename,
"_impute_input_chr",
chrom, ".txt", sep="")
vcfbeagle <- convert.impute.input.to.beagle.input(imputeinput=imputeinputfile,
chrom=chrom)
vcfbeagle_path <- paste(germlinename,"_beagle5_input_chr",chrom,".txt",sep="")
outbeagle_path <- paste(germlinename,"_beagle5_output_chr",chrom,".txt",sep="")
writevcf.beagle(vcfbeagle, filepath=vcfbeagle_path)
## Run beagle5 on the files
run.beagle5(beaglejar=beaglejar,
vcfpath=vcfbeagle_path,
reffile=beagleref,
outpath=outbeagle_path,
plinkfile=beagleplink,
maxheap.gb=beaglemaxmem,
nthreads=beaglenthreads,
window=beaglewindow,
overlap=beagleoverlap,
javajre=javajre)
outfile <- paste(germlinename,
"_impute_output_chr",
chrom, "_allHaplotypeInfo.txt", sep="")
vcfout <- paste(outbeagle_path,".vcf.gz",sep="")
## Convert beagle output file to impute2-like file
writebeagle.as.impute(vcf=vcfout,
outfile=outfile)
}
else {
# Run impute on the files
run.impute(inputfile=paste(germlinename, "_impute_input_chr", chrom, ".txt", sep=""),
outputfile.prefix=paste(germlinename, "_impute_output_chr", chrom, ".txt", sep=""),
is.male=ismale,
imputeinfofile=imputeinfofile,
impute.exe=impute_exe,
region.size=5000000,
chrom=chrom)

# As impute runs in windows across a chromosome we need to assemble the output
combine.impute.output(inputfile.prefix=paste(germlinename, "_impute_output_chr", chrom, ".txt", sep=""),
outputfile=paste(germlinename, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""),
is.male=ismale,
imputeinfofile=imputeinfofile,
region.size=5000000,
chrom=chrom)
# Cleanup temp Impute output
unlink(paste(germlinename, "_impute_output_chr", chrom, ".txt*K.txt*", sep=""))
}

}


# If an allele counts file exists we assume this is a WGS sample and run the corresponding step, otherwise it must be SNP6
allelefrequenciesfile <- paste0(germlinename, "_alleleFrequencies_chr", chrom, ".txt")
print(allelefrequenciesfile)
print(file.exists(allelefrequenciesfile))

if (file.exists(allelefrequenciesfile)) {
# WGS - Transform the impute output into haplotyped BAFs

# if present, input external haplotype blocks
if (!is.na(externalhaplotypeprefix) && file.exists(paste0(externalhaplotypeprefix, chrom, ".vcf"))) {
print("Adding in the external haplotype blocks")

# output BAFs to plot pre-external haplotyping
GetChromosomeBAFs(chrom=chrom,
SNP_file=allelefrequenciesfile,
haplotypeFile=paste(germlinename, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""),
samplename=germlinename,
outfile=paste(germlinename, "_chr", chrom, "_heterozygousMutBAFs_haplotyped_noExt.txt", sep=""),
chr_names=chrom_names,
minCounts=min_normal_depth)

# Plot what we have before external haplotyping is incorporated
plot.haplotype.data(haplotyped.baf.file=paste(germlinename, "_chr", chrom, "_heterozygousMutBAFs_haplotyped_noExt.txt", sep=""),
imageFileName=paste(germlinename,"_chr",chrom,"_heterozygousData_noExt.png",sep=""),
samplename=germlinename,
chrom=chrom,
chr_names=chrom_names)

input_known_haplotypes(chrom = chrom,
chrom_names = chrom_names,
imputedHaplotypeFile = paste0(germlinename, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt"),
externalHaplotypeFile = paste0(externalhaplotypeprefix, chrom, ".vcf"))

}

GetChromosomeBAFs(chrom=chrom,
SNP_file=paste(germlinename, "_alleleFrequencies_chr", chrom, ".txt", sep=""),
haplotypeFile=paste(germlinename, "_impute_output_chr", chrom, "_allHaplotypeInfo.txt", sep=""),
samplename=germlinename,
outfile=paste(germlinename, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""),
chr_names=chrom_names,
minCounts=min_normal_depth)
} else {
stop("Germline calling is only on WGS data - SNParray data not sufficiently dense")
}

# Plot what we have until this point
plot.haplotype.data(haplotyped.baf.file=paste(germlinename, "_chr", chrom, "_heterozygousMutBAFs_haplotyped.txt", sep=""),
imageFileName=paste(germlinename,"_chr",chrom,"_heterozygousData.png",sep=""),
samplename=germlinename,
chrom=chrom,
chr_names=chrom_names)
}

0 comments on commit 49d40ed

Please sign in to comment.