-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 6c71950
Showing
27 changed files
with
15,172 additions
and
0 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,115 @@ | ||
library("SKAT") | ||
|
||
# This is a template; needs to be modified before use. Everything named "_placeholder" needs to sed replaced with a normal value | ||
args = commandArgs(trailingOnly=TRUE) | ||
|
||
if(length(args) == 0) { | ||
args = c(FALSE,FALSE,FALSE,FALSE); | ||
} | ||
|
||
if(args[1]) { | ||
inName = args[1]; | ||
} else { | ||
inName = 'inName_placeholder'; | ||
} | ||
|
||
tCwd = 'cwd_placeholder' | ||
if(args[2]) { | ||
tCwd = args[2]; | ||
} | ||
|
||
if(tCwd != 'cwd_placeholder') { | ||
setwd(tCwd) | ||
} | ||
|
||
rCorr = 'rCorr_placeholder' | ||
if(args[3]) { | ||
rCorr = as.numeric(args[3]); | ||
} | ||
|
||
file.cov = 'cov_placeholder' | ||
if(args[4]) { | ||
file.cov = args[4]; | ||
} | ||
|
||
if(file.cov == 'cov_placeholder') { | ||
file.cov = NULL; | ||
} | ||
|
||
# Just for backward compat | ||
if(!is.numeric(rCorr)) { | ||
rCorr = 0 | ||
} | ||
|
||
baseName = paste(inName, "skat",collapse=".",sep=".") | ||
|
||
# Using only rare variants below | ||
file.bed <- paste(inName, 'bed', collapse=".",sep=".") | ||
file.bim <- paste(inName, 'bim', collapse=".",sep=".") | ||
file.fam <- paste(inName, 'fam', collapse=".",sep=".") | ||
|
||
file.geneSet <- paste(baseName, 'setid_genes.tsv', collapse=".",sep=".") | ||
|
||
file.gnomadBetaWeights = Read_SNP_WeightFile(FileName=paste(baseName, 'weights_gnomad_beta.tsv', collapse=".", sep=".")) | ||
file.gnomadBetaMadsenWeights = Read_SNP_WeightFile(FileName=paste(baseName, 'weights_gnomad_beta_madsen.tsv', collapse=".", sep=".")) | ||
|
||
file.ssdGenes <- paste(baseName, 'genes.ssd', collapse=".", sep=".") # This file is about to be created with the below command | ||
file.infoGenes <- paste(baseName, 'genes.info', collapse=".", sep=".") | ||
|
||
Generate_SSD_SetID(file.bed, file.bim, file.fam, file.geneSet, file.ssdGenes, file.infoGenes) | ||
ssd.infoGenes <- Open_SSD(file.ssdGenes, file.infoGenes) | ||
|
||
## DEFINE YOUR OWN COVARIATES | ||
if(!is.null(file.cov)) { | ||
famCov <- Read_Plink_FAM_Cov(file.fam, file.cov, Is.binary=TRUE, cov_header=FALSE) | ||
|
||
# Define your own | ||
x1 <- famCov$COV1 | ||
x2 <- famCov$COV2 | ||
x3 <- famCov$COV3 | ||
x4 <- famCov$COV4 | ||
|
||
yCov <- famCov$Phenotype | ||
sex <- famCov$Sex | ||
|
||
obj<-SKAT_Null_Model(yCov ~ x1 + x2 + x3 + x4 + sex, out_type="D", n.Resampling=1000, type.Resampling='bootstrap') | ||
} else { | ||
famCov <- Read_Plink_FAM(file.fam, Is.binary=TRUE) | ||
|
||
yCov <- famCov$Phenotype | ||
sex <- famCov$Sex | ||
|
||
obj<-SKAT_Null_Model(yCov ~ sex, out_type="D", n.Resampling=1000, type.Resampling='bootstrap') | ||
} | ||
|
||
|
||
warnings() | ||
|
||
#### Run basic model with gnomadBeta weights | ||
writeLines(paste("\nTesting",baseName,"gene set with gnomadBetaWeights",collapse=" ")) | ||
|
||
out <- SKATBinary.SSD.All(ssd.infoGenes, obj, kernel = "linear.weighted", method="optimal.adj", r.corr=rCorr, obj.SNPWeight=file.gnomadBetaWeights) | ||
out.results <- out$results | ||
toWrite = out.results[order(out.results$P.value), c(1, 2, 4)] | ||
write.table(toWrite, file=paste(baseName, 'genes_gnomadBetaWeights.tsv',collapse=".", sep="."), col.names=T, row.names=F, quote=F, sep='\t') | ||
warnings() | ||
|
||
min(out.results$P.value) | ||
signif = Resampling_FWER(out,FWER=0.05) | ||
signif | ||
write.table(signif$result, file=paste(baseName, 'genes_gnomadBetaWeights_fwer.tsv',collapse=".", sep="."), col.names=T, row.names=F, quote=F, sep='\t') | ||
|
||
|
||
#### Run basic model with no weights (should be used only for our < 1e-4 model) | ||
writeLines(paste("\nTesting",baseName,"gene set unweighted",collapse=" ")) | ||
|
||
out <- SKATBinary.SSD.All(ssd.infoGenes, obj, kernel = "linear.weighted", method="optimal.adj", r.corr=rCorr, weights.beta=c(1,1)) | ||
out.results <- out$results | ||
toWrite = out.results[order(out.results$P.value), c(1, 2, 4)] | ||
write.table(toWrite, file=paste(baseName, 'genes_unweighted.tsv',collapse=".", sep="."), col.names=T, row.names=F, quote=F, sep='\t') | ||
warnings() | ||
|
||
min(out.results$P.value) | ||
signif = Resampling_FWER(out,FWER=0.05) | ||
signif | ||
write.table(signif$result, file=paste(baseName, 'genes_unweighted_fwer.tsv',collapse=".", sep="."), col.names=T, row.names=F, quote=F, sep='\t') |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
#! /usr/bin/env perl | ||
use 5.10.0; | ||
use strict; | ||
use warnings; | ||
|
||
use List::Util qw/max/; | ||
|
||
use DDP; | ||
use Math::Complex; | ||
use PDL::Stats; | ||
use PDL::Stats::Distr; | ||
use List::MoreUtils qw/first_index uniq/; | ||
use Getopt::Long; | ||
use Yaml::XS qw/LoadFile/; | ||
use Scalar::Util qw/looks_like_number/; | ||
|
||
my ($sampleList, $out, $pathwayYaml); | ||
|
||
GetOptions ( | ||
'in|i=s' => \$sampleList, | ||
'out|i=s' => \$out, | ||
); | ||
|
||
if(!$sampleList) { | ||
die "--in|i|ann|a --out|o (optional)" | ||
} | ||
|
||
my $annFh; | ||
my $outName = $out; | ||
if($sampleList =~ /\.gz$/) { | ||
$outName //= substr($sampleList, 0, length($sampleList) - 3) . ".fake.sample_list"; | ||
|
||
# Don't think this actually would die, since separate process forked via pipe | ||
open($annFh, '-|', "gunzip -c $sampleList") or die "Couldn't open sampleList file."; | ||
} else { | ||
$outName //= $sampleList . ".fake.sample_list"; | ||
open($annFh, '<', $sampleList) or die "Couldn't open annotated snp file."; | ||
} | ||
|
||
open(my $outFh, '>', $outName) or die "Couldn't make sample_list file"; | ||
|
||
my @lines = <$annFh>; | ||
chomp @lines; | ||
|
||
my $i = -1; | ||
for my $id (@lines) { | ||
$i++; | ||
|
||
say $outFh "$id\t$id"; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
gene: refSeq.nearest.name2 | ||
population: af_nfe |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
#! /usr/bin/env perl | ||
use 5.10.0; | ||
use strict; | ||
use warnings; | ||
|
||
use List::Util qw/max/; | ||
|
||
use DDP; | ||
use Math::Complex; | ||
use PDL::Stats; | ||
use PDL::Stats::Distr; | ||
use List::MoreUtils qw/first_index uniq/; | ||
use Getopt::Long; | ||
use Yaml::XS qw/LoadFile/; | ||
use Scalar::Util qw/looks_like_number/; | ||
|
||
my ($sampleList, $out, $pathwayYaml); | ||
|
||
GetOptions ( | ||
'in|i=s' => \$sampleList, | ||
'out|o=s' => \$out, | ||
); | ||
|
||
if(!$sampleList) { | ||
die "--in|i sampleList --out|o (optional)" | ||
} | ||
|
||
my $annFh; | ||
my $outName = $out; | ||
if($sampleList =~ /\.gz$/) { | ||
$outName //= substr($sampleList, 0, length($sampleList) - 3) . ".fake.cov"; | ||
|
||
# Don't think this actually would die, since separate process forked via pipe | ||
open($annFh, '-|', "gunzip -c $sampleList") or die "Couldn't open sampleList file."; | ||
} else { | ||
$outName //= $sampleList . ".fake.cov"; | ||
open($annFh, '<', $sampleList) or die "Couldn't open annotated snp file."; | ||
} | ||
|
||
open(my $outFh, '>', $outName) or die "Couldn't make fam file"; | ||
|
||
my @lines = <$annFh>; | ||
chomp @lines; | ||
|
||
my @range = ( -10000 .. 10000 ); | ||
|
||
my $i = -1; | ||
for my $id (@lines) { | ||
$i++; | ||
|
||
my $cov1 = $range[rand(@range)] / 10_000; | ||
my $cov2 = $range[rand(@range)] / 10_000; | ||
my $cov3 = $range[rand(@range)] / 10_000; | ||
my $cov4 = $range[rand(@range)] / 10_000; | ||
|
||
my $fam = $id; | ||
|
||
my $dad = -9; | ||
my $mom = -9; | ||
|
||
say $outFh "$fam\t$id\t$cov1\t$cov2\t$cov3\t$cov4"; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,62 @@ | ||
#! /usr/bin/env perl | ||
use 5.10.0; | ||
use strict; | ||
use warnings; | ||
|
||
use List::Util qw/max/; | ||
|
||
use DDP; | ||
use Math::Complex; | ||
use PDL::Stats; | ||
use PDL::Stats::Distr; | ||
use List::MoreUtils qw/first_index uniq/; | ||
use Getopt::Long; | ||
use Yaml::XS qw/LoadFile/; | ||
use Scalar::Util qw/looks_like_number/; | ||
|
||
my ($sampleList, $out, $pathwayYaml); | ||
|
||
GetOptions ( | ||
'in|i=s' => \$sampleList, | ||
'out|i=s' => \$out, | ||
); | ||
|
||
if(!$sampleList) { | ||
die "--in|i|ann|a --out|o (optional)" | ||
} | ||
|
||
my $annFh; | ||
my $outName = $out; | ||
if($sampleList =~ /\.gz$/) { | ||
$outName //= substr($sampleList, 0, length($sampleList) - 3) . ".fake.fam"; | ||
|
||
# Don't think this actually would die, since separate process forked via pipe | ||
open($annFh, '-|', "gunzip -c $sampleList") or die "Couldn't open sampleList file."; | ||
} else { | ||
$outName //= $sampleList . ".fake.fam"; | ||
open($annFh, '<', $sampleList) or die "Couldn't open annotated snp file."; | ||
} | ||
|
||
open(my $outFh, '>', $outName) or die "Couldn't make fam file"; | ||
|
||
my @lines = <$annFh>; | ||
chomp @lines; | ||
|
||
my $i = -1; | ||
for my $id (@lines) { | ||
$i++; | ||
|
||
my $sex = int(rand(3)); | ||
my $pheno = int(rand(3)); | ||
|
||
# If 0, raise to 1. | ||
$sex ||= 1; | ||
$pheno ||= 1; | ||
|
||
my $fam = $id; | ||
|
||
my $dad = -9; | ||
my $mom = -9; | ||
|
||
say $outFh "$fam\t$id\t-9\t-9\t$sex\t$pheno"; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
#!/bin/bash | ||
|
||
exFolder=./example | ||
|
||
outFolder=./out-example | ||
|
||
rm -rf $outFolder | ||
|
||
mkdir -p $outFolder && cd $_; | ||
mkdir -p ./log; | ||
|
||
# * needed instead of ./ to prevent ./filename.annotaiton.tsv.gz | ||
annotation=1000g_100klines_example_old.annotation.tsv.gz; | ||
annotationBaseName=1000g_100klines_example.annotation; | ||
annotationVcfGz=1000g_100klines_old_example.annotation.vcf.gz; | ||
annotationSampleList=1000g_100klines_example.annotation.sample_list; | ||
|
||
fam=1000g_100klines_example.fake.fam | ||
cov=1000g_100klines_example.fake.cov | ||
|
||
cp ../example/$annotation ./ | ||
cp ../example/$annotationVcfGz ./ | ||
cp ../example/$annotationSampleList ./ | ||
|
||
cp ../example/$fam ./ | ||
cp ../example/$cov ./ | ||
|
||
annotationSampleListWithFam=$annotationSampleList.with_fam | ||
perl ../bin/add_fake_fam_to_sample_list.pl --in $annotationSampleList --out $annotationSampleListWithFam | ||
|
||
annotationPlink="$annotationBaseName.plink"; | ||
annotationPlinkHwe="$annotationPlink.hwe_1e-6"; | ||
annotationPlinkHweSkat="$annotationPlinkHwe.skat"; | ||
|
||
skatFile="skat_$annotationPlinkHwe.R"; | ||
|
||
plink --vcf $annotationVcfGz --keep-allele-order --const-fid seq --make-bed --out $annotationPlink &> "./log/making_$annotationPlink.log"; | ||
mv "$annotationPlink.fam" "$annotationPlink.fam_nosex_affectation"; | ||
|
||
mv $fam "$annotationPlink.fam" | ||
plink --bfile $annotationPlink --hwe 1e-6 midp --keep $annotationSampleListWithFam --make-bed --out $annotationPlinkHwe &> "./log/making_$annotationPlinkHwe.log"; | ||
|
||
perl ../bin/skat_set_id.pl --in $annotation --out $annotationPlinkHweSkat &> "./log/making_weights_for_$annotationPlinkHweSkat"; | ||
|
||
cp ../bin/SKAT_template.R ./; | ||
# escape backslashes https://stackoverflow.com/questions/27787536/how-to-pass-a-variable-containing-slashes-to-sed | ||
mv SKAT_template.R $skatFile; | ||
# perl -pi -w -e "s/cwd_placeholder/.///\//\\/}/" $skatFile; | ||
perl -pi -w -e "s/inName_placeholder/$annotationPlinkHwe/" $skatFile; | ||
perl -pi -w -e "s/rCorr_placeholder/$rCorr/" $skatFile; | ||
perl -pi -w -e "s/cov_placeholder/$cov/" $skatFile; | ||
|
||
Rscript $skatFile; | ||
cd ../; | ||
|
Oops, something went wrong.