Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
akotlar committed Aug 29, 2018
0 parents commit 6c71950
Show file tree
Hide file tree
Showing 27 changed files with 15,172 additions and 0 deletions.
Binary file added bin/.DS_Store
Binary file not shown.
115 changes: 115 additions & 0 deletions bin/SKAT_template.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
library("SKAT")

# This is a template; needs to be modified before use. Everything named "_placeholder" needs to sed replaced with a normal value
args = commandArgs(trailingOnly=TRUE)

if(length(args) == 0) {
args = c(FALSE,FALSE,FALSE,FALSE);
}

if(args[1]) {
inName = args[1];
} else {
inName = 'inName_placeholder';
}

tCwd = 'cwd_placeholder'
if(args[2]) {
tCwd = args[2];
}

if(tCwd != 'cwd_placeholder') {
setwd(tCwd)
}

rCorr = 'rCorr_placeholder'
if(args[3]) {
rCorr = as.numeric(args[3]);
}

file.cov = 'cov_placeholder'
if(args[4]) {
file.cov = args[4];
}

if(file.cov == 'cov_placeholder') {
file.cov = NULL;
}

# Just for backward compat
if(!is.numeric(rCorr)) {
rCorr = 0
}

baseName = paste(inName, "skat",collapse=".",sep=".")

# Using only rare variants below
file.bed <- paste(inName, 'bed', collapse=".",sep=".")
file.bim <- paste(inName, 'bim', collapse=".",sep=".")
file.fam <- paste(inName, 'fam', collapse=".",sep=".")

file.geneSet <- paste(baseName, 'setid_genes.tsv', collapse=".",sep=".")

file.gnomadBetaWeights = Read_SNP_WeightFile(FileName=paste(baseName, 'weights_gnomad_beta.tsv', collapse=".", sep="."))
file.gnomadBetaMadsenWeights = Read_SNP_WeightFile(FileName=paste(baseName, 'weights_gnomad_beta_madsen.tsv', collapse=".", sep="."))

file.ssdGenes <- paste(baseName, 'genes.ssd', collapse=".", sep=".") # This file is about to be created with the below command
file.infoGenes <- paste(baseName, 'genes.info', collapse=".", sep=".")

Generate_SSD_SetID(file.bed, file.bim, file.fam, file.geneSet, file.ssdGenes, file.infoGenes)
ssd.infoGenes <- Open_SSD(file.ssdGenes, file.infoGenes)

## DEFINE YOUR OWN COVARIATES
if(!is.null(file.cov)) {
famCov <- Read_Plink_FAM_Cov(file.fam, file.cov, Is.binary=TRUE, cov_header=FALSE)

# Define your own
x1 <- famCov$COV1
x2 <- famCov$COV2
x3 <- famCov$COV3
x4 <- famCov$COV4

yCov <- famCov$Phenotype
sex <- famCov$Sex

obj<-SKAT_Null_Model(yCov ~ x1 + x2 + x3 + x4 + sex, out_type="D", n.Resampling=1000, type.Resampling='bootstrap')
} else {
famCov <- Read_Plink_FAM(file.fam, Is.binary=TRUE)

yCov <- famCov$Phenotype
sex <- famCov$Sex

obj<-SKAT_Null_Model(yCov ~ sex, out_type="D", n.Resampling=1000, type.Resampling='bootstrap')
}


warnings()

#### Run basic model with gnomadBeta weights
writeLines(paste("\nTesting",baseName,"gene set with gnomadBetaWeights",collapse=" "))

out <- SKATBinary.SSD.All(ssd.infoGenes, obj, kernel = "linear.weighted", method="optimal.adj", r.corr=rCorr, obj.SNPWeight=file.gnomadBetaWeights)
out.results <- out$results
toWrite = out.results[order(out.results$P.value), c(1, 2, 4)]
write.table(toWrite, file=paste(baseName, 'genes_gnomadBetaWeights.tsv',collapse=".", sep="."), col.names=T, row.names=F, quote=F, sep='\t')
warnings()

min(out.results$P.value)
signif = Resampling_FWER(out,FWER=0.05)
signif
write.table(signif$result, file=paste(baseName, 'genes_gnomadBetaWeights_fwer.tsv',collapse=".", sep="."), col.names=T, row.names=F, quote=F, sep='\t')


#### Run basic model with no weights (should be used only for our < 1e-4 model)
writeLines(paste("\nTesting",baseName,"gene set unweighted",collapse=" "))

out <- SKATBinary.SSD.All(ssd.infoGenes, obj, kernel = "linear.weighted", method="optimal.adj", r.corr=rCorr, weights.beta=c(1,1))
out.results <- out$results
toWrite = out.results[order(out.results$P.value), c(1, 2, 4)]
write.table(toWrite, file=paste(baseName, 'genes_unweighted.tsv',collapse=".", sep="."), col.names=T, row.names=F, quote=F, sep='\t')
warnings()

min(out.results$P.value)
signif = Resampling_FWER(out,FWER=0.05)
signif
write.table(signif$result, file=paste(baseName, 'genes_unweighted_fwer.tsv',collapse=".", sep="."), col.names=T, row.names=F, quote=F, sep='\t')
50 changes: 50 additions & 0 deletions bin/add_fake_fam_to_sample_list.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
#! /usr/bin/env perl
use 5.10.0;
use strict;
use warnings;

use List::Util qw/max/;

use DDP;
use Math::Complex;
use PDL::Stats;
use PDL::Stats::Distr;
use List::MoreUtils qw/first_index uniq/;
use Getopt::Long;
use Yaml::XS qw/LoadFile/;
use Scalar::Util qw/looks_like_number/;

my ($sampleList, $out, $pathwayYaml);

GetOptions (
'in|i=s' => \$sampleList,
'out|i=s' => \$out,
);

if(!$sampleList) {
die "--in|i|ann|a --out|o (optional)"
}

my $annFh;
my $outName = $out;
if($sampleList =~ /\.gz$/) {
$outName //= substr($sampleList, 0, length($sampleList) - 3) . ".fake.sample_list";

# Don't think this actually would die, since separate process forked via pipe
open($annFh, '-|', "gunzip -c $sampleList") or die "Couldn't open sampleList file.";
} else {
$outName //= $sampleList . ".fake.sample_list";
open($annFh, '<', $sampleList) or die "Couldn't open annotated snp file.";
}

open(my $outFh, '>', $outName) or die "Couldn't make sample_list file";

my @lines = <$annFh>;
chomp @lines;

my $i = -1;
for my $id (@lines) {
$i++;

say $outFh "$id\t$id";
}
2 changes: 2 additions & 0 deletions bin/example.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
gene: refSeq.nearest.name2
population: af_nfe
62 changes: 62 additions & 0 deletions bin/fake_cov_from_sample_list.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#! /usr/bin/env perl
use 5.10.0;
use strict;
use warnings;

use List::Util qw/max/;

use DDP;
use Math::Complex;
use PDL::Stats;
use PDL::Stats::Distr;
use List::MoreUtils qw/first_index uniq/;
use Getopt::Long;
use Yaml::XS qw/LoadFile/;
use Scalar::Util qw/looks_like_number/;

my ($sampleList, $out, $pathwayYaml);

GetOptions (
'in|i=s' => \$sampleList,
'out|o=s' => \$out,
);

if(!$sampleList) {
die "--in|i sampleList --out|o (optional)"
}

my $annFh;
my $outName = $out;
if($sampleList =~ /\.gz$/) {
$outName //= substr($sampleList, 0, length($sampleList) - 3) . ".fake.cov";

# Don't think this actually would die, since separate process forked via pipe
open($annFh, '-|', "gunzip -c $sampleList") or die "Couldn't open sampleList file.";
} else {
$outName //= $sampleList . ".fake.cov";
open($annFh, '<', $sampleList) or die "Couldn't open annotated snp file.";
}

open(my $outFh, '>', $outName) or die "Couldn't make fam file";

my @lines = <$annFh>;
chomp @lines;

my @range = ( -10000 .. 10000 );

my $i = -1;
for my $id (@lines) {
$i++;

my $cov1 = $range[rand(@range)] / 10_000;
my $cov2 = $range[rand(@range)] / 10_000;
my $cov3 = $range[rand(@range)] / 10_000;
my $cov4 = $range[rand(@range)] / 10_000;

my $fam = $id;

my $dad = -9;
my $mom = -9;

say $outFh "$fam\t$id\t$cov1\t$cov2\t$cov3\t$cov4";
}
62 changes: 62 additions & 0 deletions bin/fake_fam_from_sample_list.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
#! /usr/bin/env perl
use 5.10.0;
use strict;
use warnings;

use List::Util qw/max/;

use DDP;
use Math::Complex;
use PDL::Stats;
use PDL::Stats::Distr;
use List::MoreUtils qw/first_index uniq/;
use Getopt::Long;
use Yaml::XS qw/LoadFile/;
use Scalar::Util qw/looks_like_number/;

my ($sampleList, $out, $pathwayYaml);

GetOptions (
'in|i=s' => \$sampleList,
'out|i=s' => \$out,
);

if(!$sampleList) {
die "--in|i|ann|a --out|o (optional)"
}

my $annFh;
my $outName = $out;
if($sampleList =~ /\.gz$/) {
$outName //= substr($sampleList, 0, length($sampleList) - 3) . ".fake.fam";

# Don't think this actually would die, since separate process forked via pipe
open($annFh, '-|', "gunzip -c $sampleList") or die "Couldn't open sampleList file.";
} else {
$outName //= $sampleList . ".fake.fam";
open($annFh, '<', $sampleList) or die "Couldn't open annotated snp file.";
}

open(my $outFh, '>', $outName) or die "Couldn't make fam file";

my @lines = <$annFh>;
chomp @lines;

my $i = -1;
for my $id (@lines) {
$i++;

my $sex = int(rand(3));
my $pheno = int(rand(3));

# If 0, raise to 1.
$sex ||= 1;
$pheno ||= 1;

my $fam = $id;

my $dad = -9;
my $mom = -9;

say $outFh "$fam\t$id\t-9\t-9\t$sex\t$pheno";
}
55 changes: 55 additions & 0 deletions bin/run_skat_example.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#!/bin/bash

exFolder=./example

outFolder=./out-example

rm -rf $outFolder

mkdir -p $outFolder && cd $_;
mkdir -p ./log;

# * needed instead of ./ to prevent ./filename.annotaiton.tsv.gz
annotation=1000g_100klines_example_old.annotation.tsv.gz;
annotationBaseName=1000g_100klines_example.annotation;
annotationVcfGz=1000g_100klines_old_example.annotation.vcf.gz;
annotationSampleList=1000g_100klines_example.annotation.sample_list;

fam=1000g_100klines_example.fake.fam
cov=1000g_100klines_example.fake.cov

cp ../example/$annotation ./
cp ../example/$annotationVcfGz ./
cp ../example/$annotationSampleList ./

cp ../example/$fam ./
cp ../example/$cov ./

annotationSampleListWithFam=$annotationSampleList.with_fam
perl ../bin/add_fake_fam_to_sample_list.pl --in $annotationSampleList --out $annotationSampleListWithFam

annotationPlink="$annotationBaseName.plink";
annotationPlinkHwe="$annotationPlink.hwe_1e-6";
annotationPlinkHweSkat="$annotationPlinkHwe.skat";

skatFile="skat_$annotationPlinkHwe.R";

plink --vcf $annotationVcfGz --keep-allele-order --const-fid seq --make-bed --out $annotationPlink &> "./log/making_$annotationPlink.log";
mv "$annotationPlink.fam" "$annotationPlink.fam_nosex_affectation";

mv $fam "$annotationPlink.fam"
plink --bfile $annotationPlink --hwe 1e-6 midp --keep $annotationSampleListWithFam --make-bed --out $annotationPlinkHwe &> "./log/making_$annotationPlinkHwe.log";

perl ../bin/skat_set_id.pl --in $annotation --out $annotationPlinkHweSkat &> "./log/making_weights_for_$annotationPlinkHweSkat";

cp ../bin/SKAT_template.R ./;
# escape backslashes https://stackoverflow.com/questions/27787536/how-to-pass-a-variable-containing-slashes-to-sed
mv SKAT_template.R $skatFile;
# perl -pi -w -e "s/cwd_placeholder/.///\//\\/}/" $skatFile;
perl -pi -w -e "s/inName_placeholder/$annotationPlinkHwe/" $skatFile;
perl -pi -w -e "s/rCorr_placeholder/$rCorr/" $skatFile;
perl -pi -w -e "s/cov_placeholder/$cov/" $skatFile;

Rscript $skatFile;
cd ../;

Loading

0 comments on commit 6c71950

Please sign in to comment.