Skip to content

Commit

Permalink
add scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
valeu committed Aug 2, 2016
1 parent c57b719 commit 71f7242
Show file tree
Hide file tree
Showing 7 changed files with 650 additions and 0 deletions.
60 changes: 60 additions & 0 deletions scripts/assess_significance.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
library(rtracklayer)

args <- commandArgs()

dataTable <-read.table(args[5], header=TRUE);
ratio<-data.frame(dataTable)

dataTable <- read.table(args[4], header=FALSE)
cnvs<- data.frame(dataTable)

ratio$Ratio[which(ratio$Ratio==-1)]=NA

cnvs.bed=GRanges(cnvs[,1],IRanges(cnvs[,2],cnvs[,3]))
ratio.bed=GRanges(ratio$Chromosome,IRanges(ratio$Start,ratio$Start),score=ratio$Ratio)

overlaps <- subsetByOverlaps(ratio.bed,cnvs.bed)
normals <- setdiff(ratio.bed,cnvs.bed)
normals <- subsetByOverlaps(ratio.bed,normals)

#mu <- mean(score(normals),na.rm=TRUE)
#sigma<- sd(score(normals),na.rm=TRUE)

#hist(score(normals),n=500,xlim=c(0,2))
#hist(log(score(normals)),n=500,xlim=c(-1,1))

#shapiro.test(score(normals)[which(!is.na(score(normals)))][5001:10000])
#qqnorm (score(normals)[which(!is.na(score(normals)))],ylim=(c(0,10)))
#qqline(score(normals)[which(!is.na(score(normals)))], col = 2)

#shapiro.test(log(score(normals))[which(!is.na(score(normals)))][5001:10000])
#qqnorm (log(score(normals))[which(!is.na(score(normals)))],ylim=(c(-6,10)))
#qqline(log(score(normals))[which(!is.na(score(normals)))], col = 2)

numberOfCol=length(cnvs)

for (i in c(1:length(cnvs[,1]))) {
values <- score(subsetByOverlaps(ratio.bed,cnvs.bed[i]))
#wilcox.test(values,mu=mu)
W <- function(values,normals){resultw <- try(wilcox.test(values,score(normals)), silent = TRUE)
if(class(resultw)=="try-error") return(list("statistic"=NA,"parameter"=NA,"p.value"=NA,"null.value"=NA,"alternative"=NA,"method"=NA,"data.name"=NA)) else resultw}
KS <- function(values,normals){resultks <- try(ks.test(values,score(normals)), silent = TRUE)
if(class(resultks)=="try-error") return(list("statistic"=NA,"p.value"=NA,"alternative"=NA,"method"=NA,"data.name"=NA)) else resultks}
#resultks <- try(KS <- ks.test(values,score(normals)), silent = TRUE)
# if(class(resultks)=="try-error") NA) else resultks
cnvs[i,numberOfCol+1]=W(values,normals)$p.value
cnvs[i,numberOfCol+2]=KS(values,normals)$p.value
}

if (numberOfCol==5) {
names(cnvs)=c("chr","start","end","copy number","status","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
}
if (numberOfCol==7) {
names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
}
if (numberOfCol==9) {
names(cnvs)=c("chr","start","end","copy number","status","genotype","uncertainty","somatic/germline","precentageOfGermline","WilcoxonRankSumTestPvalue","KolmogorovSmirnovPvalue")
}
write.table(cnvs, file=paste(args[4],".p.value.txt",sep=""),sep="\t",quote=F,row.names=F)


139 changes: 139 additions & 0 deletions scripts/freec2bed.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#!/usr/bin/perl -w
#translate *_ratio.txt (the out put of FREEC) into a BED track

use strict;

my $usage = qq{
$0
-----------------------------
mandatory parameters:
-f file file with ratio
-p ploidy ploidy (default 2)
-----------------------------
optional parameters:
-v verbose mode
};

if(scalar(@ARGV) == 0){
print $usage;
exit(0);
}

## arguments

my $filename = "";
my $verbose = 0;
my $ploidy = 2;

## parse command line arguments

while(scalar(@ARGV) > 0){
my $this_arg = shift @ARGV;
if ( $this_arg eq '-h') {print "$usage\n"; exit; }

elsif ( $this_arg eq '-f') {$filename = shift @ARGV;}
elsif ( $this_arg eq '-p') {$ploidy = shift @ARGV;}

elsif ( $this_arg eq '-v') {$verbose = 1;}

elsif ( $this_arg =~ m/^-/ ) { print "unknown flag: $this_arg\n";}
}

if( $filename eq ""){
die "you should specify file with ratio\n";
}

#-----------read file with pairs-----
my $numberOfAllSites = 0;
my $totalCount=0;
my $lines = 0;


#open (OUT , ">$name") or die "Cannot open file $name!!!!: $!";

if ($filename eq "") {
print "Please specify a file with ratio!\n" if ($verbose) ;
exit();
}


if ($filename =~ /.gz$/) {
open(FILE, "gunzip -c $filename |") or die "$0: can't open ".$filename.":$!\n";
} else {
open (FILE, "<$filename") or die "Cannot open file $filename!!!!: $!";
}

#Chromosome Start Ratio MedianRatio CopyNumber
#1 1 -1 1.03966 2
#1 1001 -1 1.03966 2

# to

#chromA chromStartA chromEndA dataValueA



my ($chr,$start,$ratio,$MedRatio,$CPN) = ("","","","","");

my $eventStart = "";
my $eventEnd = "";
my $eventValue = "";
my $eventChr = "";
my $count = 0;

while (<FILE>) {
chomp;
($chr,$start,$ratio,$MedRatio,$CPN) = split /\t/;
next if ($chr =~ m/^Chromosome/);
$totalCount++;
$count++;
if ($eventStart eq "") {
$eventStart = $start;
$eventEnd = $start;
$eventValue = $MedRatio;
$eventChr =$chr;
$count =0;

} else {
if ($eventChr ne $chr) {
#new Chromosome, print the last event

print "chr$eventChr $eventStart $eventEnd ".$eventValue*$ploidy."\n" if ($eventValue >= 0);

$eventStart = $start;
$eventEnd = $start;
$eventValue = $MedRatio;
$eventChr =$chr;
$count =0;

} else {
if ($eventValue ne $MedRatio) {
#new event, print the last event
print "chr$eventChr $eventStart $eventEnd ".$eventValue*$ploidy."\n" if ($eventValue >= 0);

$eventStart = $start;
$eventEnd = $start;
$eventValue = $MedRatio;
$eventChr =$chr;
$count =0;

} else {
#move forward
$eventEnd = $start;
}
}
}
}
close FILE;

#print the last event
print "chr$eventChr $eventStart $eventEnd ".$eventValue*$ploidy."\n" if ($eventValue >= 0);

print STDERR "Read: $filename\t$totalCount\n";


156 changes: 156 additions & 0 deletions scripts/freec2circos.pl
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
#!/usr/bin/perl -w
#translate "*_ratio.txt" (output of FREEC) into a Circos track

use strict;

my $usage = qq{
$0
-----------------------------
mandatory parameters:
-f file file with ratio
-p ploidy ploidy (default 2)
-----------------------------
optional parameters:
-v verbose mode
};

if(scalar(@ARGV) == 0){
print $usage;
exit(0);
}

## arguments

my $filename = "";
my $verbose = 0;
my $ploidy = 2;

## parse command line arguments

while(scalar(@ARGV) > 0){
my $this_arg = shift @ARGV;
if ( $this_arg eq '-h') {print "$usage\n"; exit; }

elsif ( $this_arg eq '-f') {$filename = shift @ARGV;}
elsif ( $this_arg eq '-p') {$ploidy = shift @ARGV;}

elsif ( $this_arg eq '-v') {$verbose = 1;}

elsif ( $this_arg =~ m/^-/ ) { print "unknown flag: $this_arg\n";}
}

if( $filename eq ""){
die "you should specify file with ratio\n";
}



#-----------read file with pairs-----
my $numberOfAllSites = 0;
my $totalCount=0;
my $lines = 0;


#open (OUT , ">$name") or die "Cannot open file $name!!!!: $!";

if ($filename eq "") {
print "Please specify a file with ratio!\n" if ($verbose) ;
exit();
}


if ($filename =~ /.gz$/) {
open(FILE, "gunzip -c $filename |") or die "$0: can't open ".$filename.":$!\n";
} else {
open (FILE, "<$filename") or die "Cannot open file $filename!!!!: $!";
}

#Chromosome Start Ratio MedianRatio CopyNumber
#1 1 -1 1.03966 2
#1 1001 -1 1.03966 2

# to

#hsY 0 999999 0.005786
#hsY 1000000 1999999 0.003958
#hsY 2000000 2999999 0.002578
#hsY 3000000 3999999 0.007451


my ($chr,$start,$ratio,$MedRatio,$CPN) = ("","","","","");

my $eventStart = "";
my $eventEnd = "";
my $eventValue = "";
my $eventChr = "";
my $count = 0;
my $eventRatio = "";

while (<FILE>) {
chomp;
($chr,$start,$ratio,$MedRatio,$CPN) = split /\t/;
next if ($chr =~ m/^Chromosome/);
$totalCount++;
$count++;
if ($eventStart eq "") {
$eventStart = $start;
$eventEnd = $start+999;
$eventValue = $MedRatio;
$eventRatio = $ratio;
$eventChr =$chr;
$count =0;
print "hs$eventChr $eventStart $eventEnd ".$eventValue*$ploidy."\n" if ($eventRatio >= 0);

} else {
if ($eventChr ne $chr) {
#new Chromosome, print the last event

print "hs$eventChr $eventStart $eventEnd ".$eventValue*$ploidy."\n" if ($eventValue >= 0);

$eventStart = $start;
$eventEnd = $start+999;
$eventValue = $MedRatio;
$eventRatio = $ratio;
$eventChr =$chr;
$count =0;
print "hs$eventChr $eventStart $eventEnd ".$eventValue*$ploidy."\n" if ($eventRatio >= 0);

} else {
if ($eventValue ne $MedRatio) {
#new event, print the last event
print "hs$eventChr $eventStart $eventEnd ".$eventValue*$ploidy."\n" if ($eventValue >= 0);

$eventStart = $start;
$eventEnd = $start+999;
$eventValue = $MedRatio;
$eventRatio = $ratio;
$eventChr =$chr;
$count =0;
print "hs$eventChr $eventStart $eventEnd ".$eventValue*$ploidy."\n" if ($eventRatio >= 0);

} else {
#move forward
$eventEnd = $start+999;;
}
}
if ($count>=10) {
$count =0;
print "hs$eventChr $eventStart $eventEnd ".$eventValue*$ploidy."\n" if ($ratio >= 0); #($eventValue >= 0); $eventRatio >0
$eventStart=$eventEnd;

}
}
}
close FILE;

#print the last event
print "hs$eventChr $eventStart $eventEnd ".$eventValue*$ploidy."\n" if ($eventRatio >= 0);

print STDERR "Read: $filename\t$totalCount\n";


Loading

0 comments on commit 71f7242

Please sign in to comment.