Skip to content

Commit

Permalink
Merge pull request #66 from calkan/master
Browse files Browse the repository at this point in the history
CRAM support added w/samtools. Multithreaded computation is added for…
  • Loading branch information
valeu authored Oct 14, 2019
2 parents b0281f2 + fba734b commit 800ae6d
Show file tree
Hide file tree
Showing 6 changed files with 100 additions and 16 deletions.
50 changes: 43 additions & 7 deletions src/GenomeCopyNumber.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ GenomeCopyNumber::GenomeCopyNumber(void)
sex_="";
SeekingSubc_ = false;
isMappUsed_=false;
totalNumberOfPairs_=0;
totalNumberOfPairs_=0;
normalNumberOfPairs_=0;
}

Expand All @@ -53,7 +53,7 @@ void GenomeCopyNumber::readCopyNumber(std::string const& mateFileName ,std::stri
if ((inputFormat.compare("pileup")==0 || inputFormat.compare("SAMtools pileup")==0)) {
readNumber = getReadNumberFromPileup(mateFileName);
} else {
readNumber = getLineNumber(mateFileName, pathToSamtools_, pathToSambamba_, SambambaThreads_);
readNumber = getLineNumber(mateFileName, pathToReference_, pathToSamtools_, pathToSambamba_, SambambaThreads_);
}
cout << "\t read number:\t" << readNumber << "\n";
cout << "\t coefficientOfVariation:\t" << coefficientOfVariation << "\n";
Expand Down Expand Up @@ -273,7 +273,7 @@ void GenomeCopyNumber::fillMyHash(std::string const& mateFileName ,std::string c
long normalCount = 0;
int bin = 0;

cout << "..Starting reading "<< mateFileName << "\n";
cout << "..[genomecopynumber] Starting reading "<< mateFileName << "\n";
//if (mateFileName.substr(mateFileName.size()-1,3).compare(".gz")==0) {
// igzstream in( mateFileName );
// if ( ! in.good()) {
Expand Down Expand Up @@ -310,24 +310,55 @@ void GenomeCopyNumber::fillMyHash(std::string const& mateFileName ,std::string c

string myInputFormat=inputFormat_str;
if (mateFileName.substr(mateFileName.size()-3,3).compare(".gz")!=0) {
if (mateFileName.substr(mateFileName.size()-4,4).compare(".bam")==0) {
if (pathToSambamba_ != "")
{
command = pathToSambamba_ + " view -t " + SambambaThreads_ + " " + mateFileName;
myInputFormat="sam"; //will try to use existing sambamba
cout << "..sambamba should be installed to be able to read BAM files; will use the following command for sambamba: "<<pathToSambamba_ + " view -t " + SambambaThreads_ + " " + mateFileName << "\n";
//cout << "..sambamba should be installed to be able to read BAM files; will use the following command for sambamba: "<<pathToSambamba_ + " view -t " + SambambaThreads_ + " " + mateFileName << "\n";
cout << "..sambamba should be installed to be able to read BAM files; will use the following command for sambamba: "<< command << "\n";
}
else
{
command = pathToSamtools_ + " view "+mateFileName;
command = pathToSamtools_ + " view -@ " + SambambaThreads_ + " " +mateFileName;
myInputFormat="sam"; //will try to use existing samtools
cout << "..samtools should be installed to be able to read BAM files; will use the following command for samtools: "<<pathToSamtools_ + " view "+mateFileName<<"\n";
//cout << "..samtools should be installed to be able to read BAM files; will use the following command for samtools: "<<pathToSamtools_ + " view "+mateFileName<<"\n";
cout << "..samtools should be installed to be able to read BAM files; will use the following command for samtools: "<< command <<"\n";
}
}
else if (mateFileName.substr(mateFileName.size()-5,5).compare(".cram")==0) {
if (pathToReference_ == "")
{
cout << "..Reference FASTA file (config.fastaFile) should be given to be able to read CRAM files\n";
exit (1);
}
if (pathToSambamba_ != "")
{
/* calkan - sambamba is using a very old version of htslib. CRAM support is buggy */
cout << "Sambamba has a bug in reading CRAM files. Please use samtools instead.\n";
exit (1);
/*
command = pathToSambamba_ + " view -t " + SambambaThreads_ + " -C -T " + pathToReference_ + " " + mateFileName;
myInputFormat="sam"; //will try to use existing sambamba
// cout << "..sambamba should be installed to be able to read BAM files; will use the following command for sambamba: "<<pathToSambamba_ + " view -t " + SambambaThreads_ + " " + mateFileName << "\n";
cout << "..sambamba should be installed to be able to read CRAM files; will use the following command for sambamba: "<< command << "\n";
*/
}
else
{
command = pathToSamtools_ + " view -T "+ pathToReference_ + " -@ " + SambambaThreads_ + " " +mateFileName;
myInputFormat="sam"; //will try to use existing samtools
//cout << "..samtools should be installed to be able to read CRAM files; will use the following command for samtools: "<<pathToSamtools_ + " view "+mateFileName<<"\n";
cout << "..samtools should be installed to be able to read CRAM files; will use the following command for samtools: "<< command << "\n";
}
}
}
else {
command = "gzip -cd "+mateFileName;
}

inputFormat = getInputFormat(myInputFormat);

stream =
#if defined(_WIN32)
_popen(command.c_str(), "r");
Expand Down Expand Up @@ -4261,8 +4292,13 @@ int GenomeCopyNumber::focusOnCapture (std::string const& captureFile) {
return int(minRegion);
}

void GenomeCopyNumber::setSamtools(std::string const& pathToSamtools) {
void GenomeCopyNumber::setSamtools(std::string const& pathToSamtools, std::string const& SambambaThreads) {
pathToSamtools_=pathToSamtools;
SambambaThreads_ = SambambaThreads;
}

void GenomeCopyNumber::setReference(std::string const& pathToReference) {
pathToReference_=pathToReference;
}

void GenomeCopyNumber::setSambamba(std::string const& pathToSambamba, std::string const& SambambaThreads) {
Expand Down
4 changes: 3 additions & 1 deletion src/GenomeCopyNumber.h
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,9 @@ class GenomeCopyNumber
void setBAFtrue();
void setNormalContamination(float normalContamination) ;
void setAllNormal ();
void setSamtools(std::string const& pathToSamtools);
void setSamtools(std::string const& pathToSamtools, std::string const& SambambaThreads_);
void setSambamba(std::string const& pathToSambamba, std::string const& SambambaThreads_);
void setReference(std::string const& pathToReference);
bool ifHasBAF();
void setSex(std::string sex);
void setSeekSubclones(bool seekSubclones);
Expand Down Expand Up @@ -162,6 +163,7 @@ class GenomeCopyNumber
std::string sex_;
std::string pathToSamtools_;
std::string pathToSambamba_;
std::string pathToReference_;
std::string SambambaThreads_;
};
#endif
Expand Down
Binary file removed src/freec
Binary file not shown.
11 changes: 8 additions & 3 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,9 @@ int main(int argc, char *argv[])

string pathToSambamba = (std::string)cf.Value("general","sambamba","");
string SambambaThreads = "";

SambambaThreads = (std::string)cf.Value("general","maxThreads",""); /* initialize SambambaThreads using maxThreads -- calkan */

if (pathToSambamba != "") {
SambambaThreads = (std::string)cf.Value("general","SambambaThreads","");
if (SambambaThreads == "") {
Expand All @@ -262,7 +265,7 @@ int main(int argc, char *argv[])
}
}

bool has_window = cf.hasValue("general","window");
bool has_window = cf.hasValue("general","window");
int window = (int)cf.Value("general","window",NA);
bool ifTargeted = cf.hasValue("target","captureRegions");

Expand Down Expand Up @@ -792,16 +795,18 @@ int main(int argc, char *argv[])
}

GenomeCopyNumber sampleCopyNumber;
sampleCopyNumber.setSamtools(pathToSamtools);
sampleCopyNumber.setSamtools(pathToSamtools, SambambaThreads);
sampleCopyNumber.setSambamba(pathToSambamba, SambambaThreads);
sampleCopyNumber.setReference(fastaFile);
sampleCopyNumber.setWESanalysis(WESanalysis);
sampleCopyNumber.setmakingPileup(makingPileup);

sampleCopyNumber.setIfLogged(logLogNorm);

GenomeCopyNumber controlCopyNumber;
controlCopyNumber.setSamtools(pathToSamtools);
controlCopyNumber.setSamtools(pathToSamtools, SambambaThreads);
controlCopyNumber.setSambamba(pathToSambamba, SambambaThreads);
controlCopyNumber.setReference(fastaFile);
controlCopyNumber.setWESanalysis(WESanalysis);
controlCopyNumber.setmakingPileup(makingPileup);
controlCopyNumber.setIfLogged(logLogNorm);
Expand Down
49 changes: 45 additions & 4 deletions src/myFunc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ unsigned long sum(const std::vector<int>& data) {
return sum;
}

long getLineNumber(std::string const& fileName, const std::string& pathToSamtools, const std::string& pathToSambamba, const std::string& SambambaThreads) {
long getLineNumber(std::string const& fileName, std::string const& refFileName, const std::string& pathToSamtools, const std::string& pathToSambamba, const std::string& SambambaThreads) {
string line ;
long count = 0;
FILE *stream;
Expand All @@ -374,7 +374,8 @@ long getLineNumber(std::string const& fileName, const std::string& pathToSamtool
#else
pclose(stream);
#endif
} else if (fileName.substr(fileName.size()-4,4).compare(".bam")==0) {
}
else if (fileName.substr(fileName.size()-4,4).compare(".bam")==0) {
string command = "";
if (pathToSambamba != "")
{
Expand All @@ -384,7 +385,7 @@ long getLineNumber(std::string const& fileName, const std::string& pathToSamtool
}
else
{
command = pathToSamtools + " view "+fileName;
command = pathToSamtools + " view -@ "+ SambambaThreads + " " + fileName;
//myInputFormat="sam"; //will try to use existing samtools
cout << "..samtools should be installed to be able to read BAM files\n";
}
Expand All @@ -403,7 +404,47 @@ long getLineNumber(std::string const& fileName, const std::string& pathToSamtool
#else
pclose(stream);
#endif
}else {
}
else if (fileName.substr(fileName.size()-5,5).compare(".cram")==0) {
string command = "";
if (refFileName == "")
{
cout << "..Reference FASTA file (config.fastaFile) should be given to be able to read CRAM files\n";
exit (1);
}
if (pathToSambamba != "")
{
/* calkan - sambamba is using a very old version of htslib. CRAM support is buggy */
cout << "Sambamba has a bug in reading CRAM files. Please use samtools instead.\n";
exit (1);
/*
command = pathToSambamba + " view -t " + SambambaThreads + " -C -T " + refFileName + " " +fileName;
//myInputFormat="sam"; //will try to use existing samtools
cout << "..sambamba should be installed to be able to read CRAM files\n"; */
}
else
{
command = pathToSamtools + " view -T "+ refFileName + " -@" + SambambaThreads + " "+ fileName;
//myInputFormat="sam"; //will try to use existing samtools
cout << "..samtools should be installed to be able to read CRAM files\n";
}
stream =
#if defined(_WIN32)
_popen(command.c_str(), "r");
#else
popen(command.c_str(), "r");
#endif

while ( fgets(buffer, MAX_BUFFER, stream) != NULL ) {
count++;
}
#if defined(_WIN32)
_pclose(stream);
#else
pclose(stream);
#endif
}
else {
ifstream file(fileName.c_str()) ;
while( getline( file, line ) ) count++ ;
file.close();
Expand Down
2 changes: 1 addition & 1 deletion src/myFunc.h
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ float get_iqr(const std::vector<float>& data);
void readFileWithGenomeInfo(const std::string &chrLenFileName, std::vector<std::string>& names, std::vector<int>& lengths);
void readChrNamesInBed(const std::string &targetBed, std::vector<std::string>&names_bed);
unsigned long sum(const std::vector<int>& data);
long getLineNumber(std::string const& file, const std::string& pathToSamtools, const std::string& pathToSambamba, const std::string& SambambaThreads);
long getLineNumber(std::string const& file, std::string const& refFileName, const std::string& pathToSamtools, const std::string& pathToSambamba, const std::string& SambambaThreads);
long getReadNumberFromPileup(std::string const& file);
int factorial (int num);
int get_max_index(const std::vector<float>& data);
Expand Down

0 comments on commit 800ae6d

Please sign in to comment.