Skip to content

Commit

Permalink
Update shared, add error messages, and update tests
Browse files Browse the repository at this point in the history
  • Loading branch information
fgvieira committed Aug 11, 2023
1 parent 76c462a commit a088458
Show file tree
Hide file tree
Showing 10 changed files with 276 additions and 171 deletions.
7 changes: 5 additions & 2 deletions EM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,8 +232,11 @@ void iter_EM(params *pars) {
}

// Calculate haplotype frequency through an EM
if(pars->freq_est == 2 || pars->e_prob_calc == 2)
haplo_freq(hap_freq, prev_site, curr_site, pars->freq[s-1], pars->freq[s], pars->n_ind);
if(pars->freq_est == 2 || pars->e_prob_calc == 2) {
double loglkl;
uint64_t n_iter, n_ind_data;
n_iter = haplo_freq(hap_freq, &loglkl, &n_ind_data, prev_site, curr_site, pars->freq[s-1], pars->freq[s], pars->n_ind, false);
}

// Estimated MAF
if(pars->freq_est == 1 || s == 1){
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ Executables are built into the main directory. If you wish to clean all binaries
* `--n_ind INT`: sample size (number of individuals).
* `--n_sites INT`: total number of sites.
* `--call_geno`: call genotypes before running analyses.
* `--freqs DOUBLE or CHAR`: initial frequency values. Can be defined by user as a DOUBLE, (r)andom, (e)stimated or read from a FILE.
* `--freqs_est INT`: allele frequency estimation method: 0) fixed (no optimizatopn); 1) assuming independence of sites; and 2) through haplotype frequency.
* `--freq DOUBLE or CHAR`: initial frequency values. Can be defined by user as a DOUBLE, (r)andom, (e)stimated or read from a FILE.
* `--freq_est INT`: allele frequency estimation method: 0) fixed (no optimizatopn); 1) assuming independence of sites; and 2) through haplotype frequency.
* `--indF DOUBLE-DOUBLE or CHAR`: initial inbreeding and transition parameter values. Can be defined by user as a DOUBLE-DOUBLE, (r)andom, or read from a FILE.
* `--indF_fixed`: inbreeding parameter values are fixed (will not be optimized).
* `--alpha_fixed`: transition parameter values are fixed (will not be optimized).
Expand Down
69 changes: 33 additions & 36 deletions examples/test.md5
Original file line number Diff line number Diff line change
@@ -1,43 +1,43 @@
06dbb6d8b15bb7c8aa97297f2cb0d40d testF.beagle.gz
f809191933e77148ce85254c3559c384 testF_bin.geno
5ec31ced80a4f086cbb2a1a1fa45b478 testF_bin.geno
2ddcb40175c35547d97cc8f55e30a13e testF_bin.ibd
64e067756c140c5af6673c83119a089e testF_bin.indF
50cb262174948a72874d4553f0c0cfbe testF.geno
e805a3d7f028ab1343dc016891e935bd testF.glf
50be53151b3268ef6c87475b544eedd2 testF.geno
5a40f2f602ebb557f8e293b9a7ed3d0c testF.glf
8b5f30aa4b1f0ac93ef01f8fe9b36370 testF.glf.pos.gz
f425fcc5042d5a87ffe9cea5d7e0b057 testF-HMM.BEST.GL_CG.geno
0fae8c569303e2440850f11f49ea03cf testF-HMM.BEST.GL_CG.ibd
5b51c2c994a21d71d0f489ca6fb8f6b6 testF-HMM.BEST.GL_CG.indF
0393efb2cb232b283daf3d1eed6e43f8 testF-HMM.BEST.GL.geno
becab95cbf2bd41caf1319fc0f45da3f testF-HMM.BEST.GL.ibd
da58cef5784acd955a1ba2f7ba6d574e testF-HMM.BEST.GL.indF
75d86e3554eff714189919f5d6cef4e4 testF-HMM.BEST.GL_CG.geno
fed5119ba96d07891a50d9dad610d1c9 testF-HMM.BEST.GL_CG.ibd
4768bd76d37c10159d78c3db0043d96d testF-HMM.BEST.GL_CG.indF
8e94d9457e13316cfb6b0c79295cbc2d testF-HMM.BEST.GL.geno
ebf3a379e8cd1cc4dd5e77284120350d testF-HMM.BEST.GL.ibd
762f821a1c86625561892c689cd06c87 testF-HMM.BEST.GL.indF
4836adda5d53c99a70f22450fe1d30c9 testF-HMM.BEST.TG.geno
9965cf9810bb4674c9b5f0c85697a4bd testF-HMM.BEST.TG.ibd
b2d323b0a5869882b398a492e89a5177 testF-HMM.BEST.TG.ibd
5aadc82d8483079f442ba1e47823eb2d testF-HMM.BEST.TG.indF
77bd1bbf058b47a2a68372089dbcd3ef testF-HMM.freq_fixed.GL_CG.geno
49bfe6a396952fdd1741c111fc226bc9 testF-HMM.freq_fixed.GL_CG.ibd
2c25607718692f256369371e1f5f2ecf testF-HMM.freq_fixed.GL_CG.indF
a1de30fca216caeb7b101fbf73a76da1 testF-HMM.freq_fixed.GL.geno
496df56ac6c657f98916994ea99e06e6 testF-HMM.freq_fixed.GL.geno
01e13bb192a06174c63dbc499dad4492 testF-HMM.freq_fixed.GL.ibd
01bc2290096cd0a51f07c34a70a197f5 testF-HMM.freq_fixed.GL.indF
4836adda5d53c99a70f22450fe1d30c9 testF-HMM.freq_fixed.TG.geno
2341ff5e868792ee7091b0a435d3857a testF-HMM.freq_fixed.TG.ibd
8fe8e11d48299b4207dbd827daf7b154 testF-HMM.freq_fixed.TG.indF
4646ce064447f26a42a9dd063f928d08 testF-HMM.indF_fixed.GL_CG.geno
c35e2e9fcfeb405eae3966d037897781 testF-HMM.indF_fixed.GL_CG.ibd
f78a734cdab56aec4ab2d8bbfb79176f testF-HMM.indF_fixed.GL_CG.indF
b7dd8d76a379ed13d71e1dfce1691acd testF-HMM.indF_fixed.GL.geno
ddacf4c62f0c1225fb6e08f503a801b6 testF-HMM.indF_fixed.GL.ibd
a2988806771ff76418fe6789010bbdfd testF-HMM.indF_fixed.GL.indF
f5209003af4ec478e5976f8cc4417c18 testF-HMM.indF_fixed.GL_CG.geno
7a51a585d19422af1e6681fba4052093 testF-HMM.indF_fixed.GL_CG.ibd
8773eb6f4f543923e662a0a726ba0263 testF-HMM.indF_fixed.GL_CG.indF
ab064ce55571884e1fefd7bf901dd710 testF-HMM.indF_fixed.GL.geno
4c15e461e7d0dc5456f6b4fb0a4519a6 testF-HMM.indF_fixed.GL.ibd
b64f86b78d8976eefbe82007739eec6b testF-HMM.indF_fixed.GL.indF
4836adda5d53c99a70f22450fe1d30c9 testF-HMM.indF_fixed.TG.geno
e9236ff0e4437c1b4cebd6070be906f7 testF-HMM.indF_fixed.TG.ibd
56c6e65125ddc1cba63abf63b4ddcf01 testF-HMM.indF_fixed.TG.indF
5e5057888f4e53cc1b6c76fefa412468 testF-HMM.normal.GL_CG.geno
8e24e93507007187a25677ac7d9ef6f8 testF-HMM.normal.GL_CG.ibd
8eb4fe26d7670105094520f075663e65 testF-HMM.normal.GL_CG.indF
6cd61843237d970db416e629016985b5 testF-HMM.normal.GL.geno
8c4dd7b6e6bff4e10dce5ec28d66cd56 testF-HMM.normal.GL.ibd
2198fc897349861c386ff983b9038608 testF-HMM.normal.GL.indF
421f7ce46c2dbee083c5b0e5ce514b5e testF-HMM.normal.GL_CG.geno
1d9f870d261fa1621a2252bf6a5dcfa9 testF-HMM.normal.GL_CG.ibd
3222a6ee595aa7bd514aed276288c25b testF-HMM.normal.GL_CG.indF
7503a5c303969caacb50908400525e4e testF-HMM.normal.GL.geno
3d0a4b5470a5fd9f5027d04b0180d15e testF-HMM.normal.GL.ibd
c37b8cda295f6a1509fcb20783f467ee testF-HMM.normal.GL.indF
4836adda5d53c99a70f22450fe1d30c9 testF-HMM.normal.TG.geno
2215b4b26ee9a1de82b8e24147c0a927 testF-HMM.normal.TG.ibd
e3d04214e6cd993a1c811c4863c0ae75 testF-HMM.normal.TG.indF
Expand All @@ -47,23 +47,20 @@ ff7b62494e2b93d581ece30f69f58fb8 testF-HMM.SIM.ind.txt
c0961d931e776b37256ae9807d20642b testF-HMM.SIM.path.gz
e2d8e19eab30f1277ba3d2f254b68b53 testF-HMM.SIM.pos.gz
9308e324bc321fc5901fcc5e86693b53 testF-HMM.TRUE.GL_CG.geno
eae74dc0f068c09a166099a4b220c947 testF-HMM.TRUE.GL_CG.ibd
ba176ac8e6f98a1676df0813dc9317e1 testF-HMM.TRUE.GL_CG.indF
a1de30fca216caeb7b101fbf73a76da1 testF-HMM.TRUE.GL.geno
9f8880579527ff497d81644c2c98dd01 testF-HMM.TRUE.GL_CG.ibd
e6d825dc155914ba6e9f936b0893ee3f testF-HMM.TRUE.GL_CG.indF
496df56ac6c657f98916994ea99e06e6 testF-HMM.TRUE.GL.geno
461a5d4dd239ca0f6b17d34f45ebba9e testF-HMM.TRUE.GL.ibd
96869c9bb47bd08ad5e312e47f160219 testF-HMM.TRUE.GL.indF
4836adda5d53c99a70f22450fe1d30c9 testF-HMM.TRUE.TG.geno
4bc08c20aca7b5227faf557190a44baf testF-HMM.TRUE.TG.ibd
395dd8b4e806db6107c0440c5a455f80 testF-HMM.TRUE.TG.indF
499556e587414e87de5d2846c8f053eb testF-HMM.TRUE.TG.indF
d4232b6c418c59185cdf29fcdca5ff44 testF.ibd
783472ff6c0f24cf751c470a01427cab testF.indF
a5c71036fa8a37c777e39b347bbceb96 testF.indF.covar
fb56775924907f0621d70adc3e3bba30 testF.indF.geno
c4ec5559c448513ed0b88530d96a408d testF.indF.mafs.gz
15ceb4e0c707ec5c1144b2af35c9df26 testF.indF.saf
f0316e26f22d9bdc3c65c0d7524c11fe testF.indF.saf.idx
29505bbbb7887ebff8a6abe777dcc05c testF.indF.saf.pos.gz
2c53be7c0ffb1d46227e5211f7d40216 testF.indF.saf_sum
1c93b42615ead6513c9efeb714b52cbf testF.indF.stat
e7b2342d017bb45e8e872e9f8f1117ef testF.mafs.gz
335eb65dd8d981a8a34db3bb65111576 testF.indF.geno.gz
121b9eafcd7f4e7c59f1dbd97ea98d6a testF.indF.mafs.gz
502bf22705bdc54ed5a2cad0ad31460a testF.indF.saf.gz
cea9130a37aeb9287c47be48c1df0c96 testF.indF.saf.idx
6f83eaaf65f997d906766432694ba10d testF.indF.saf.pos.gz
aa3d17b6088fd53a7814d5a2a343134d testF.mafs.gz
109d81d0dc8630beeebce1fbed4700b6 testF.pos
16 changes: 0 additions & 16 deletions examples/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,22 +84,6 @@ $ANGSD/angsd -glf $SIM_DATA/testF.glf.gz -fai $SIM_DATA/testAF.ANC.fas.fai -nInd



##### Calculate covariance matrix
gunzip -f testF.indF.geno.gz testF.indF.saf.gz
$NGSPOPGEN/ngsCovar -probfile testF.indF.geno -nind $N_IND -nsites $N_SITES -call 0 -sfsfile testF.indF.saf -norm 0 -outfile testF.indF.covar



##### Calculate population genetics statistics
$NGSPOPGEN/ngsStat -npop 1 -postfiles testF.indF.saf -nsites $N_SITES -iswin 1 -nind $N_IND -block_size $N_SITES -outfile testF.indF.stat



##### SFS
hexdump -v -s 8 -e "$((2*N_IND+1))/4 \"%.10g\t\"\"\n\"" testF.indF.saf | perl -na -e '$sum=0; $sum+=exp($_) for @F; next if($sum==0); for $i (0..$#F){$frq[$i]+=exp($F[$i])/$sum}; END{$tsum+=$_ for @frq; $_/=$tsum for @frq; print join("\t",@frq)."\n"}' > testF.indF.saf_sum



##### Check MD5
rm -f *.arg
TMP=`mktemp --suffix .ngsF-HMM`
Expand Down
28 changes: 20 additions & 8 deletions ngsF-HMM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -73,13 +73,22 @@ int main (int argc, char** argv) {
printf("> Sites coordinates\n");
}
if(pars->in_pos){
pars->pos_dist = read_pos(pars->in_pos, pars->n_sites);
// Temp FIX since ngsF-HMM uses 1-based arrays
double *pos_dist = read_dist(pars->in_pos, 0, pars->n_sites);
pars->pos_dist = init_ptr(pars->n_sites+1, (double) INFINITY);
memcpy(pars->pos_dist+1, pos_dist, pars->n_sites * sizeof(double));
free_ptr((void*) pos_dist);
}else{
pars->pos_dist = init_ptr(pars->n_sites+1, (double) INFINITY);
}
// Convert position distances to Mb
for(uint64_t s = 1; s <= pars->n_sites; s++)
pars->pos_dist[s] /= 1e6;
if(pars->verbose >= 7){
for(uint64_t s = 1; s <= min(10,pars->n_sites); s++){
printf("%f\n", pars->pos_dist[s]);
}
}

// Read data from GENO file
if(pars->verbose >= 1)
Expand All @@ -93,15 +102,15 @@ int main (int argc, char** argv) {
for(uint64_t s = 1; s <= pars->n_sites; s++){
// Call genotypes
if(pars->call_geno)
call_geno(pars->geno_lkl[i][s], N_GENO);
call_geno(pars->geno_lkl[i][s], N_GENO);
/*
// Ensure minimum GL allowed
if( pars->geno_lkl[i][s][array_min_pos(pars->geno_lkl[i][s], N_GENO)] < log(0.001) ){
for(uint64_t g = 0; g < N_GENO; g++)
if(pars->geno_lkl[i][s][g] < log(0.001))
pars->geno_lkl[i][s][g] = log(0.001);
// Re-normalize GL
post_prob(pars->geno_lkl[i][s], pars->geno_lkl[i][s], NULL, N_GENO);
for(uint64_t g = 0; g < N_GENO; g++)
if(pars->geno_lkl[i][s][g] < log(0.001))
pars->geno_lkl[i][s][g] = log(0.001);
// Re-normalize GL
post_prob(pars->geno_lkl[i][s], pars->geno_lkl[i][s], NULL, N_GENO);
}
*/
post_prob(pars->geno_lkl[i][s], pars->geno_lkl[i][s], NULL, N_GENO);
Expand All @@ -112,14 +121,17 @@ int main (int argc, char** argv) {
/////////////////////////////////////////////
// Declare and initialize output variables //
/////////////////////////////////////////////
if(pars->verbose >= 6)
printf("> Init output\n");
init_output(pars);



//////////////////
// Analyze Data //
//////////////////
// Create thread pool
if(pars->verbose >= 6)
printf("> Create threadpool\n");
if( (pars->thread_pool = threadpool_create(pars->n_threads, 2*pars->n_ind, 0)) == NULL )
error(__FUNCTION__, "failed to create thread pool!");

Expand Down
11 changes: 8 additions & 3 deletions parse_args.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,7 +317,9 @@ int init_output(params* pars) {
if(pars->freq_est == 1 || s == 1){
pars->freq[s] = est_maf(pars->n_ind, pars->geno_lkl_s[s], (double) 0);
}else if(pars->freq_est == 2){
haplo_freq(hap_freq, pars->geno_lkl_s[s-1], pars->geno_lkl_s[s], pars->freq[s-1], pars->freq[s], pars->n_ind);
double loglkl;
uint64_t n_iter, n_ind_data;
n_iter = haplo_freq(hap_freq, &loglkl, &n_ind_data, pars->geno_lkl_s[s-1], pars->geno_lkl_s[s], pars->freq[s-1], pars->freq[s], pars->n_ind, false);
pars->freq[s] = hap_freq[1] + hap_freq[3];
}

Expand Down Expand Up @@ -370,8 +372,11 @@ int init_output(params* pars) {
pars->e_prob = init_ptr(pars->n_ind, pars->n_sites+1, N_STATES, (double) 0);

for(uint64_t s = 1; s <= pars->n_sites; s++){
if(pars->e_prob_calc == 2)
haplo_freq(hap_freq, pars->geno_lkl_s[s-1], pars->geno_lkl_s[s], pars->freq[s-1], pars->freq[s], pars->n_ind);
if(pars->e_prob_calc == 2) {
double loglkl;
uint64_t n_iter, n_ind_data;
n_iter = haplo_freq(hap_freq, &loglkl, &n_ind_data, pars->geno_lkl_s[s-1], pars->geno_lkl_s[s], pars->freq[s-1], pars->freq[s], pars->n_ind, false);
}

for(uint64_t i = 0; i < pars->n_ind; i++)
for(uint64_t k = 0; k < N_STATES; k++)
Expand Down
Loading

0 comments on commit a088458

Please sign in to comment.