diff --git a/global/src/sparse.array.c b/global/src/sparse.array.c index 4899e7a82..64795f28a 100644 --- a/global/src/sparse.array.c +++ b/global/src/sparse.array.c @@ -134,6 +134,11 @@ void sai_terminate_sparse_arrays() Integer pnga_sprs_array_create(Integer idim, Integer jdim, Integer type, Integer size) { Integer i, hdl, s_a; + int local_sync_begin,local_sync_end; + + local_sync_begin = _ga_sync_begin; local_sync_end = _ga_sync_end; + _ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/ + if (local_sync_begin) pnga_pgroup_sync(pnga_pgroup_get_default()); GAvalidtypeM(pnga_type_f2c((int)type)); if (idim <= 0 || jdim <= 0) pnga_error("(ga_sprs_array_create) Invalid array dimenensions",0); @@ -157,6 +162,7 @@ Integer pnga_sprs_array_create(Integer idim, Integer jdim, Integer type, Integer break; } } + if (local_sync_end) pnga_pgroup_sync(pnga_pgroup_get_default()); return s_a; } @@ -261,6 +267,7 @@ void find_lims(Integer dim, Integer proc, Integer nproc, Integer *lo, Integer *h logical pnga_sprs_array_assemble(Integer s_a) { Integer hdl = GA_OFFSET + s_a; + int local_sync_begin,local_sync_end; Integer lo, hi, ld; Integer i,j,ilo,ihi,jlo,jhi; int64_t *offset; @@ -294,6 +301,10 @@ logical pnga_sprs_array_assemble(Integer s_a) Integer *row_nnz; Integer nnz; + local_sync_begin = _ga_sync_begin; local_sync_end = _ga_sync_end; + _ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/ + if (local_sync_begin) pnga_pgroup_sync(SPA[hdl].grp); + /* set variable that distinguishes between long and ints for indices */ if (SPA[hdl].idx_size == sizeof(int64_t)) { longidx = 1; @@ -861,6 +872,7 @@ logical pnga_sprs_array_assemble(Integer s_a) free(map); SPA[hdl].ready = 1; + if (local_sync_end) pnga_pgroup_sync(SPA[hdl].grp); return ret; } @@ -1150,9 +1162,9 @@ void pnga_sprs_array_matvec_multiply(Integer s_a, Integer g_a, Integer g_v) local_sync_begin = _ga_sync_begin; local_sync_end = _ga_sync_end; _ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/ + if (local_sync_begin) pnga_pgroup_sync(s_grp); /* Check that g_hdl and v_hdl are both vectors and that sizes * match */ - if (local_sync_begin) pnga_sync(); pnga_inquire(g_a, &atype, &arank, dims); adim = dims[0]; pnga_inquire(g_v, &vtype, &vrank, dims); @@ -1288,7 +1300,7 @@ void pnga_sprs_array_matvec_multiply(Integer s_a, Integer g_a, Integer g_v) khi = ihi + 1; pnga_acc(g_v,&klo,&khi,vsum,&one,&one_r); } - if (local_sync_end) pnga_sync(); + if (local_sync_end) pnga_pgroup_sync(s_grp); free(vsum); } @@ -1303,6 +1315,10 @@ logical pnga_sprs_array_destroy(Integer s_a) { Integer hdl = GA_OFFSET + s_a; Integer ret = 1; + int local_sync_begin,local_sync_end; + local_sync_begin = _ga_sync_begin; local_sync_end = _ga_sync_end; + _ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/ + if (local_sync_begin) pnga_pgroup_sync(SPA[hdl].grp); if (SPA[hdl].ready) { if (!pnga_destroy(SPA[hdl].g_data)) ret = 0; if (!pnga_destroy(SPA[hdl].g_i)) ret = 0; @@ -1330,6 +1346,7 @@ logical pnga_sprs_array_destroy(Integer s_a) } SPA[hdl].active = 0; SPA[hdl].ready = 0; + if (local_sync_end) pnga_pgroup_sync(SPA[hdl].grp); return ret; } @@ -1348,6 +1365,7 @@ void pnga_sprs_array_export(Integer s_a, const char* file) Integer hdl = GA_OFFSET + s_a; int size = SPA[hdl].size; int type = SPA[hdl].type; + int local_sync_begin,local_sync_end; char frmt[32]; char *cptr; int offset; @@ -1377,6 +1395,10 @@ void pnga_sprs_array_export(Integer s_a, const char* file) int *blksize; int64_t nnz; + local_sync_begin = _ga_sync_begin; local_sync_end = _ga_sync_end; + _ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/ + if (local_sync_begin) pnga_pgroup_sync(SPA[hdl].grp); + /* find the total number of nonzero elements on each process */ nnz = 0; for (iproc = 0; iproc0) offset[i] = offset[i-1]+tmp[i-1]; map[i] = offset[i]+1; /* 1-based indexing for map array */ } + pnga_mask_sync(local_sync_begin,local_sync_end); SPA[hdl_c].g_j = pnga_create_handle(); + pnga_mask_sync(local_sync_begin,local_sync_end); SPA[hdl_c].g_data = pnga_create_handle(); nnz = totalsize; if (longidx) { @@ -2737,7 +2805,9 @@ Integer pnga_sprs_array_matmat_multiply(Integer s_a, Integer s_b) pnga_set_irreg_distr(SPA[hdl_c].g_j,map,&nprocs); pnga_set_pgroup(SPA[hdl_c].g_data,SPA[hdl_c].grp); pnga_set_irreg_distr(SPA[hdl_c].g_data,map,&nprocs); + pnga_mask_sync(local_sync_begin,local_sync_end); pnga_allocate(SPA[hdl_c].g_j); + pnga_mask_sync(local_sync_begin,local_sync_end); pnga_allocate(SPA[hdl_c].g_data); free(map); free(tmp); @@ -2937,10 +3007,12 @@ Integer pnga_sprs_array_matmat_multiply(Integer s_a, Integer s_b) * values for block * blkend: last index g_j and g_data for block */ + pnga_mask_sync(local_sync_begin,local_sync_end); g_blk = pnga_create_handle(); pnga_set_pgroup(g_blk,SPA[hdl_c].grp); pnga_set_data(g_blk,three,dims,C_LONG); pnga_set_chunk(g_blk,chunk); + pnga_mask_sync(local_sync_begin,local_sync_end); if (!pnga_allocate(g_blk)) { pnga_error("(pnga_sprs_matmat_multiply) Failure allocating g_blk",0); } @@ -3042,6 +3114,7 @@ Integer pnga_sprs_array_get_column(Integer s_a, Integer icol) { Integer g_v; Integer handle = s_a + GA_OFFSET; + int local_sync_begin,local_sync_end; Integer type = SPA[handle].type; Integer one = 1; Integer idim = SPA[handle].idim; @@ -3055,6 +3128,10 @@ Integer pnga_sprs_array_get_column(Integer s_a, Integer icol) cplus[0] = '+'; cplus[1] = '\0'; + local_sync_begin = _ga_sync_begin; local_sync_end = _ga_sync_end; + _ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/ + if (local_sync_begin) pnga_pgroup_sync(SPA[handle].grp); + /* Create map array containing row offsets */ hi = SPA[handle].ihi; lo = SPA[handle].ilo; @@ -3071,11 +3148,14 @@ Integer pnga_sprs_array_get_column(Integer s_a, Integer icol) for (i=1; i= lo) { Integer ilo = lo+1; @@ -3135,7 +3215,7 @@ Integer pnga_sprs_array_get_column(Integer s_a, Integer icol) } free(map); free(size); - pnga_pgroup_sync(SPA[handle].grp); + if (local_sync_end) pnga_pgroup_sync(SPA[handle].grp); return g_v; } @@ -3211,6 +3291,7 @@ Integer pnga_sprs_array_create_from_dense(Integer g_a, Integer idx_size, Integer trans) { Integer handle = g_a + GA_OFFSET, s_a; + int local_sync_begin,local_sync_end; Integer i, j, idx, jdx, lo[2], hi[2], ld; Integer idim, jdim; int grp = GA[handle].p_handle; @@ -3218,6 +3299,10 @@ Integer pnga_sprs_array_create_from_dense(Integer g_a, Integer idx_size, void *vptr; int type = GA[handle].type; + local_sync_begin = _ga_sync_begin; local_sync_end = _ga_sync_end; + _ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/ + if (local_sync_begin) pnga_pgroup_sync(grp); + /* Check dimension */ if (GA[handle].ndim != 2) { pnga_error("(ga_sprs_array_create_from_dense) global array" @@ -3278,10 +3363,12 @@ Integer pnga_sprs_array_create_from_dense(Integer g_a, Integer idx_size, " encountered",type); } } + pnga_mask_sync(local_sync_begin,local_sync_end); if (!pnga_sprs_array_assemble(s_a)) { pnga_error("(ga_sprs_array_create_from_dense) failed to create" " sparse array from dense array",0); } + if (local_sync_end) pnga_pgroup_sync(grp); return s_a; } #undef SPRS_REAL_FILTER_M @@ -3371,6 +3458,7 @@ Integer pnga_sprs_array_sprsdns_multiply(Integer s_a, Integer g_b, Integer trans Integer hdl_a = s_a+GA_OFFSET; Integer hdl_b = g_b+GA_OFFSET; Integer hdl_c; + int local_sync_begin,local_sync_end; Integer elemsize; Integer idim, jdim; Integer i, j, k, l, m, n, nn; @@ -3390,6 +3478,11 @@ Integer pnga_sprs_array_sprsdns_multiply(Integer s_a, Integer g_b, Integer trans Integer *row_nnz; int64_t max_nnz; Integer *map, *size; + + local_sync_begin = _ga_sync_begin; local_sync_end = _ga_sync_end; + _ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/ + if (local_sync_begin) pnga_pgroup_sync(SPA[hdl_a].grp); + /* Do some initial verification to see if matrix multiply is possible */ if (SPA[hdl_a].type != GA[hdl_b].type) { pnga_error("(ga_sprs_array_sprsdns_multiply) types of sparse matrices" @@ -3422,6 +3515,7 @@ Integer pnga_sprs_array_sprsdns_multiply(Integer s_a, Integer g_b, Integer trans jdim = GA[hdl_b].dims[1]; } /* Construct product array C*/ + pnga_mask_sync(local_sync_begin,local_sync_end); g_c = pnga_create_handle(); hdl_c = g_c + GA_OFFSET; if (trans) { @@ -3460,10 +3554,12 @@ Integer pnga_sprs_array_sprsdns_multiply(Integer s_a, Integer g_b, Integer trans } pnga_set_irreg_distr(g_c,map,blocks); pnga_set_pgroup(g_c,SPA[hdl_a].grp); + pnga_mask_sync(local_sync_begin,local_sync_end); if (!pnga_allocate(g_c)) { pnga_error("(ga_sprs_array_sprsdns_multiply) could not allocate" " product array C",0); } + pnga_mask_sync(local_sync_begin,local_sync_end); pnga_zero(g_c); /* loop over processors in row to get target block and then loop over * processors to get all block pairs that contribute to target block. @@ -3599,6 +3695,7 @@ Integer pnga_sprs_array_sprsdns_multiply(Integer s_a, Integer g_b, Integer trans free(buf_b); } } + if (local_sync_end) pnga_pgroup_sync(SPA[hdl_a].grp); return g_c; } #undef REAL_SPRSDNS_MULTIPLY_M @@ -3710,6 +3807,7 @@ Integer pnga_sprs_array_dnssprs_multiply(Integer g_a, Integer s_b, Integer trans Integer hdl_a = g_a+GA_OFFSET; Integer hdl_b = s_b+GA_OFFSET; Integer hdl_c; + int local_sync_begin,local_sync_end; Integer elemsize; Integer idim, jdim; Integer i, j, k, l, m, n, nn; @@ -3729,6 +3827,11 @@ Integer pnga_sprs_array_dnssprs_multiply(Integer g_a, Integer s_b, Integer trans Integer *row_nnz; int64_t max_nnz; Integer *map, *size; + + local_sync_begin = _ga_sync_begin; local_sync_end = _ga_sync_end; + _ga_sync_begin = 1; _ga_sync_end=1; /*remove any previous masking*/ + if (local_sync_begin) pnga_pgroup_sync(SPA[hdl_b].grp); + /* Do some initial verification to see if matrix multiply is possible */ if (GA[hdl_a].type != SPA[hdl_b].type) { pnga_error("(ga_sprs_array_dnssprs_multiply) types of sparse matrices" @@ -3961,6 +4064,7 @@ Integer pnga_sprs_array_dnssprs_multiply(Integer g_a, Integer s_b, Integer trans free(buf_a); } } + if (local_sync_end) pnga_pgroup_sync(SPA[hdl_b].grp); return g_c; } #undef REAL_DNSSPRS_MULTIPLY_M