diff --git a/Jenkinsfile b/Jenkinsfile index 04bfddf..387cb79 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -41,11 +41,10 @@ AL4SAN steps { sh '''#!/bin/bash -el module purge - module load ecrc-extras - module load gcc/5.5.0 - module load mkl/2018-update-2 - module load openmpi/3.0.0-gcc-5.5.0 - module load parsec/master-gcc-5.5.0-mkl-openmpi-plasma-2.8.0 + module load gcc/7.2.0 + module load mkl/2020.0.166 + module load openmpi/3.0.0-gcc-7.2.0 + module load parsec/master-gcc-7.2.0-mkl-openmpi-plasma-3.0.0 mkdir -p build-parsec cd build-parsec diff --git a/README.md b/README.md index 221a479..d0aac4c 100644 --- a/README.md +++ b/README.md @@ -81,3 +81,24 @@ shared-memory systems, GPU-based nodes, distributed-memory hardware configuratio make test make install ``` +## References +1. AL4SAN: Abstraction Layer For Standardizing APIs of Task-Based Engines, +R. Alomairy, H. Ltaief, M. Abduljabbar & D. Keyes. IEEE Transactions on +Parallel and Distributed Systems, vol. 31, no. 11, pp. 2482-2495, 1 Nov. 2020, +doi: 10.1109/TPDS.2020.2992923. + +2. AL4SAN: Abstraction Layer For Standardizing APIs of Task-Based Engines, +R. Alomairy, H. Ltaief, M. Abduljabbar & D. Keyes. Poster at ISC High Performance +conference, 2020. Available at: https://www.youtube.com/watch?v=xgrQT-igXp8 + +## Acknowledgment +The authors would like to thank Yu Pei from the Innovative Computing Laboratory at the University of Tennessee for his help in extending AL4SAN to support the PaRSEC (DTD) runtime system on shared- and distributed-memory systems. 
+ +## Contact +Rabab Alomairy: rabab.omairy@kaust.edu.sa + +Hatem Ltaief: hatem.ltaief@kaust.edu.sa + +Mustafa Abduljabbar: mustafa.abduljabbar@kaust.edu.sa + + diff --git a/cmake_modules/al4san_cmake/modules/find/FindPARSEC.cmake b/cmake_modules/al4san_cmake/modules/find/FindPARSEC.cmake index 13c07a3..c72742b 100644 --- a/cmake_modules/al4san_cmake/modules/find/FindPARSEC.cmake +++ b/cmake_modules/al4san_cmake/modules/find/FindPARSEC.cmake @@ -648,13 +648,13 @@ mark_as_advanced(PARSEC_DIR_FOUND) include(FindPackageHandleStandardArgs) find_package_handle_standard_args(PARSEC DEFAULT_MSG PARSEC_LIBRARIES - PARSEC_parsec_ptgpp_BIN_DIR + #PARSEC_parsec_ptgpp_BIN_DIR PARSEC_WORKS) -if ( PARSEC_parsec_ptgpp_BIN_DIR ) - find_program(PARSEC_PARSEC_PTGPP - NAMES parsec_ptgpp - HINTS ${PARSEC_parsec_ptgpp_BIN_DIR}) -else() - set(PARSEC_PARSEC_PTGPP "PARSEC_PARSEC_PTGPP-NOTFOUND") -endif() +#if ( PARSEC_parsec_ptgpp_BIN_DIR ) +# find_program(PARSEC_PARSEC_PTGPP +# NAMES parsec_ptgpp +# HINTS ${PARSEC_parsec_ptgpp_BIN_DIR}) +#else() +# set(PARSEC_PARSEC_PTGPP "PARSEC_PARSEC_PTGPP-NOTFOUND") +#endif() diff --git a/control/al4san_descriptor.h b/control/al4san_descriptor.h index ce0e58f..8dafbcd 100644 --- a/control/al4san_descriptor.h +++ b/control/al4san_descriptor.h @@ -229,61 +229,61 @@ inline static int al4san_desc_islocal( const AL4SAN_desc_t *A, int m, int n ) #if defined(AL4SAN_SCHED_STARPU) #define AL4SAN_BEGIN_ACCESS_DECLARATION { \ - unsigned __al4san_need_submit = 0; \ + unsigned __al4san_starpu_need_submit = 0; \ AL4SAN_STARPU_BEGIN_ACCESS_DECLARATION #define AL4SAN_ACCESS_R3(A, Am, An) do { \ - if (al4san_desc_islocal(A, Am, An)) __al4san_need_submit = 1; \ + if (al4san_desc_islocal(A, Am, An)) __al4san_starpu_need_submit = 1; \ AL4SAN_STARPU_ACCESS_R(A, Am, An); \ } while(0) #define AL4SAN_ACCESS_R2(A, Am) do { \ - if (al4san_desc_islocal(A, Am, 0)) __al4san_need_submit = 1; \ + if (al4san_desc_islocal(A, Am, 0)) __al4san_starpu_need_submit = 1; \ 
AL4SAN_STARPU_ACCESS_R(A, Am, 0); \ } while(0) #define AL4SAN_ACCESS_R1(A) do { \ - if (al4san_desc_islocal(A, 0, 0)) __al4san_need_submit = 1; \ + if (al4san_desc_islocal(A, 0, 0)) __al4san_starpu_need_submit = 1; \ AL4SAN_STARPU_ACCESS_R(A, 0, 0); \ } while(0) #define AL4SAN_ACCESS_W3(A, Am, An) do { \ - if (al4san_desc_islocal(A, Am, An)) __al4san_need_submit = 1; \ + if (al4san_desc_islocal(A, Am, An)) __al4san_starpu_need_submit = 1; \ AL4SAN_STARPU_ACCESS_W(A, Am, An); \ } while(0) #define AL4SAN_ACCESS_W2(A, Am) do { \ - if (al4san_desc_islocal(A, Am, 0)) __al4san_need_submit = 1; \ + if (al4san_desc_islocal(A, Am, 0)) __al4san_starpu_need_submit = 1; \ AL4SAN_STARPU_ACCESS_W(A, Am, 0); \ } while(0) #define AL4SAN_ACCESS_W1(A) do { \ - if (al4san_desc_islocal(A, 0, 0)) __al4san_need_submit = 1; \ + if (al4san_desc_islocal(A, 0, 0)) __al4san_starpu_need_submit = 1; \ AL4SAN_STARPU_ACCESS_W(A, 0, 0); \ } while(0) #define AL4SAN_ACCESS_RW3(A, Am, An) do { \ - if (al4san_desc_islocal(A, Am, An)) __al4san_need_submit = 1; \ + if (al4san_desc_islocal(A, Am, An)) __al4san_starpu_need_submit = 1; \ AL4SAN_STARPU_ACCESS_RW(A, Am, An); \ } while(0) #define AL4SAN_ACCESS_RW2(A, Am) do { \ - if (al4san_desc_islocal(A, Am, 0)) __al4san_need_submit = 1; \ + if (al4san_desc_islocal(A, Am, 0)) __al4san_starpu_need_submit = 1; \ AL4SAN_STARPU_ACCESS_RW(A, Am, 0); \ } while(0) #define AL4SAN_ACCESS_RW1(A) do { \ - if (al4san_desc_islocal(A, 0, 0)) __al4san_need_submit = 1; \ + if (al4san_desc_islocal(A, 0, 0)) __al4san_starpu_need_submit = 1; \ AL4SAN_STARPU_ACCESS_RW(A, 0, 0); \ } while(0) #define AL4SAN_RANK_CHANGED(rank) do {\ - __al4san_need_submit = 1; \ + __al4san_starpu_need_submit = 1; \ AL4SAN_STARPU_RANK_CHANGED(rank); \ } while (0) #define AL4SAN_END_ACCESS_DECLARATION \ AL4SAN_STARPU_END_ACCESS_DECLARATION; \ - if (!__al4san_need_submit) return; \ + if (!__al4san_starpu_need_submit) return; \ } #else #define AL4SAN_BEGIN_ACCESS_DECLARATION diff --git 
a/control/async.c b/control/async.c index 887cf6d..906a1b1 100644 --- a/control/async.c +++ b/control/async.c @@ -121,7 +121,7 @@ int al4san_sequence_wait(AL4SAN_context_t *al4san, AL4SAN_sequence_t *sequence) #endif #ifdef AL4SAN_SCHED_OPENMP if(al4san->scheduler==3) - AL4SAN_Runtime_sequence_wait( al4san, sequence ); + AL4SAN_Openmp_sequence_wait( al4san, sequence ); #endif return AL4SAN_SUCCESS; diff --git a/control/control.c b/control/control.c index da3668d..fc5a8c0 100644 --- a/control/control.c +++ b/control/control.c @@ -155,19 +155,19 @@ AL4SAN_context_t* AL4SAN_InitPar(char *runtime, int ncpus, int ncudas, int nthr #ifdef AL4SAN_SCHED_QUARK if(al4san->scheduler==0) - al4san->my_mpi_rank = AL4SAN_Quark_comm_size( al4san ); + al4san->mpi_comm_size = AL4SAN_Quark_comm_size( al4san ); #endif #ifdef AL4SAN_SCHED_STARPU if(al4san->scheduler==1) - al4san->my_mpi_rank = AL4SAN_Starpu_comm_size( al4san ); + al4san->mpi_comm_size = AL4SAN_Starpu_comm_size( al4san ); #endif #ifdef AL4SAN_SCHED_PARSEC if(al4san->scheduler==2) - al4san->my_mpi_rank = AL4SAN_Parsec_comm_size( al4san ); + al4san->mpi_comm_size = AL4SAN_Parsec_comm_size( al4san ); #endif #ifdef AL4SAN_SCHED_OPENMP if(al4san->scheduler==3) - al4san->my_mpi_rank = AL4SAN_Openmp_comm_size( al4san ); + al4san->mpi_comm_size = AL4SAN_Openmp_comm_size( al4san ); #endif //al4san->mpi_comm_size = AL4SAN_Runtime_comm_size( al4san ); @@ -286,7 +286,7 @@ AL4SAN_context_t* AL4SAN_Switch_Runtime(char *runtime, int ncpus, int ncudas) #ifdef AL4SAN_SCHED_OPENMP if(strcmp(runtime, "Openmp")){ AL4SAN_Openmp_context_create(al4san); - AL4SAN_Openmp_init( al4san, ncpus, ncudas, nthreads_per_worker ); + AL4SAN_Openmp_init( al4san, ncpus, ncudas, -1 ); AL4SAN_Openmp_task_option_init(); } #endif @@ -312,19 +312,19 @@ AL4SAN_context_t* AL4SAN_Switch_Runtime(char *runtime, int ncpus, int ncudas) #ifdef AL4SAN_SCHED_QUARK if(al4san->scheduler==0) - al4san->my_mpi_rank = AL4SAN_Quark_comm_size( al4san ); + al4san->mpi_comm_size = 
AL4SAN_Quark_comm_size( al4san ); #endif #ifdef AL4SAN_SCHED_STARPU if(al4san->scheduler==1) - al4san->my_mpi_rank = AL4SAN_Starpu_comm_size( al4san ); + al4san->mpi_comm_size = AL4SAN_Starpu_comm_size( al4san ); #endif #ifdef AL4SAN_SCHED_PARSEC if(al4san->scheduler==2) - al4san->my_mpi_rank = AL4SAN_Parsec_comm_size( al4san ); + al4san->mpi_comm_size = AL4SAN_Parsec_comm_size( al4san ); #endif #ifdef AL4SAN_SCHED_OPENMP if(al4san->scheduler==3) - al4san->my_mpi_rank = AL4SAN_Openmp_comm_size( al4san ); + al4san->mpi_comm_size = AL4SAN_Openmp_comm_size( al4san ); #endif #endif @@ -641,7 +641,7 @@ int AL4SAN_Progress() #endif #ifdef AL4SAN_SCHED_OPENMP if(al4san->scheduler==3) - AL4SAN_Runtime_progress(al4san); + AL4SAN_Openmp_progress(al4san); #endif return AL4SAN_SUCCESS; @@ -857,7 +857,7 @@ void AL4SAN_Init_Processor_Grid(int p, int q){ al4sanctxt = al4san_context_self(); if (al4sanctxt == NULL) { - al4sab_error("AL4SAN_Init_Processor_Grid", "AL4SAN not initialized"); + al4san_error("AL4SAN_Init_Processor_Grid", "AL4SAN not initialized"); //return CHAMELEON_ERR_NOT_INITIALIZED; } diff --git a/control/descriptor.c b/control/descriptor.c index a0085d2..5688a54 100644 --- a/control/descriptor.c +++ b/control/descriptor.c @@ -2135,7 +2135,7 @@ void *AL4SAN_Vector_getaddr( const AL4SAN_desc_t *desc, int m) #endif #ifdef AL4SAN_SCHED_OPENMP if(al4san->scheduler==3) - return AL4SAN_Openmp_vector_getaddr( desc, m, n); + return AL4SAN_Openmp_vector_getaddr( desc, m); #endif } @@ -2182,7 +2182,7 @@ void *AL4SAN_Scaler_getaddr( const AL4SAN_desc_t *desc) #endif #ifdef AL4SAN_SCHED_OPENMP if(al4san->scheduler==3) - return AL4SAN_Openmp_scaler_getaddr( desc, m, n); + return AL4SAN_Openmp_scaler_getaddr( desc); #endif } /** diff --git a/example/potrf/Make.inc b/example/potrf/Make.inc index 06b8e4a..f5f211a 100644 --- a/example/potrf/Make.inc +++ b/example/potrf/Make.inc @@ -1,22 +1,53 @@ -CC =gcc +#CC =gcc +CC=clang LINK = $(CC) #LDFLAGS = -O3 -CFLAGS = -g -ldl -Wall 
-Wno-unused-function -DAL4SAN_USE_CUDA -DAL4SAN_CUDA_ASYNC1 - +#CFLAGS = -g -ldl -Wall -Wno-unused-function -DAL4SAN_USE_CUDA -DAL4SAN_CUDA_ASYNC1 +CFLAGS = -g -ldl -Wno-unused-parameter #F77BLASL= -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -fopenmp -lpthread -lm -lgfortran +#LIBS = -L/usr/lib -lm -llapacke -lblas -lhwloc -lpthread -L/opt/ecrc/cuda/9.0/lib64/ -lcuda -lcudart -lcublas -L/home/omairyrm/al4san-switch/al4san-dev/build/install/lib -lal4san -lal4san_quark -lal4san_starpu -lal4san_parsec -L/home/omairyrm/chameleon/build-starpu-gpu-gcc/install/lib -lchameleon -lchameleon_starpu -lcoreblas -lcudablas -L/home/omairyrm/quark -lquark -L/home/omairyrm/parsec/build/install/lib -lparsec -L/home/omairyrm/starpu-1.3.3.28Oct.2019/install/lib -lstarpu-1.3 + +##starpu mpi +#LIBS = -lpthread -L../../hwloc-install-mpi/lib/ -lhwloc -L/opt/ecrc/openmpi/3.0.0-gcc-7.2.0/ub16/lib -lmpi -L../../build-mpi/install/lib -lal4san -lal4san_starpu -L../../starpu-1.2-install-mpi/lib/ -lstarpu-1.2 -lstarpumpi-1.2 -L${MKLROOT}/lib/intel64 -lm -lmkl_intel_lp64 -lmkl_sequential -lmkl_core + +#starpu +#LIBS = -lpthread -L../../hwloc-install/lib/ -lhwloc -L../../build/install/lib -lal4san -lal4san_starpu -L../../starpu-1.2-install/lib/ -lstarpu-1.2 -L${MKLROOT}/lib/intel64 -lm -lmkl_intel_lp64 -lmkl_sequential -lmkl_core + +#parsec mpi +#LIBS = -lpthread -lhwloc -L/opt/ecrc/openmpi/3.0.0-gcc-7.2.0/ub16/lib -lmpi -L../../build-parsec2/installdir/lib/ -lal4san -lal4san_parsec -L/home/omairyrm/parsec/build/installdir/lib -lparsec -L${MKLROOT}/lib/intel64 -lm -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -ldl + +#LIBS = -lpthread -lhwloc -L/opt/ecrc/openmpi/3.0.0-gcc-5.5.0/ub16/lib -lmpi -L../../build-parsec/install/lib -lal4san -lal4san_parsec -L/opt/ecrc/parsec/master-gcc-5.5.0-mkl-openmpi-plasma-2.8.0/ub16/lib -lparsec -L${MKLROOT}/lib/intel64 -lm -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -ldl + +#quark + +#LIBS = -lhwloc -lpthread -L/home/omairyrm/quark -lquark 
-L../../build-quark/install/lib -lal4san -lal4san_quark -L${MKLROOT}/lib/intel64 -lm -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -ldl + +#openmp +LIBS = -fopenmp -lhwloc -lpthread -L../../build-openmp/installdir/lib -lal4san -lal4san_openmp -L${MKLROOT}/lib/intel64 -lm -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -ldl + + +#INCLUDES = -I/home/omairyrm/al4san-switch/al4san-dev/build/install/include -I/home/omairyrm/chameleon/build-starpu-gpu-gcc/install/include -I/opt/ecrc/starpu/1.3.1-gcc-5.5.0-mkl-openmpi-4.0.1/ub16/include/starpu/1.3/ +#starpu mpi +#INCLUDES = -I${MKLROOT}/include -I../../build-mpi/install/include -I../../starpu-1.2-install-mpi/include/starpu/1.2/ +#INCLUDES = -I${MKLROOT}/include -I../../build/install/include -I../../starpu-1.2-install/include/starpu/1.2/ -LIBS = -L/usr/lib -lm -llapacke -lblas -lhwloc -lpthread -L/opt/ecrc/cuda/9.0/lib64/ -lcuda -lcudart -lcublas -L/home/omairyrm/al4san-switch/al4san-dev/build/installdir/lib -lal4san -lal4san_quark -lal4san_starpu -lal4san_parsec -L/home/omairyrm/chameleon/build-starpu-gpu-gcc/installdir/lib -lchameleon -lchameleon_starpu -lcoreblas -lcudablas -L/home/omairyrm/quark -lquark -L/home/omairyrm/parsec/build/installdir/lib -lparsec -L/home/omairyrm/starpu-1.3.3.28Oct.2019/install/lib -lstarpu-1.3 +#INCLUDES = -I${MKLROOT}/include -I../../build/install/include -I../../starpu-1.3-install/include/starpu/1.3/ +#parsec mpi +#INCLUDES = -I${MKLROOT}/include -I../../build-parsec2/installdir/include -I/home/omairyrm/parsec/build/installdir/include/ +#quark +#INCLUDES = -I${MKLROOT}/include -I../../build-quark/install/include -I/home/omairyrm/quark -INCLUDES = -I/home/omairyrm/al4san-switch/al4san-dev/build/installdir/include -I/home/omairyrm/chameleon/build-starpu-gpu-gcc/installdir/include -I/home/omairyrm/quark/ -I/home/omairyrm/parsec/build/installdir/include -I/home/omairyrm/starpu-1.3.3.28Oct.2019/install/include/starpu/1.3/ -I/home/omairyrm/parsec/build/installdir/include +#openmp +INCLUDES = 
-I${MKLROOT}/include -I../../build-openmp/installdir/include LOADLIBES = $(LIBS) diff --git a/example/potrf/Makefile b/example/potrf/Makefile index 2624494..551859b 100644 --- a/example/potrf/Makefile +++ b/example/potrf/Makefile @@ -1,7 +1,15 @@ # -*- Makefile -*- include Make.inc -CSHOBJS = potrf.o codelets/codelet_dgemm.o codelets/codelet_dpotrf.o codelets/codelet_dtrsm.o codelets/codelet_dsyrk.o +CSHOBJS = potrf.o\ + compute/dplgsy.o\ + compute/pdplgsy.o\ + coreblas/core_dplgsy.o\ + codelets/codelet_dgemm.o\ + codelets/codelet_dpotrf.o\ + codelets/codelet_dtrsm.o\ + codelets/codelet_dsyrk.o\ + codelets/codelet_dplgsy.o\ all: potrf @@ -14,5 +22,5 @@ potrf: $(CSHOBJS) .PHONY: clean all potrf clean: - (rm -f *.o codelets/*.o potrf *~) + (rm -f *.o codelets/*.o compute/*.o coreblas/*.o potrf *~) diff --git a/example/potrf/codelets/codelet_dgemm.c b/example/potrf/codelets/codelet_dgemm.c index aca930e..66ae056 100644 --- a/example/potrf/codelets/codelet_dgemm.c +++ b/example/potrf/codelets/codelet_dgemm.c @@ -58,11 +58,15 @@ void gemm_cpu_func( AL4SAN_arg_list *al4san_arg) AL4SAN_Unpack_Arg(al4san_arg, &transA, &transB, &m, &n, &k, &alpha, &A, &lda, &B, &ldb, &beta, &C, &ldc); - CORE_dgemm(transA, transB, - m, n, k, - alpha, A, lda, - B, ldb, - beta, C, ldc); + + cblas_dgemm( + CblasColMajor, + (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB, + m, n, k, + (alpha), A, lda, + B, ldb, + (beta), C, ldc); + } #ifdef AL4SAN_USE_CUDA @@ -108,8 +112,8 @@ void gemm_cuda_func(AL4SAN_arg_list *al4san_arg) } #endif // defined(AL4SAN_USE_CUDA) -void INSERT_Task_dgemm( const AL4SAN_option_t *options, - cham_trans_t transA, cham_trans_t transB, +void Task_dgemm( const AL4SAN_option_t *options, + al4san_trans_t transA, al4san_trans_t transB, int m, int n, int k, int nb, double alpha, const AL4SAN_desc_t *A, int Am, int An, int lda, const AL4SAN_desc_t *B, int Bm, int Bn, int ldb, @@ -124,6 +128,12 @@ void INSERT_Task_dgemm( const AL4SAN_option_t *options, * @param[in] Parameter 
list of va_list type to represent data and the dependencies */ + AL4SAN_BEGIN_ACCESS_DECLARATION; + AL4SAN_ACCESS_R(A, Am, An); + AL4SAN_ACCESS_R(B, Bm, Bn); + AL4SAN_ACCESS_RW(C, Cm, Cn); + AL4SAN_END_ACCESS_DECLARATION; + AL4SAN_Insert_Task(AL4SAN_TASK(gemm), (AL4SAN_option_t*)options, AL4SAN_VALUE, &transA, sizeof(int), AL4SAN_VALUE, &transB, sizeof(int), @@ -138,10 +148,13 @@ void INSERT_Task_dgemm( const AL4SAN_option_t *options, AL4SAN_VALUE, &beta, sizeof(double), AL4SAN_INOUT | AL4SAN_AFFINITY, AL4SAN_ADDR(C, double, Cm, Cn), AL4SAN_DEP, AL4SAN_VALUE, &ldc, sizeof(int), +#ifdef AL4SAN_USE_CUDA + AL4SAN_CUDA_FLG, ON, sizeof(int), +#endif AL4SAN_PRIORITY, options->priority, sizeof(int), AL4SAN_LABEL, "zgemm", sizeof(char), - AL4SAN_COLOR, "yellow", sizeof(char), +// AL4SAN_COLOR, "yellow", sizeof(char), ARG_END); } diff --git a/example/potrf/codelets/codelet_dplgsy.c b/example/potrf/codelets/codelet_dplgsy.c new file mode 100644 index 0000000..dc58406 --- /dev/null +++ b/example/potrf/codelets/codelet_dplgsy.c @@ -0,0 +1,83 @@ +/** + * + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * @copyright 2017-2018 King Abdullah University of Science and Technology (KAUST). + * All rights reserved. + *** + * + * @brief Chameleon dplgsy AL4SAN codelet + * + * @version 1.0.1 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Rabab Alomairy + * @date 2019-02-06 + * @precisions normal z -> c d s + * + */ + +#include "../potrf.h" + + +/* + * Preparing work's function: + * @param[in] First argument is task name. 
+ * @param[in] Second argument user function name +*/ + + +AL4SAN_TASK_CPU(plgsy, plgsy_cpu_fun) + +/* Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */ + +void plgsy_cpu_fun(AL4SAN_arg_list *al4san_arg) +{ + double bump; + int m; + int n; + double *A; + int lda; + int bigM; + int m0; + int n0; + unsigned long long int seed; + + AL4SAN_Unpack_Arg(al4san_arg, &bump, &m, &n, &A, &lda, &bigM, &m0, &n0, &seed ); + CORE_dplgsy( bump, m, n, A, lda, bigM, m0, n0, seed ); +} + + +void TASK_dplgsy( const AL4SAN_option_t *options, + double bump, int m, int n, AL4SAN_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ) +{ + + AL4SAN_BEGIN_ACCESS_DECLARATION; + AL4SAN_ACCESS_W(A, Am, An); + AL4SAN_END_ACCESS_DECLARATION; + + AL4SAN_Insert_Task(AL4SAN_TASK(plgsy), (AL4SAN_option_t * )options, + AL4SAN_VALUE, &bump, sizeof(double), + AL4SAN_VALUE, &m, sizeof(int), + AL4SAN_VALUE, &n, sizeof(int), + AL4SAN_OUTPUT | AL4SAN_AFFINITY, AL4SAN_ADDR(A, double, Am, An), AL4SAN_DEP, + AL4SAN_VALUE, &lda, sizeof(int), + AL4SAN_VALUE, &bigM, sizeof(int), + AL4SAN_VALUE, &m0, sizeof(int), + AL4SAN_VALUE, &n0, sizeof(int), + AL4SAN_VALUE, &seed, sizeof(unsigned long long int), +// AL4SAN_PRIORITY, options->priority, sizeof(int), +// AL4SAN_LABEL, "dplgsy", sizeof(char), + ARG_END); +} + + diff --git a/example/potrf/codelets/codelet_dpotrf.c b/example/potrf/codelets/codelet_dpotrf.c index cc7628f..a5f2b4d 100644 --- a/example/potrf/codelets/codelet_dpotrf.c +++ b/example/potrf/codelets/codelet_dpotrf.c @@ -38,8 +38,8 @@ AL4SAN_TASK_CPU(potrf, potrf_cpu_fun) -void INSERT_Task_dpotrf( const AL4SAN_option_t *options, - cham_uplo_t uplo, int n, int nb, +void Task_dpotrf( const AL4SAN_option_t *options, + al4san_uplo_t uplo, int n, int nb, const AL4SAN_desc_t *A, int Am, int An, int lda, int iinfo ) { @@ -51,6 +51,10 @@ void INSERT_Task_dpotrf( const AL4SAN_option_t *options, * @param[in] Parameter list of va_list 
type to represent data and the dependencies */ + AL4SAN_BEGIN_ACCESS_DECLARATION; + AL4SAN_ACCESS_RW(A, Am, An); + AL4SAN_END_ACCESS_DECLARATION; + AL4SAN_Insert_Task(AL4SAN_TASK(potrf), (AL4SAN_option_t * )options, AL4SAN_VALUE, &uplo, sizeof(int), AL4SAN_VALUE, &n, sizeof(int), @@ -59,9 +63,10 @@ void INSERT_Task_dpotrf( const AL4SAN_option_t *options, AL4SAN_VALUE, &(options->sequence), sizeof(AL4SAN_sequence_t*), AL4SAN_VALUE, &(options->request), sizeof(AL4SAN_request_t*), AL4SAN_VALUE, &iinfo, sizeof(int), - AL4SAN_PRIORITY, options->priority, sizeof(int), +/* AL4SAN_PRIORITY, options->priority, sizeof(int), AL4SAN_LABEL, "zpotrf", sizeof(char), - AL4SAN_COLOR, "green", sizeof(char), +*/ +// AL4SAN_COLOR, "green", sizeof(char), ARG_END); } @@ -85,7 +90,14 @@ void potrf_cpu_fun(AL4SAN_arg_list *al4san_arg) */ AL4SAN_Unpack_Arg(al4san_arg, &uplo, &n, &A, &lda, &sequence, &request, &iinfo); - CORE_dpotrf(uplo, n, A, lda, &info); - + info = LAPACKE_dpotrf_work( + LAPACK_COL_MAJOR, + 'U', + n, A, lda ); +// printf("\n &uplo:%d, &n:%d, &A:%p, &lda:%d, &sequence:%p, &request:%p, &iinfo:%d\n", uplo, n, A, lda, sequence, request, iinfo); + if (info !=0){ + printf("\nMatrix is not SPD:%d\n", info); + exit(0); + } } diff --git a/example/potrf/codelets/codelet_dsyrk.c b/example/potrf/codelets/codelet_dsyrk.c index bed02fc..78f82cf 100644 --- a/example/potrf/codelets/codelet_dsyrk.c +++ b/example/potrf/codelets/codelet_dsyrk.c @@ -36,8 +36,8 @@ AL4SAN_TASK_CPU_GPU(syrk, syrk_cpu_func, syrk_cuda_func) -void INSERT_Task_dsyrk( const AL4SAN_option_t *options, - cham_uplo_t uplo, cham_trans_t trans, +void Task_dsyrk( const AL4SAN_option_t *options, + al4san_uplo_t uplo, al4san_trans_t trans, int n, int k, int nb, double alpha, const AL4SAN_desc_t *A, int Am, int An, int lda, double beta, const AL4SAN_desc_t *C, int Cm, int Cn, int ldc ) @@ -49,6 +49,11 @@ void INSERT_Task_dsyrk( const AL4SAN_option_t *options, * @param[in] options argument which holds sequence data sturcture 
* @param[in] Parameter list of va_list type to represent data and the dependencies */ + AL4SAN_BEGIN_ACCESS_DECLARATION; + AL4SAN_ACCESS_R(A, Am, An); + AL4SAN_ACCESS_RW(C, Cm, Cn); + AL4SAN_END_ACCESS_DECLARATION; + AL4SAN_Insert_Task(AL4SAN_TASK(syrk), (AL4SAN_option_t * )options, AL4SAN_VALUE, &uplo, sizeof(int), AL4SAN_VALUE, &trans, sizeof(int), @@ -60,10 +65,12 @@ void INSERT_Task_dsyrk( const AL4SAN_option_t *options, AL4SAN_VALUE, &beta, sizeof(double), AL4SAN_INOUT | AL4SAN_AFFINITY, AL4SAN_ADDR(C, double, Cm, Cn), AL4SAN_DEP, AL4SAN_VALUE, &ldc, sizeof(int), +#ifdef AL4SAN_USE_CUDA AL4SAN_CUDA_FLG, ON, sizeof(int), +#endif AL4SAN_PRIORITY, options->priority, sizeof(int), AL4SAN_LABEL, "zsyrk", sizeof(char), - AL4SAN_COLOR, "red", sizeof(char), +// AL4SAN_COLOR, "red", sizeof(char), ARG_END); } @@ -89,10 +96,13 @@ void syrk_cpu_func(AL4SAN_arg_list *al4san_arg) AL4SAN_Unpack_Arg(al4san_arg, &uplo, &trans, &n, &k, &alpha, &A, &lda, &beta, &C, &ldc); - CORE_dsyrk(uplo, trans, + cblas_dsyrk( + CblasColMajor, + (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans, n, k, - alpha, A, lda, - beta, C, ldc); + (alpha), A, lda, + (beta), C, ldc); + } #ifdef AL4SAN_USE_CUDA diff --git a/example/potrf/codelets/codelet_dtrsm.c b/example/potrf/codelets/codelet_dtrsm.c index 8f30e17..84fc3f3 100644 --- a/example/potrf/codelets/codelet_dtrsm.c +++ b/example/potrf/codelets/codelet_dtrsm.c @@ -35,8 +35,8 @@ AL4SAN_TASK_CPU_GPU(trsm, trsm_cpu_func, trsm_cuda_func) -void INSERT_Task_dtrsm( const AL4SAN_option_t *options, - cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, +void Task_dtrsm( const AL4SAN_option_t *options, + al4san_side_t side, al4san_uplo_t uplo, al4san_trans_t transA, al4san_diag_t diag, int m, int n, int nb, double alpha, const AL4SAN_desc_t *A, int Am, int An, int lda, const AL4SAN_desc_t *B, int Bm, int Bn, int ldb ) @@ -49,6 +49,12 @@ void INSERT_Task_dtrsm( const AL4SAN_option_t *options, * @param[in] Parameter list of va_list type to 
represent data and the dependencies */ + AL4SAN_BEGIN_ACCESS_DECLARATION; + AL4SAN_ACCESS_R(A, Am, An); + AL4SAN_ACCESS_RW(B, Bm, Bn); + AL4SAN_END_ACCESS_DECLARATION; + + AL4SAN_Insert_Task(AL4SAN_TASK(trsm), (AL4SAN_option_t * )options, AL4SAN_VALUE, &side, sizeof(int), AL4SAN_VALUE, &uplo, sizeof(int), @@ -61,10 +67,11 @@ void INSERT_Task_dtrsm( const AL4SAN_option_t *options, AL4SAN_VALUE, &lda, sizeof(int), AL4SAN_INOUT | AL4SAN_AFFINITY, AL4SAN_ADDR(B, double, Bm, Bn), AL4SAN_DEP, AL4SAN_VALUE, &ldb, sizeof(int), +#ifdef AL4SAN_USE_CUDA AL4SAN_CUDA_FLG, ON, sizeof(int), +#endif AL4SAN_PRIORITY, options->priority, sizeof(int), AL4SAN_LABEL, "ztrsm", sizeof(char), - AL4SAN_COLOR, "yellow", sizeof(char), ARG_END); } @@ -90,11 +97,14 @@ void trsm_cpu_func(AL4SAN_arg_list *al4san_arg) AL4SAN_Unpack_Arg(al4san_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &A, &lda, &B, &ldb); - CORE_dtrsm(side, uplo, - transA, diag, + cblas_dtrsm( + CblasColMajor, + (CBLAS_SIDE)side, (CBLAS_UPLO)uplo, + (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag, m, n, - alpha, A, lda, + (alpha), A, lda, B, ldb); + } #ifdef AL4SAN_USE_CUDA void trsm_cuda_func(AL4SAN_arg_list *al4san_arg) diff --git a/example/potrf/compute/dplgsy.c b/example/potrf/compute/dplgsy.c new file mode 100644 index 0000000..05f7f4f --- /dev/null +++ b/example/potrf/compute/dplgsy.c @@ -0,0 +1,159 @@ +/** + * + * @file dplgsy.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon dplgsy wrappers + * + * @version 0.9.2 + * @comment This file is a copy of dplgsy.c, + * which has been automatically generated + * from Plasma 2.5.0 for AL4SAN 0.9.2 + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Rade Mathis + * @author Florent Pruvost + * @date 2014-11-16 + * @generated from /home/omairyrm/modified-al4san/chameleon/compute/zplgsy.c, normal z -> d, Sun Jun 14 16:04:55 2020 + * + */ + +#include "../potrf.h" + + +/** + ******************************************************************************** + * + * @ingroup double_Tile + * + * AL4SAN_dplgsy_Tile - Generate a random symmetric (positive definite if 'bump' is large enough) half-matrix by tiles. + * Tile equivalent of AL4SAN_dplgsy(). + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] bump + * The value to add to the diagonal to be sure + * to have a positive definite matrix. + * + * @param[in] uplo + * The half of the matrix that will be generated. + * + * @param[out] A + * On exit, the generated random symmetric matrix A. + * + * @param[in] seed + * The seed used in the random generation. 
+ * + ******************************************************************************* + * + * @retval AL4SAN_SUCCESS successful exit + * + ******************************************************************************* + * + * @sa AL4SAN_dplgsy + * @sa AL4SAN_dplgsy_Tile_Async + * @sa AL4SAN_cplgsy_Tile + * @sa AL4SAN_dplgsy_Tile + * @sa AL4SAN_splgsy_Tile + * @sa AL4SAN_dplgsy_Tile + * + */ +int dplgsy_Tile( double bump, al4san_uplo_t uplo, + AL4SAN_desc_t *A, + unsigned long long int seed ) +{ + AL4SAN_context_t *chamctxt; + AL4SAN_sequence_t *sequence = NULL; + AL4SAN_request_t request = AL4SAN_REQUEST_INITIALIZER; + int status; + + chamctxt = al4san_context_self(); + if (chamctxt == NULL) { + al4san_fatal_error("dplgsy_Tile", "AL4SAN not initialized"); + return AL4SAN_ERR_NOT_INITIALIZED; + } + sequence = AL4SAN_Sequence_Create(); + + dplgsy_Tile_Async( bump, uplo, A, seed, sequence, &request ); + + AL4SAN_Desc_Flush( A, sequence ); + + + AL4SAN_Sequence_Wait(sequence); + status = sequence->status; + + AL4SAN_Sequence_Destroy( sequence ); + + + return status; +} + +/** + ******************************************************************************** + * + * @ingroup double_Tile_Async + * + * AL4SAN_dplgsy_Tile_Async - Generate a random symmetric (positive definite if 'bump' is large enough) half-matrix by tiles. + * Non-blocking equivalent of AL4SAN_dplgsy_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). 
+ * + ******************************************************************************* + * + * @sa AL4SAN_dplgsy + * @sa AL4SAN_dplgsy_Tile + * @sa AL4SAN_cplgsy_Tile_Async + * @sa AL4SAN_dplgsy_Tile_Async + * @sa AL4SAN_splgsy_Tile_Async + * @sa AL4SAN_dplgsy_Tile_Async + * @sa AL4SAN_dplgsy_Tile_Async + * + */ +int dplgsy_Tile_Async( double bump, + al4san_uplo_t uplo, + AL4SAN_desc_t *A, + unsigned long long int seed, + AL4SAN_sequence_t *sequence, + AL4SAN_request_t *request ) +{ + AL4SAN_context_t *al4sanctxt; + + al4sanctxt = al4san_context_self(); + if (al4sanctxt == NULL) { + al4san_fatal_error("AL4SAN_dplgsy_Tile", "AL4SAN not initialized"); + return AL4SAN_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + al4san_fatal_error("AL4SAN_dplgsy_Tile", "NULL sequence"); + return AL4SAN_ERR_UNALLOCATED; + } + if (request == NULL) { + al4san_fatal_error("AL4SAN_dplgsy_Tile", "NULL request"); + return AL4SAN_ERR_UNALLOCATED; + } + /* Check sequence status */ + + pdplgsy( bump, uplo, A, seed, sequence, request ); + + return AL4SAN_SUCCESS; +} + diff --git a/example/potrf/compute/pdplgsy.c b/example/potrf/compute/pdplgsy.c new file mode 100644 index 0000000..31c39e3 --- /dev/null +++ b/example/potrf/compute/pdplgsy.c @@ -0,0 +1,97 @@ +/** + * + * @file pdplgsy.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon dplgsy parallel algorithm + * + * @version 0.9.2 + * @comment This file is a copy of pdplgsy.c, + * which has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 0.9.2 + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Rade Mathis + * @author Florent Pruvost + * @date 2014-11-16 + * @generated from /home/omairyrm/modified-chameleon/chameleon/compute/pzplgsy.c, normal z -> d, Sun Jun 14 16:04:55 2020 + * + */ +#include "../potrf.h" +#define A(m,n) A, m, n +#define BLKLDD(A, k) A->get_blkldd( A, k ) + +/** + * pdplgsy - Generate a random symmetric (positive definite if 'bump' is large enough) half-matrix by tiles. + */ +void pdplgsy( double bump, al4san_uplo_t uplo, AL4SAN_desc_t *A, + unsigned long long int seed, + AL4SAN_sequence_t *sequence, AL4SAN_request_t *request ) +{ + AL4SAN_context_t *chamctxt; + AL4SAN_option_t options; + + int m, n; + int ldam; + int tempmm, tempnn; + + chamctxt = al4san_context_self(); + + AL4SAN_Options_Init(&options, sequence, request); + + for (m = 0; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + ldam = BLKLDD(A, m); + + /* + * Al4sanLower + */ + if (uplo == Al4sanLower) { + for (n = 0; n <= m; n++) { + tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + + options.priority = m + n; + TASK_dplgsy( + &options, + bump, tempmm, tempnn, A(m, n), ldam, + A->m, m*A->mb, n*A->nb, seed ); + } + } + /* + * Al4sanUpper + */ + else if (uplo == Al4sanUpper) { + for (n = m; n < A->nt; n++) { + tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + + options.priority = m + n; + TASK_dplgsy( + &options, + bump, tempmm, tempnn, A(m, n), ldam, + A->m, m*A->mb, n*A->nb, seed ); + } + } + /* + * Al4sanUpperLower + */ + else { + for (n = 0; n < A->nt; n++) { + tempnn = n == A->nt-1 ? 
A->n-n*A->nb : A->nb; + + TASK_dplgsy( + &options, + bump, tempmm, tempnn, A(m, n), ldam, + A->m, m*A->mb, n*A->nb, seed ); + } + } + } + AL4SAN_Options_Finalize(&options); + +} diff --git a/example/potrf/coreblas/core_dplgsy.c b/example/potrf/coreblas/core_dplgsy.c new file mode 100644 index 0000000..0924190 --- /dev/null +++ b/example/potrf/coreblas/core_dplgsy.c @@ -0,0 +1,147 @@ +/** + * + * @file core_dplgsy.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon core_dplgsy CPU kernel + * + * @version 0.9.2 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 0.9.2 + * @author Piotr Luszczek + * @author Pierre Lemarinier + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @date 2014-11-16 + * @generated from /home/omairyrm/al4san-dev/modified-chameleon/chameleon/coreblas/compute/core_zplgsy.c, normal z -> d, Sat Jun 27 18:00:10 2020 + * + */ +#include "../potrf.h" + +/* + Rnd64seed is a global variable but it doesn't spoil thread safety. All matrix + generating threads only read Rnd64seed. It is safe to set Rnd64seed before + and after any calls to create_tile(). The only problem can be caused if + Rnd64seed is changed during the matrix generation time. 
/*
 * Parameters of the 64-bit linear congruential generator
 *
 *     ran <- Rnd64_A * ran + Rnd64_C        (unsigned arithmetic, wraps mod 2^64)
 *
 * RndF_Mul / RndD_Mul are 2^-64 in single / double precision; multiplying a
 * 64-bit state by one of them scales it into [0, 1].
 */
#define Rnd64_A  6364136223846793005ULL
#define Rnd64_C  1ULL
#define RndF_Mul 5.4210108624275222e-20f
#define RndD_Mul 5.4210108624275222e-20

/* Number of random draws consumed per matrix element (2 for complex builds). */
#if defined(PRECISION_z) || defined(PRECISION_c)
#define NBELEM   2
#else
#define NBELEM   1
#endif

/**
 * Rnd64_jump - Advance the generator by n steps in O(log n) operations.
 *
 * Walks the bits of n from least to most significant. At bit k the pair
 * (a_k, c_k) describes the affine map equal to 2^k plain generator steps;
 * it is applied to the running state whenever bit k of n is set and then
 * composed with itself to obtain the map for 2^(k+1) steps.
 *
 * @param n     number of generator steps to skip
 * @param seed  starting state
 * @return the state reached after n steps from seed (seed itself when n == 0)
 */
static unsigned long long int
Rnd64_jump(unsigned long long int n, unsigned long long int seed ) {
    unsigned long long int a_k = Rnd64_A;
    unsigned long long int c_k = Rnd64_C;
    unsigned long long int ran = seed;

    for (; n; n >>= 1) {
        if (n & 1) {
            ran = a_k * ran + c_k;
        }
        /* Compose the current map with itself: x -> a_k*(a_k*x + c_k) + c_k. */
        c_k *= (a_k + 1);
        a_k *= a_k;
    }

    return ran;
}

/**
 * CORE_dplgsy - Generate a tile for random symmetric (positive definite if
 * 'bump' is large enough) matrix.
 *
 * Every entry is 0.5f - ran * RndF_Mul, i.e. a value in [-0.5, 0.5] computed
 * in single precision and stored as double. The generator position used for
 * an entry depends only on its global coordinates and on bigM:
 *   - entries on or below the diagonal at global (r, c) use position r + c*bigM;
 *   - tiles above the diagonal use the position of the mirrored entry.
 * Tiles generated independently therefore agree with each other and the
 * assembled matrix is symmetric.
 *
 * The tile kind is decided by comparing the offsets m0 and n0.
 * NOTE(review): this presumably relies on square tiles (mb == nb) and, for
 * diagonal tiles, on m == n - confirm against the callers.
 *
 * @param bump  value added to the diagonal entries of a diagonal tile
 * @param m     number of rows of the tile
 * @param n     number of columns of the tile
 * @param A     tile storage, column-major, written in place
 * @param lda   leading dimension of A
 * @param bigM  stride between consecutive global columns in the random stream
 * @param m0    global row offset of the first row of the tile
 * @param n0    global column offset of the first column of the tile
 * @param seed  seed of the random stream shared by all tiles
 */
void CORE_dplgsy( double bump, int m, int n, double *A, int lda,
                  int bigM, int m0, int n0, unsigned long long int seed )
{
    int64_t i, j;
    unsigned long long int ran, jump;

    jump = (unsigned long long int)m0 + (unsigned long long int)n0 * (unsigned long long int)bigM;

    /*
     * Tile diagonal
     */
    if ( m0 == n0 ) {
        /* Draw the lower triangle (diagonal included) column by column. */
        for (j = 0; j < n; j++) {
            ran = Rnd64_jump( NBELEM * jump, seed );

            for (i = j; i < m; i++) {
                A[i+j*lda] = 0.5f - ran * RndF_Mul;
                ran = Rnd64_A * ran + Rnd64_C;
#if defined(PRECISION_z) || defined(PRECISION_c)
                A[i+j*lda] += I*(0.5f - ran * RndF_Mul);
                ran = Rnd64_A * ran + Rnd64_C;
#endif
            }
            /* Next column starts one row lower: skip a full column plus one entry. */
            jump += (unsigned long long int)bigM + 1;
        }

        /* Shift the diagonal and mirror the lower triangle into the upper one. */
        for (j = 0; j < n; j++) {
            A[j+j*lda] += bump;

            for (i = 0; i < j; i++) {
                A[lda*j+i] = A[lda*i+j];
            }
        }
    }
    /*
     * Lower part
     */
    else if ( m0 > n0 ) {
        for (j = 0; j < n; j++) {
            ran = Rnd64_jump( NBELEM * jump, seed );

            for (i = 0; i < m; i++) {
                A[i+j*lda] = 0.5f - ran * RndF_Mul;
                ran = Rnd64_A * ran + Rnd64_C;
#if defined(PRECISION_z) || defined(PRECISION_c)
                A[i+j*lda] += I*(0.5f - ran * RndF_Mul);
                ran = Rnd64_A * ran + Rnd64_C;
#endif
            }
            jump += bigM;
        }
    }
    /*
     * Upper part
     */
    else if ( m0 < n0 ) {
        /* Overwrite jump: start from the position of the mirrored lower tile. */
        jump = (unsigned long long int)n0 + (unsigned long long int)m0 * (unsigned long long int)bigM;

        /* Walk the tile row by row so it reads the mirrored tile's columns. */
        for (i = 0; i < m; i++) {
            ran = Rnd64_jump( NBELEM * jump, seed );

            for (j = 0; j < n; j++) {
                A[j*lda+i] = 0.5f - ran * RndF_Mul;
                ran = Rnd64_A * ran + Rnd64_C;
#if defined(PRECISION_z) || defined(PRECISION_c)
                A[j*lda+i] += I*(0.5f - ran * RndF_Mul);
                ran = Rnd64_A * ran + Rnd64_C;
#endif
            }
            jump += bigM;
        }
    }
}
Gflop/s +F# 20480, 20480, 20480, 256, 55, 1.477 1938.42 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 256, 55, 1.492 1919.46 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 256, 55, 1.507 1900.75 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 512, 55, 1.900 1507.28 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 512, 55, 1.908 1500.52 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 512, 55, 1.923 1489.19 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 768, 55, 1.832 1563.29 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 768, 55, 1.889 1516.16 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 768 +# eps: 1.110223e-16 +# 
Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 768, 55, 1.801 1589.80 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 1024, 55, 2.043 1401.73 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 1024, 55, 2.102 1362.05 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 1024, 55, 2.061 1389.36 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 256, 55, 5.130 1883.71 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 256, 55, 5.138 1880.96 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 256, 55, 5.128 1884.64 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 512, 55, 6.375 1515.90 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb 
Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 512, 55, 6.479 1491.68 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 512, 55, 6.214 1555.27 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 768, 55, 5.677 1702.37 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 768, 55, 5.575 1733.58 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 768, 55, 5.654 1709.21 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 1024, 55, 6.061 1594.58 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 1024, 55, 6.068 1592.68 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 1024, 55, 6.004 
1609.68 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 256, 55, 12.234 1872.40 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 256, 55, 12.219 1874.77 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 256, 55, 12.231 1872.91 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 512, 55, 14.663 1562.25 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 512, 55, 14.752 1552.82 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 512, 55, 14.540 1575.46 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 768, 55, 13.185 1737.34 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N 
K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 768, 55, 13.390 1710.72 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 768, 55, 13.086 1750.58 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 1024, 55, 13.489 1698.20 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 1024, 55, 13.562 1689.09 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 1024, 55, 13.290 1723.67 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 512, 55, 28.777 1554.75 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 512, 55, 27.981 1598.99 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 512, 55, 28.089 1592.82 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# 
N: 51200 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 768, 55, 25.923 1725.89 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 768, 55, 25.631 1745.59 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 768, 55, 25.355 1764.53 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1024, 55, 25.868 1729.56 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1024, 55, 25.267 1770.69 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1024, 55, 25.737 1738.38 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1280, 55, 24.611 1817.91 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1280, 55, 24.549 1822.49 +# +# 
AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1280, 55, 24.852 1800.24 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1536, 55, 25.051 1785.98 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1536, 55, 24.713 1810.40 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1536, 55, 25.305 1768.03 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1792, 55, 24.976 1791.35 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1792, 55, 25.434 1759.05 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1792, 55, 24.492 1826.75 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N 
K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 2048, 55, 26.406 1694.31 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 2048, 55, 26.637 1679.66 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 2048, 55, 26.955 1659.82 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 512, 55, 48.317 1600.10 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 512, 55, 48.301 1600.62 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 512, 55, 48.197 1604.05 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 768, 55, 43.186 1790.21 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 768, 55, 42.731 1809.26 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 
61440 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 768, 55, 42.743 1808.75 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1024, 55, 43.841 1763.45 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1024, 55, 44.470 1738.52 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1024, 55, 44.241 1747.50 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1280, 55, 43.096 1793.91 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1280, 55, 43.152 1791.61 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1280, 55, 42.632 1813.45 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1536, 55, 41.577 1859.48 +# +# 
AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1536, 55, 40.800 1894.90 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1536, 55, 40.497 1909.05 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1792, 55, 41.536 1861.30 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1792, 55, 40.852 1892.46 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1792, 55, 42.190 1832.47 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 2048, 55, 43.115 1793.15 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 2048, 55, 45.008 1717.73 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N 
K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 2048, 55, 43.410 1780.95 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 512, 55, 76.558 1603.58 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 512, 55, 76.816 1598.19 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 512, 55, 76.927 1595.89 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 768, 55, 69.327 1770.83 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 768, 55, 69.375 1769.62 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 768, 55, 69.316 1771.13 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1024 
+# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1792 +# eps: 1.110223e-16 +# Activated 
runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 2048, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 2048, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 2048, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 512, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 512, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb 
threads seconds Gflop/s +F# 81920, 81920, 81920, 512, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 768, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 768, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 768, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 
81920, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 2048, 55, # +# AL4SAN 1.0.0, 
./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 2048, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 2048, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 512, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 512, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 512, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 768, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 768, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 768, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 
+# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb 
gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 2048, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 2048, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 2048, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 512, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 
+# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 512, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 512, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 768, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 768, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 768, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# 
Nb Qcols: 1 +# N: 122880 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb 
Qcols: 1 +# N: 122880 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 2048, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 2048, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 2048, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 512, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 512, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 512, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 768, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 
143360 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 768, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 768, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 1024, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 1280, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# 
NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 1536, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 1792, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 2048, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 2048, 55, # +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 143360 +# NB: 
2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 143360, 143360, 143360, 2048, 55, \ No newline at end of file diff --git a/example/potrf/parsec2.log b/example/potrf/parsec2.log new file mode 100644 index 0000000..ffab9bb --- /dev/null +++ b/example/potrf/parsec2.log @@ -0,0 +1,78 @@ +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 3584 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 3584, 55, 835.914 1753.80 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 3584 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 3584, 55, 895.981 1636.23 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 3584 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 3584, 55, 832.869 1760.22 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 4096 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 4096, 55, 865.124 1694.59 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 4096 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 4096, 55, 908.302 1614.03 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 4096 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 4096, 55, 872.316 1680.62 diff --git a/example/potrf/parsec3.log 
b/example/potrf/parsec3.log new file mode 100644 index 0000000..fe13531 --- /dev/null +++ b/example/potrf/parsec3.log @@ -0,0 +1,2130 @@ +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 1024, 55, 679.271 1778.33 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 1024, 55, 714.810 1689.92 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 1024, 55, 690.109 1750.41 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 1536, 55, 644.388 1874.60 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 1536, 55, 624.039 1935.73 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 1536, 55, 620.771 1945.92 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 2048, 55, 618.285 1953.74 
+# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 2048, 55, 617.255 1957.01 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 2048, 55, 624.166 1935.34 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 2560, 55, 594.757 2031.03 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 2560, 55, 596.402 2025.43 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 2560, 55, 598.407 2018.65 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 3072, 55, 614.829 1964.73 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 3072, 55, 588.144 2053.87 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 153600 +# NB: 3072 +# eps: 1.110223e-16 
+# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 153600, 153600, 153600, 3072, 55, 610.364 1979.10 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 1024, 55, 1033.723 1418.20 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 1024, 55, 1003.142 1461.44 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 1024, 55, 1022.899 1433.21 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 1536, 55, 942.773 1555.02 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 1536, 55, 933.917 1569.76 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 1536, 55, 950.478 1542.41 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 2048, 55, 910.626 1609.91 +# 
+# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 2048, 55, 907.290 1615.83 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 2048, 55, 925.611 1583.85 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 2560, 55, 861.534 1701.65 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 2560, 55, 870.727 1683.68 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 2560, 55, 870.799 1683.54 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 3072, 55, 838.904 1747.55 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 3072, 55, 835.032 1755.66 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 163840 +# NB: 3072 +# eps: 1.110223e-16 +# 
Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 163840, 163840, 163840, 3072, 55, 837.525 1750.43 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 174080, 174080, 1024, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:54229] *** Process received signal *** +[flamingo:54229] Signal: Segmentation fault (11) +[flamingo:54229] Signal code: Address not mapped (1) +[flamingo:54229] Failing at address: 0xb0 +[flamingo:54229] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fc6c5adb390] +[flamingo:54229] [ 1] ./potrf[0x408244] +[flamingo:54229] [ 2] ./potrf[0x406b85] +[flamingo:54229] [ 3] ./potrf[0x402abc] +[flamingo:54229] [ 4] ./potrf[0x4021c5] +[flamingo:54229] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fc6be595830] +[flamingo:54229] [ 6] ./potrf[0x4024f9] +[flamingo:54229] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 174080, 174080, 1024, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:54292] *** Process received signal *** +[flamingo:54292] Signal: Segmentation fault (11) +[flamingo:54292] Signal code: Address not mapped (1) +[flamingo:54292] Failing 
at address: 0xb0 +[flamingo:54292] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f384f939390] +[flamingo:54292] [ 1] ./potrf[0x408244] +[flamingo:54292] [ 2] ./potrf[0x406b85] +[flamingo:54292] [ 3] ./potrf[0x402abc] +[flamingo:54292] [ 4] ./potrf[0x4021c5] +[flamingo:54292] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f38483f3830] +[flamingo:54292] [ 6] ./potrf[0x4024f9] +[flamingo:54292] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 174080, 174080, 1024, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:54355] *** Process received signal *** +[flamingo:54355] Signal: Segmentation fault (11) +[flamingo:54355] Signal code: Address not mapped (1) +[flamingo:54355] Failing at address: 0xb0 +[flamingo:54355] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f36f39fc390] +[flamingo:54355] [ 1] ./potrf[0x408244] +[flamingo:54355] [ 2] ./potrf[0x406b85] +[flamingo:54355] [ 3] ./potrf[0x402abc] +[flamingo:54355] [ 4] ./potrf[0x4021c5] +[flamingo:54355] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f36ec4b6830] +[flamingo:54355] [ 6] ./potrf[0x4024f9] +[flamingo:54355] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 174080, 174080, 1536, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: 
AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:54418] *** Process received signal *** +[flamingo:54418] Signal: Segmentation fault (11) +[flamingo:54418] Signal code: Address not mapped (1) +[flamingo:54418] Failing at address: 0xb0 +[flamingo:54418] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f6d7733d390] +[flamingo:54418] [ 1] ./potrf[0x408244] +[flamingo:54418] [ 2] ./potrf[0x406b85] +[flamingo:54418] [ 3] ./potrf[0x402abc] +[flamingo:54418] [ 4] ./potrf[0x4021c5] +[flamingo:54418] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f6d6fdf7830] +[flamingo:54418] [ 6] ./potrf[0x4024f9] +[flamingo:54418] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 174080, 174080, 1536, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:54481] *** Process received signal *** +[flamingo:54481] Signal: Segmentation fault (11) +[flamingo:54481] Signal code: Address not mapped (1) +[flamingo:54481] Failing at address: 0xb0 +[flamingo:54481] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fe098e43390] +[flamingo:54481] [ 1] ./potrf[0x408244] +[flamingo:54481] [ 2] ./potrf[0x406b85] +[flamingo:54481] [ 3] ./potrf[0x402abc] +[flamingo:54481] [ 4] ./potrf[0x4021c5] +[flamingo:54481] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fe0918fd830] +[flamingo:54481] [ 6] ./potrf[0x4024f9] +[flamingo:54481] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb 
gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 174080, 174080, 1536, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:54544] *** Process received signal *** +[flamingo:54544] Signal: Segmentation fault (11) +[flamingo:54544] Signal code: Address not mapped (1) +[flamingo:54544] Failing at address: 0xb0 +[flamingo:54544] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f62b2925390] +[flamingo:54544] [ 1] ./potrf[0x408244] +[flamingo:54544] [ 2] ./potrf[0x406b85] +[flamingo:54544] [ 3] ./potrf[0x402abc] +[flamingo:54544] [ 4] ./potrf[0x4021c5] +[flamingo:54544] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f62ab3df830] +[flamingo:54544] [ 6] ./potrf[0x4024f9] +[flamingo:54544] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 174080, 174080, 2048, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:54607] *** Process received signal *** +[flamingo:54607] Signal: Segmentation fault (11) +[flamingo:54607] Signal code: Address not mapped (1) +[flamingo:54607] Failing at address: 0xb0 +[flamingo:54607] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f1200fe1390] +[flamingo:54607] [ 1] ./potrf[0x408244] +[flamingo:54607] [ 2] 
./potrf[0x406b85] +[flamingo:54607] [ 3] ./potrf[0x402abc] +[flamingo:54607] [ 4] ./potrf[0x4021c5] +[flamingo:54607] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f11f9a9b830] +[flamingo:54607] [ 6] ./potrf[0x4024f9] +[flamingo:54607] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 174080, 174080, 2048, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:54670] *** Process received signal *** +[flamingo:54670] Signal: Segmentation fault (11) +[flamingo:54670] Signal code: Address not mapped (1) +[flamingo:54670] Failing at address: 0xb0 +[flamingo:54670] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7ffa0f3de390] +[flamingo:54670] [ 1] ./potrf[0x408244] +[flamingo:54670] [ 2] ./potrf[0x406b85] +[flamingo:54670] [ 3] ./potrf[0x402abc] +[flamingo:54670] [ 4] ./potrf[0x4021c5] +[flamingo:54670] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7ffa07e98830] +[flamingo:54670] [ 6] ./potrf[0x4024f9] +[flamingo:54670] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 174080, 174080, 2048, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:54733] *** 
Process received signal *** +[flamingo:54733] Signal: Segmentation fault (11) +[flamingo:54733] Signal code: Address not mapped (1) +[flamingo:54733] Failing at address: 0xb0 +[flamingo:54733] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7ff3b812c390] +[flamingo:54733] [ 1] ./potrf[0x408244] +[flamingo:54733] [ 2] ./potrf[0x406b85] +[flamingo:54733] [ 3] ./potrf[0x402abc] +[flamingo:54733] [ 4] ./potrf[0x4021c5] +[flamingo:54733] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7ff3b0be6830] +[flamingo:54733] [ 6] ./potrf[0x4024f9] +[flamingo:54733] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 174080, 174080, 2560, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:54796] *** Process received signal *** +[flamingo:54796] Signal: Segmentation fault (11) +[flamingo:54796] Signal code: Address not mapped (1) +[flamingo:54796] Failing at address: 0xb0 +[flamingo:54796] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f2f259a7390] +[flamingo:54796] [ 1] ./potrf[0x408244] +[flamingo:54796] [ 2] ./potrf[0x406b85] +[flamingo:54796] [ 3] ./potrf[0x402abc] +[flamingo:54796] [ 4] ./potrf[0x4021c5] +[flamingo:54796] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f2f1e461830] +[flamingo:54796] [ 6] ./potrf[0x4024f9] +[flamingo:54796] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 
174080, 174080, 2560, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:54859] *** Process received signal *** +[flamingo:54859] Signal: Segmentation fault (11) +[flamingo:54859] Signal code: Address not mapped (1) +[flamingo:54859] Failing at address: 0xb0 +[flamingo:54859] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f6e61aaa390] +[flamingo:54859] [ 1] ./potrf[0x408244] +[flamingo:54859] [ 2] ./potrf[0x406b85] +[flamingo:54859] [ 3] ./potrf[0x402abc] +[flamingo:54859] [ 4] ./potrf[0x4021c5] +[flamingo:54859] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f6e5a564830] +[flamingo:54859] [ 6] ./potrf[0x4024f9] +[flamingo:54859] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 174080, 174080, 2560, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:54922] *** Process received signal *** +[flamingo:54922] Signal: Segmentation fault (11) +[flamingo:54922] Signal code: Address not mapped (1) +[flamingo:54922] Failing at address: 0xb0 +[flamingo:54922] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f3a44c17390] +[flamingo:54922] [ 1] ./potrf[0x408244] +[flamingo:54922] [ 2] ./potrf[0x406b85] +[flamingo:54922] [ 3] ./potrf[0x402abc] +[flamingo:54922] [ 4] ./potrf[0x4021c5] +[flamingo:54922] [ 5] 
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f3a3d6d1830] +[flamingo:54922] [ 6] ./potrf[0x4024f9] +[flamingo:54922] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 174080, 174080, 3072, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:54985] *** Process received signal *** +[flamingo:54985] Signal: Segmentation fault (11) +[flamingo:54985] Signal code: Address not mapped (1) +[flamingo:54985] Failing at address: 0xb0 +[flamingo:54985] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f1fd2490390] +[flamingo:54985] [ 1] ./potrf[0x408244] +[flamingo:54985] [ 2] ./potrf[0x406b85] +[flamingo:54985] [ 3] ./potrf[0x402abc] +[flamingo:54985] [ 4] ./potrf[0x4021c5] +[flamingo:54985] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f1fcaf4a830] +[flamingo:54985] [ 6] ./potrf[0x4024f9] +[flamingo:54985] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 174080, 174080, 3072, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55048] *** Process received signal *** +[flamingo:55048] Signal: Segmentation fault (11) +[flamingo:55048] Signal code: Address not 
mapped (1) +[flamingo:55048] Failing at address: 0xb0 +[flamingo:55048] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f67bbef9390] +[flamingo:55048] [ 1] ./potrf[0x408244] +[flamingo:55048] [ 2] ./potrf[0x406b85] +[flamingo:55048] [ 3] ./potrf[0x402abc] +[flamingo:55048] [ 4] ./potrf[0x4021c5] +[flamingo:55048] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f67b49b3830] +[flamingo:55048] [ 6] ./potrf[0x4024f9] +[flamingo:55048] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 174080 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 174080, 174080, 174080, 3072, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55111] *** Process received signal *** +[flamingo:55111] Signal: Segmentation fault (11) +[flamingo:55111] Signal code: Address not mapped (1) +[flamingo:55111] Failing at address: 0xb0 +[flamingo:55111] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f3f68275390] +[flamingo:55111] [ 1] ./potrf[0x408244] +[flamingo:55111] [ 2] ./potrf[0x406b85] +[flamingo:55111] [ 3] ./potrf[0x402abc] +[flamingo:55111] [ 4] ./potrf[0x4021c5] +[flamingo:55111] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f3f60d2f830] +[flamingo:55111] [ 6] ./potrf[0x4024f9] +[flamingo:55111] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 184320, 184320, 184320, 1024, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL 
matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55175] *** Process received signal *** +[flamingo:55175] Signal: Segmentation fault (11) +[flamingo:55175] Signal code: Address not mapped (1) +[flamingo:55175] Failing at address: 0xb0 +[flamingo:55175] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f7a69381390] +[flamingo:55175] [ 1] ./potrf[0x408244] +[flamingo:55175] [ 2] ./potrf[0x406b85] +[flamingo:55175] [ 3] ./potrf[0x402abc] +[flamingo:55175] [ 4] ./potrf[0x4021c5] +[flamingo:55175] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f7a61e3b830] +[flamingo:55175] [ 6] ./potrf[0x4024f9] +[flamingo:55175] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 184320, 184320, 184320, 1024, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55239] *** Process received signal *** +[flamingo:55239] Signal: Segmentation fault (11) +[flamingo:55239] Signal code: Address not mapped (1) +[flamingo:55239] Failing at address: 0xb0 +[flamingo:55239] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f678b666390] +[flamingo:55239] [ 1] ./potrf[0x408244] +[flamingo:55239] [ 2] ./potrf[0x406b85] +[flamingo:55239] [ 3] ./potrf[0x402abc] +[flamingo:55239] [ 4] ./potrf[0x4021c5] +[flamingo:55239] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f6784120830] +[flamingo:55239] [ 6] ./potrf[0x4024f9] +[flamingo:55239] *** End of error message *** +# +# AL4SAN 1.0.0, 
./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 184320, 184320, 184320, 1024, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55302] *** Process received signal *** +[flamingo:55302] Signal: Segmentation fault (11) +[flamingo:55302] Signal code: Address not mapped (1) +[flamingo:55302] Failing at address: 0xb0 +[flamingo:55302] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7ff6dc15c390] +[flamingo:55302] [ 1] ./potrf[0x408244] +[flamingo:55302] [ 2] ./potrf[0x406b85] +[flamingo:55302] [ 3] ./potrf[0x402abc] +[flamingo:55302] [ 4] ./potrf[0x4021c5] +[flamingo:55302] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7ff6d4c16830] +[flamingo:55302] [ 6] ./potrf[0x4024f9] +[flamingo:55302] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 184320, 184320, 184320, 1536, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55365] *** Process received signal *** +[flamingo:55365] Signal: Segmentation fault (11) +[flamingo:55365] Signal code: Address not mapped (1) +[flamingo:55365] Failing at address: 0xb0 +[flamingo:55365] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f14fb378390] +[flamingo:55365] [ 1] ./potrf[0x408244] 
+[flamingo:55365] [ 2] ./potrf[0x406b85] +[flamingo:55365] [ 3] ./potrf[0x402abc] +[flamingo:55365] [ 4] ./potrf[0x4021c5] +[flamingo:55365] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f14f3e32830] +[flamingo:55365] [ 6] ./potrf[0x4024f9] +[flamingo:55365] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 184320, 184320, 184320, 1536, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55428] *** Process received signal *** +[flamingo:55428] Signal: Segmentation fault (11) +[flamingo:55428] Signal code: Address not mapped (1) +[flamingo:55428] Failing at address: 0xb0 +[flamingo:55428] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fa2bc90f390] +[flamingo:55428] [ 1] ./potrf[0x408244] +[flamingo:55428] [ 2] ./potrf[0x406b85] +[flamingo:55428] [ 3] ./potrf[0x402abc] +[flamingo:55428] [ 4] ./potrf[0x4021c5] +[flamingo:55428] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fa2b53c9830] +[flamingo:55428] [ 6] ./potrf[0x4024f9] +[flamingo:55428] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 184320, 184320, 184320, 1536, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor 
+[flamingo:55491] *** Process received signal *** +[flamingo:55491] Signal: Segmentation fault (11) +[flamingo:55491] Signal code: Address not mapped (1) +[flamingo:55491] Failing at address: 0xb0 +[flamingo:55491] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f37d4cb1390] +[flamingo:55491] [ 1] ./potrf[0x408244] +[flamingo:55491] [ 2] ./potrf[0x406b85] +[flamingo:55491] [ 3] ./potrf[0x402abc] +[flamingo:55491] [ 4] ./potrf[0x4021c5] +[flamingo:55491] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f37cd76b830] +[flamingo:55491] [ 6] ./potrf[0x4024f9] +[flamingo:55491] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 184320, 184320, 184320, 2048, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55554] *** Process received signal *** +[flamingo:55554] Signal: Segmentation fault (11) +[flamingo:55554] Signal code: Address not mapped (1) +[flamingo:55554] Failing at address: 0xb0 +[flamingo:55554] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f326d75e390] +[flamingo:55554] [ 1] ./potrf[0x408244] +[flamingo:55554] [ 2] ./potrf[0x406b85] +[flamingo:55554] [ 3] ./potrf[0x402abc] +[flamingo:55554] [ 4] ./potrf[0x4021c5] +[flamingo:55554] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f3266218830] +[flamingo:55554] [ 6] ./potrf[0x4024f9] +[flamingo:55554] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds 
Gflop/s +F# 184320, 184320, 184320, 2048, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55617] *** Process received signal *** +[flamingo:55617] Signal: Segmentation fault (11) +[flamingo:55617] Signal code: Address not mapped (1) +[flamingo:55617] Failing at address: 0xb0 +[flamingo:55617] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f61af074390] +[flamingo:55617] [ 1] ./potrf[0x408244] +[flamingo:55617] [ 2] ./potrf[0x406b85] +[flamingo:55617] [ 3] ./potrf[0x402abc] +[flamingo:55617] [ 4] ./potrf[0x4021c5] +[flamingo:55617] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f61a7b2e830] +[flamingo:55617] [ 6] ./potrf[0x4024f9] +[flamingo:55617] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 184320, 184320, 184320, 2048, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55680] *** Process received signal *** +[flamingo:55680] Signal: Segmentation fault (11) +[flamingo:55680] Signal code: Address not mapped (1) +[flamingo:55680] Failing at address: 0xb0 +[flamingo:55680] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f2586955390] +[flamingo:55680] [ 1] ./potrf[0x408244] +[flamingo:55680] [ 2] ./potrf[0x406b85] +[flamingo:55680] [ 3] ./potrf[0x402abc] +[flamingo:55680] [ 4] ./potrf[0x4021c5] +[flamingo:55680] [ 5] 
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f257f40f830] +[flamingo:55680] [ 6] ./potrf[0x4024f9] +[flamingo:55680] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 184320, 184320, 184320, 2560, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55743] *** Process received signal *** +[flamingo:55743] Signal: Segmentation fault (11) +[flamingo:55743] Signal code: Address not mapped (1) +[flamingo:55743] Failing at address: 0xb0 +[flamingo:55743] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fb5c0491390] +[flamingo:55743] [ 1] ./potrf[0x408244] +[flamingo:55743] [ 2] ./potrf[0x406b85] +[flamingo:55743] [ 3] ./potrf[0x402abc] +[flamingo:55743] [ 4] ./potrf[0x4021c5] +[flamingo:55743] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fb5b8f4b830] +[flamingo:55743] [ 6] ./potrf[0x4024f9] +[flamingo:55743] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 184320, 184320, 184320, 2560, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55806] *** Process received signal *** +[flamingo:55806] Signal: Segmentation fault (11) +[flamingo:55806] Signal code: Address not 
mapped (1) +[flamingo:55806] Failing at address: 0xb0 +[flamingo:55806] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f6daaf4a390] +[flamingo:55806] [ 1] ./potrf[0x408244] +[flamingo:55806] [ 2] ./potrf[0x406b85] +[flamingo:55806] [ 3] ./potrf[0x402abc] +[flamingo:55806] [ 4] ./potrf[0x4021c5] +[flamingo:55806] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f6da3a04830] +[flamingo:55806] [ 6] ./potrf[0x4024f9] +[flamingo:55806] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 184320, 184320, 184320, 2560, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55869] *** Process received signal *** +[flamingo:55869] Signal: Segmentation fault (11) +[flamingo:55869] Signal code: Address not mapped (1) +[flamingo:55869] Failing at address: 0xb0 +[flamingo:55869] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fcbc8cc1390] +[flamingo:55869] [ 1] ./potrf[0x408244] +[flamingo:55869] [ 2] ./potrf[0x406b85] +[flamingo:55869] [ 3] ./potrf[0x402abc] +[flamingo:55869] [ 4] ./potrf[0x4021c5] +[flamingo:55869] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fcbc177b830] +[flamingo:55869] [ 6] ./potrf[0x4024f9] +[flamingo:55869] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 184320, 184320, 184320, 3072, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL 
matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55932] *** Process received signal *** +[flamingo:55932] Signal: Segmentation fault (11) +[flamingo:55932] Signal code: Address not mapped (1) +[flamingo:55932] Failing at address: 0xb0 +[flamingo:55932] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f2be0e87390] +[flamingo:55932] [ 1] ./potrf[0x408244] +[flamingo:55932] [ 2] ./potrf[0x406b85] +[flamingo:55932] [ 3] ./potrf[0x402abc] +[flamingo:55932] [ 4] ./potrf[0x4021c5] +[flamingo:55932] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f2bd9941830] +[flamingo:55932] [ 6] ./potrf[0x4024f9] +[flamingo:55932] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 184320, 184320, 184320, 3072, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:55995] *** Process received signal *** +[flamingo:55995] Signal: Segmentation fault (11) +[flamingo:55995] Signal code: Address not mapped (1) +[flamingo:55995] Failing at address: 0xb0 +[flamingo:55995] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f3ab1935390] +[flamingo:55995] [ 1] ./potrf[0x408244] +[flamingo:55995] [ 2] ./potrf[0x406b85] +[flamingo:55995] [ 3] ./potrf[0x402abc] +[flamingo:55995] [ 4] ./potrf[0x4021c5] +[flamingo:55995] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f3aaa3ef830] +[flamingo:55995] [ 6] ./potrf[0x4024f9] +[flamingo:55995] *** End of error message *** +# +# AL4SAN 1.0.0, 
./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 184320 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 184320, 184320, 184320, 3072, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:56058] *** Process received signal *** +[flamingo:56058] Signal: Segmentation fault (11) +[flamingo:56058] Signal code: Address not mapped (1) +[flamingo:56058] Failing at address: 0xb0 +[flamingo:56058] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f59b23a9390] +[flamingo:56058] [ 1] ./potrf[0x408244] +[flamingo:56058] [ 2] ./potrf[0x406b85] +[flamingo:56058] [ 3] ./potrf[0x402abc] +[flamingo:56058] [ 4] ./potrf[0x4021c5] +[flamingo:56058] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f59aae63830] +[flamingo:56058] [ 6] ./potrf[0x4024f9] +[flamingo:56058] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 194560, 194560, 194560, 1024, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:56121] *** Process received signal *** +[flamingo:56121] Signal: Segmentation fault (11) +[flamingo:56121] Signal code: Address not mapped (1) +[flamingo:56121] Failing at address: 0xb0 +[flamingo:56121] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f6745297390] +[flamingo:56121] [ 1] ./potrf[0x408244] 
+[flamingo:56121] [ 2] ./potrf[0x406b85] +[flamingo:56121] [ 3] ./potrf[0x402abc] +[flamingo:56121] [ 4] ./potrf[0x4021c5] +[flamingo:56121] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f673dd51830] +[flamingo:56121] [ 6] ./potrf[0x4024f9] +[flamingo:56121] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 194560, 194560, 194560, 1024, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:56184] *** Process received signal *** +[flamingo:56184] Signal: Segmentation fault (11) +[flamingo:56184] Signal code: Address not mapped (1) +[flamingo:56184] Failing at address: 0xb0 +[flamingo:56184] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7feec9458390] +[flamingo:56184] [ 1] ./potrf[0x408244] +[flamingo:56184] [ 2] ./potrf[0x406b85] +[flamingo:56184] [ 3] ./potrf[0x402abc] +[flamingo:56184] [ 4] ./potrf[0x4021c5] +[flamingo:56184] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7feec1f12830] +[flamingo:56184] [ 6] ./potrf[0x4024f9] +[flamingo:56184] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 194560, 194560, 194560, 1024, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor 
+[flamingo:56247] *** Process received signal *** +[flamingo:56247] Signal: Segmentation fault (11) +[flamingo:56247] Signal code: Address not mapped (1) +[flamingo:56247] Failing at address: 0xb0 +[flamingo:56247] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f530aae1390] +[flamingo:56247] [ 1] ./potrf[0x408244] +[flamingo:56247] [ 2] ./potrf[0x406b85] +[flamingo:56247] [ 3] ./potrf[0x402abc] +[flamingo:56247] [ 4] ./potrf[0x4021c5] +[flamingo:56247] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f530359b830] +[flamingo:56247] [ 6] ./potrf[0x4024f9] +[flamingo:56247] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 194560, 194560, 194560, 1536, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:56310] *** Process received signal *** +[flamingo:56310] Signal: Segmentation fault (11) +[flamingo:56310] Signal code: Address not mapped (1) +[flamingo:56310] Failing at address: 0xb0 +[flamingo:56310] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f57e2b64390] +[flamingo:56310] [ 1] ./potrf[0x408244] +[flamingo:56310] [ 2] ./potrf[0x406b85] +[flamingo:56310] [ 3] ./potrf[0x402abc] +[flamingo:56310] [ 4] ./potrf[0x4021c5] +[flamingo:56310] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f57db61e830] +[flamingo:56310] [ 6] ./potrf[0x4024f9] +[flamingo:56310] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds 
Gflop/s +F# 194560, 194560, 194560, 1536, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:56373] *** Process received signal *** +[flamingo:56373] Signal: Segmentation fault (11) +[flamingo:56373] Signal code: Address not mapped (1) +[flamingo:56373] Failing at address: 0xb0 +[flamingo:56373] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f6a1ce6c390] +[flamingo:56373] [ 1] ./potrf[0x408244] +[flamingo:56373] [ 2] ./potrf[0x406b85] +[flamingo:56373] [ 3] ./potrf[0x402abc] +[flamingo:56373] [ 4] ./potrf[0x4021c5] +[flamingo:56373] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f6a15926830] +[flamingo:56373] [ 6] ./potrf[0x4024f9] +[flamingo:56373] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 194560, 194560, 194560, 1536, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:56436] *** Process received signal *** +[flamingo:56436] Signal: Segmentation fault (11) +[flamingo:56436] Signal code: Address not mapped (1) +[flamingo:56436] Failing at address: 0xb0 +[flamingo:56436] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7ff303372390] +[flamingo:56436] [ 1] ./potrf[0x408244] +[flamingo:56436] [ 2] ./potrf[0x406b85] +[flamingo:56436] [ 3] ./potrf[0x402abc] +[flamingo:56436] [ 4] ./potrf[0x4021c5] +[flamingo:56436] [ 5] 
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7ff2fbe2c830] +[flamingo:56436] [ 6] ./potrf[0x4024f9] +[flamingo:56436] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 194560, 194560, 194560, 2048, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:56499] *** Process received signal *** +[flamingo:56499] Signal: Segmentation fault (11) +[flamingo:56499] Signal code: Address not mapped (1) +[flamingo:56499] Failing at address: 0xb0 +[flamingo:56499] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f3faa5cb390] +[flamingo:56499] [ 1] ./potrf[0x408244] +[flamingo:56499] [ 2] ./potrf[0x406b85] +[flamingo:56499] [ 3] ./potrf[0x402abc] +[flamingo:56499] [ 4] ./potrf[0x4021c5] +[flamingo:56499] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f3fa3085830] +[flamingo:56499] [ 6] ./potrf[0x4024f9] +[flamingo:56499] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 194560, 194560, 194560, 2048, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:56562] *** Process received signal *** +[flamingo:56562] Signal: Segmentation fault (11) +[flamingo:56562] Signal code: Address not 
mapped (1) +[flamingo:56562] Failing at address: 0xb0 +[flamingo:56562] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fdf59345390] +[flamingo:56562] [ 1] ./potrf[0x408244] +[flamingo:56562] [ 2] ./potrf[0x406b85] +[flamingo:56562] [ 3] ./potrf[0x402abc] +[flamingo:56562] [ 4] ./potrf[0x4021c5] +[flamingo:56562] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fdf51dff830] +[flamingo:56562] [ 6] ./potrf[0x4024f9] +[flamingo:56562] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 194560, 194560, 194560, 2048, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:56625] *** Process received signal *** +[flamingo:56625] Signal: Segmentation fault (11) +[flamingo:56625] Signal code: Address not mapped (1) +[flamingo:56625] Failing at address: 0xb0 +[flamingo:56625] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f74fe8be390] +[flamingo:56625] [ 1] ./potrf[0x408244] +[flamingo:56625] [ 2] ./potrf[0x406b85] +[flamingo:56625] [ 3] ./potrf[0x402abc] +[flamingo:56625] [ 4] ./potrf[0x4021c5] +[flamingo:56625] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f74f7378830] +[flamingo:56625] [ 6] ./potrf[0x4024f9] +[flamingo:56625] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 194560, 194560, 194560, 2560, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL 
matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:56688] *** Process received signal *** +[flamingo:56688] Signal: Segmentation fault (11) +[flamingo:56688] Signal code: Address not mapped (1) +[flamingo:56688] Failing at address: 0xb0 +[flamingo:56688] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f7348abb390] +[flamingo:56688] [ 1] ./potrf[0x408244] +[flamingo:56688] [ 2] ./potrf[0x406b85] +[flamingo:56688] [ 3] ./potrf[0x402abc] +[flamingo:56688] [ 4] ./potrf[0x4021c5] +[flamingo:56688] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f7341575830] +[flamingo:56688] [ 6] ./potrf[0x4024f9] +[flamingo:56688] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 194560, 194560, 194560, 2560, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:56751] *** Process received signal *** +[flamingo:56751] Signal: Segmentation fault (11) +[flamingo:56751] Signal code: Address not mapped (1) +[flamingo:56751] Failing at address: 0xb0 +[flamingo:56751] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f1f6047a390] +[flamingo:56751] [ 1] ./potrf[0x408244] +[flamingo:56751] [ 2] ./potrf[0x406b85] +[flamingo:56751] [ 3] ./potrf[0x402abc] +[flamingo:56751] [ 4] ./potrf[0x4021c5] +[flamingo:56751] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f1f58f34830] +[flamingo:56751] [ 6] ./potrf[0x4024f9] +[flamingo:56751] *** End of error message *** +# +# AL4SAN 1.0.0, 
./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 194560, 194560, 194560, 2560, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:56814] *** Process received signal *** +[flamingo:56814] Signal: Segmentation fault (11) +[flamingo:56814] Signal code: Address not mapped (1) +[flamingo:56814] Failing at address: 0xb0 +[flamingo:56814] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f4f7e60f390] +[flamingo:56814] [ 1] ./potrf[0x408244] +[flamingo:56814] [ 2] ./potrf[0x406b85] +[flamingo:56814] [ 3] ./potrf[0x402abc] +[flamingo:56814] [ 4] ./potrf[0x4021c5] +[flamingo:56814] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f4f770c9830] +[flamingo:56814] [ 6] ./potrf[0x4024f9] +[flamingo:56814] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 194560, 194560, 194560, 3072, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:56877] *** Process received signal *** +[flamingo:56877] Signal: Segmentation fault (11) +[flamingo:56877] Signal code: Address not mapped (1) +[flamingo:56877] Failing at address: 0xb0 +[flamingo:56877] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f1e9ec45390] +[flamingo:56877] [ 1] ./potrf[0x408244] 
+[flamingo:56877] [ 2] ./potrf[0x406b85] +[flamingo:56877] [ 3] ./potrf[0x402abc] +[flamingo:56877] [ 4] ./potrf[0x4021c5] +[flamingo:56877] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f1e976ff830] +[flamingo:56877] [ 6] ./potrf[0x4024f9] +[flamingo:56877] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 194560, 194560, 194560, 3072, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:56940] *** Process received signal *** +[flamingo:56940] Signal: Segmentation fault (11) +[flamingo:56940] Signal code: Address not mapped (1) +[flamingo:56940] Failing at address: 0xb0 +[flamingo:56940] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f10f13fe390] +[flamingo:56940] [ 1] ./potrf[0x408244] +[flamingo:56940] [ 2] ./potrf[0x406b85] +[flamingo:56940] [ 3] ./potrf[0x402abc] +[flamingo:56940] [ 4] ./potrf[0x4021c5] +[flamingo:56940] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f10e9eb8830] +[flamingo:56940] [ 6] ./potrf[0x4024f9] +[flamingo:56940] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 194560 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 194560, 194560, 194560, 3072, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor 
+[flamingo:57003] *** Process received signal *** +[flamingo:57003] Signal: Segmentation fault (11) +[flamingo:57003] Signal code: Address not mapped (1) +[flamingo:57003] Failing at address: 0xb0 +[flamingo:57003] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f53412e0390] +[flamingo:57003] [ 1] ./potrf[0x408244] +[flamingo:57003] [ 2] ./potrf[0x406b85] +[flamingo:57003] [ 3] ./potrf[0x402abc] +[flamingo:57003] [ 4] ./potrf[0x4021c5] +[flamingo:57003] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f5339d9a830] +[flamingo:57003] [ 6] ./potrf[0x4024f9] +[flamingo:57003] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 204800, 204800, 204800, 1024, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:57066] *** Process received signal *** +[flamingo:57066] Signal: Segmentation fault (11) +[flamingo:57066] Signal code: Address not mapped (1) +[flamingo:57066] Failing at address: 0xb0 +[flamingo:57066] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fa281769390] +[flamingo:57066] [ 1] ./potrf[0x408244] +[flamingo:57066] [ 2] ./potrf[0x406b85] +[flamingo:57066] [ 3] ./potrf[0x402abc] +[flamingo:57066] [ 4] ./potrf[0x4021c5] +[flamingo:57066] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fa27a223830] +[flamingo:57066] [ 6] ./potrf[0x4024f9] +[flamingo:57066] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds 
Gflop/s +F# 204800, 204800, 204800, 1024, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:57129] *** Process received signal *** +[flamingo:57129] Signal: Segmentation fault (11) +[flamingo:57129] Signal code: Address not mapped (1) +[flamingo:57129] Failing at address: 0xb0 +[flamingo:57129] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f0261577390] +[flamingo:57129] [ 1] ./potrf[0x408244] +[flamingo:57129] [ 2] ./potrf[0x406b85] +[flamingo:57129] [ 3] ./potrf[0x402abc] +[flamingo:57129] [ 4] ./potrf[0x4021c5] +[flamingo:57129] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f025a031830] +[flamingo:57129] [ 6] ./potrf[0x4024f9] +[flamingo:57129] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 204800, 204800, 204800, 1024, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:57192] *** Process received signal *** +[flamingo:57192] Signal: Segmentation fault (11) +[flamingo:57192] Signal code: Address not mapped (1) +[flamingo:57192] Failing at address: 0xb0 +[flamingo:57192] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f96841b9390] +[flamingo:57192] [ 1] ./potrf[0x408244] +[flamingo:57192] [ 2] ./potrf[0x406b85] +[flamingo:57192] [ 3] ./potrf[0x402abc] +[flamingo:57192] [ 4] ./potrf[0x4021c5] +[flamingo:57192] [ 5] 
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f967cc73830] +[flamingo:57192] [ 6] ./potrf[0x4024f9] +[flamingo:57192] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 204800, 204800, 204800, 1536, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:57255] *** Process received signal *** +[flamingo:57255] Signal: Segmentation fault (11) +[flamingo:57255] Signal code: Address not mapped (1) +[flamingo:57255] Failing at address: 0xb0 +[flamingo:57255] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f265faa7390] +[flamingo:57255] [ 1] ./potrf[0x408244] +[flamingo:57255] [ 2] ./potrf[0x406b85] +[flamingo:57255] [ 3] ./potrf[0x402abc] +[flamingo:57255] [ 4] ./potrf[0x4021c5] +[flamingo:57255] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f2658561830] +[flamingo:57255] [ 6] ./potrf[0x4024f9] +[flamingo:57255] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 204800, 204800, 204800, 1536, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:57318] *** Process received signal *** +[flamingo:57318] Signal: Segmentation fault (11) +[flamingo:57318] Signal code: Address not 
mapped (1) +[flamingo:57318] Failing at address: 0xb0 +[flamingo:57318] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fb4fe63e390] +[flamingo:57318] [ 1] ./potrf[0x408244] +[flamingo:57318] [ 2] ./potrf[0x406b85] +[flamingo:57318] [ 3] ./potrf[0x402abc] +[flamingo:57318] [ 4] ./potrf[0x4021c5] +[flamingo:57318] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fb4f70f8830] +[flamingo:57318] [ 6] ./potrf[0x4024f9] +[flamingo:57318] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 204800, 204800, 204800, 1536, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:57381] *** Process received signal *** +[flamingo:57381] Signal: Segmentation fault (11) +[flamingo:57381] Signal code: Address not mapped (1) +[flamingo:57381] Failing at address: 0xb0 +[flamingo:57381] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f77cc345390] +[flamingo:57381] [ 1] ./potrf[0x408244] +[flamingo:57381] [ 2] ./potrf[0x406b85] +[flamingo:57381] [ 3] ./potrf[0x402abc] +[flamingo:57381] [ 4] ./potrf[0x4021c5] +[flamingo:57381] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f77c4dff830] +[flamingo:57381] [ 6] ./potrf[0x4024f9] +[flamingo:57381] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 204800, 204800, 204800, 2048, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL 
matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:57444] *** Process received signal *** +[flamingo:57444] Signal: Segmentation fault (11) +[flamingo:57444] Signal code: Address not mapped (1) +[flamingo:57444] Failing at address: 0xb0 +[flamingo:57444] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f80af27b390] +[flamingo:57444] [ 1] ./potrf[0x408244] +[flamingo:57444] [ 2] ./potrf[0x406b85] +[flamingo:57444] [ 3] ./potrf[0x402abc] +[flamingo:57444] [ 4] ./potrf[0x4021c5] +[flamingo:57444] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f80a7d35830] +[flamingo:57444] [ 6] ./potrf[0x4024f9] +[flamingo:57444] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 204800, 204800, 204800, 2048, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:57507] *** Process received signal *** +[flamingo:57507] Signal: Segmentation fault (11) +[flamingo:57507] Signal code: Address not mapped (1) +[flamingo:57507] Failing at address: 0xb0 +[flamingo:57507] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f9c0d14e390] +[flamingo:57507] [ 1] ./potrf[0x408244] +[flamingo:57507] [ 2] ./potrf[0x406b85] +[flamingo:57507] [ 3] ./potrf[0x402abc] +[flamingo:57507] [ 4] ./potrf[0x4021c5] +[flamingo:57507] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f9c05c08830] +[flamingo:57507] [ 6] ./potrf[0x4024f9] +[flamingo:57507] *** End of error message *** +# +# AL4SAN 1.0.0, 
./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 204800, 204800, 204800, 2048, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:57570] *** Process received signal *** +[flamingo:57570] Signal: Segmentation fault (11) +[flamingo:57570] Signal code: Address not mapped (1) +[flamingo:57570] Failing at address: 0xb0 +[flamingo:57570] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fee22eae390] +[flamingo:57570] [ 1] ./potrf[0x408244] +[flamingo:57570] [ 2] ./potrf[0x406b85] +[flamingo:57570] [ 3] ./potrf[0x402abc] +[flamingo:57570] [ 4] ./potrf[0x4021c5] +[flamingo:57570] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fee1b968830] +[flamingo:57570] [ 6] ./potrf[0x4024f9] +[flamingo:57570] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 204800, 204800, 204800, 2560, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:57633] *** Process received signal *** +[flamingo:57633] Signal: Segmentation fault (11) +[flamingo:57633] Signal code: Address not mapped (1) +[flamingo:57633] Failing at address: 0xb0 +[flamingo:57633] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f57b92f1390] +[flamingo:57633] [ 1] ./potrf[0x408244] 
+[flamingo:57633] [ 2] ./potrf[0x406b85] +[flamingo:57633] [ 3] ./potrf[0x402abc] +[flamingo:57633] [ 4] ./potrf[0x4021c5] +[flamingo:57633] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f57b1dab830] +[flamingo:57633] [ 6] ./potrf[0x4024f9] +[flamingo:57633] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 204800, 204800, 204800, 2560, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:57696] *** Process received signal *** +[flamingo:57696] Signal: Segmentation fault (11) +[flamingo:57696] Signal code: Address not mapped (1) +[flamingo:57696] Failing at address: 0xb0 +[flamingo:57696] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fddefcbe390] +[flamingo:57696] [ 1] ./potrf[0x408244] +[flamingo:57696] [ 2] ./potrf[0x406b85] +[flamingo:57696] [ 3] ./potrf[0x402abc] +[flamingo:57696] [ 4] ./potrf[0x4021c5] +[flamingo:57696] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fdde8778830] +[flamingo:57696] [ 6] ./potrf[0x4024f9] +[flamingo:57696] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 2560 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 204800, 204800, 204800, 2560, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor 
+[flamingo:57759] *** Process received signal *** +[flamingo:57759] Signal: Segmentation fault (11) +[flamingo:57759] Signal code: Address not mapped (1) +[flamingo:57759] Failing at address: 0xb0 +[flamingo:57759] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7fdfbfe5c390] +[flamingo:57759] [ 1] ./potrf[0x408244] +[flamingo:57759] [ 2] ./potrf[0x406b85] +[flamingo:57759] [ 3] ./potrf[0x402abc] +[flamingo:57759] [ 4] ./potrf[0x4021c5] +[flamingo:57759] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7fdfb8916830] +[flamingo:57759] [ 6] ./potrf[0x4024f9] +[flamingo:57759] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 204800, 204800, 204800, 3072, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:57822] *** Process received signal *** +[flamingo:57822] Signal: Segmentation fault (11) +[flamingo:57822] Signal code: Address not mapped (1) +[flamingo:57822] Failing at address: 0xb0 +[flamingo:57822] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f516cd25390] +[flamingo:57822] [ 1] ./potrf[0x408244] +[flamingo:57822] [ 2] ./potrf[0x406b85] +[flamingo:57822] [ 3] ./potrf[0x402abc] +[flamingo:57822] [ 4] ./potrf[0x4021c5] +[flamingo:57822] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f51657df830] +[flamingo:57822] [ 6] ./potrf[0x4024f9] +[flamingo:57822] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds 
Gflop/s +F# 204800, 204800, 204800, 3072, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:57886] *** Process received signal *** +[flamingo:57886] Signal: Segmentation fault (11) +[flamingo:57886] Signal code: Address not mapped (1) +[flamingo:57886] Failing at address: 0xb0 +[flamingo:57886] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f26a68cd390] +[flamingo:57886] [ 1] ./potrf[0x408244] +[flamingo:57886] [ 2] ./potrf[0x406b85] +[flamingo:57886] [ 3] ./potrf[0x402abc] +[flamingo:57886] [ 4] ./potrf[0x4021c5] +[flamingo:57886] [ 5] /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f269f387830] +[flamingo:57886] [ 6] ./potrf[0x4024f9] +[flamingo:57886] *** End of error message *** +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 204800 +# NB: 3072 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 204800, 204800, 204800, 3072, 55, AL4SAN ERROR: al4san_desc_mat_alloc(): malloc() failed +AL4SAN ERROR: al4san_desc_check(): NULL matrix pointer +AL4SAN ERROR: AL4SAN_Desc_Create_User(): invalid descriptor +AL4SAN ERROR: al4san_desc_check(): NULL descriptor +AL4SAN ERROR: AL4SAN_dplgsy_Tile(): invalid descriptor +[flamingo:57949] *** Process received signal *** +[flamingo:57949] Signal: Segmentation fault (11) +[flamingo:57949] Signal code: Address not mapped (1) +[flamingo:57949] Failing at address: 0xb0 +[flamingo:57949] [ 0] /lib/x86_64-linux-gnu/libpthread.so.0(+0x11390)[0x7f41137a5390] +[flamingo:57949] [ 1] ./potrf[0x408244] +[flamingo:57949] [ 2] ./potrf[0x406b85] +[flamingo:57949] [ 3] ./potrf[0x402abc] +[flamingo:57949] [ 4] ./potrf[0x4021c5] +[flamingo:57949] [ 5] 
/lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xf0)[0x7f410c25f830] +[flamingo:57949] [ 6] ./potrf[0x4024f9] +[flamingo:57949] *** End of error message *** diff --git a/example/potrf/parsec_init.log b/example/potrf/parsec_init.log new file mode 100644 index 0000000..25ab535 --- /dev/null +++ b/example/potrf/parsec_init.log @@ -0,0 +1,1859 @@ +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 256, 55, 1.520 1883.52 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 256, 55, 1.496 1913.50 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 256, 55, 1.482 1931.93 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 512, 55, 1.948 1469.72 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 512, 55, 1.925 1487.70 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 512, 55, 1.897 1509.11 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 
+# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 768, 55, 1.853 1545.67 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 768, 55, 1.754 1632.96 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 768, 55, 1.758 1628.62 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 1024, 55, 2.105 1360.18 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 1024, 55, 2.051 1396.06 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 20480 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 20480, 20480, 20480, 1024, 55, 2.050 1396.97 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 256, 55, 5.105 1893.26 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 256, 55, 5.109 1891.72 +# +# AL4SAN 1.0.0, ./potrf +# 
Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 256, 55, 5.169 1869.50 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 512, 55, 6.422 1504.92 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 512, 55, 6.322 1528.68 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 512, 55, 6.355 1520.67 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 768, 55, 5.653 1709.61 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 768, 55, 5.620 1719.49 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 768, 55, 5.654 1709.40 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 
30720, 30720, 1024, 55, 5.899 1638.32 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 1024, 55, 5.917 1633.17 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 30720 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 30720, 30720, 30720, 1024, 55, 6.115 1580.38 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 256, 55, 12.233 1872.52 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 256, 55, 12.231 1872.93 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 256 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 256, 55, 12.237 1871.95 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 512, 55, 14.707 1557.61 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 512, 55, 14.614 1567.44 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 512 +# eps: 1.110223e-16 +# Activated 
runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 512, 55, 14.583 1570.88 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 768, 55, 13.353 1715.48 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 768, 55, 12.898 1776.01 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 768, 55, 12.998 1762.34 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 1024, 55, 13.439 1704.54 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 1024, 55, 13.488 1698.39 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 40960 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 40960, 40960, 40960, 1024, 55, 13.391 1710.66 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 512, 55, 28.292 1581.40 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb 
Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 512, 55, 28.170 1588.26 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 512, 55, 28.114 1591.43 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 768, 55, 25.097 1782.72 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 768, 55, 25.626 1745.92 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 768, 55, 25.158 1778.40 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1024, 55, 26.045 1717.84 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1024, 55, 25.395 1761.80 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1024, 55, 
26.310 1700.51 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1280, 55, 24.588 1819.61 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1280, 55, 24.495 1826.53 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1280, 55, 24.736 1808.75 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1536, 55, 24.177 1850.51 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1536, 55, 24.541 1823.13 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1536, 55, 25.058 1785.46 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1792, 55, 24.862 1799.55 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: 
Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1792, 55, 25.279 1769.84 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 1792, 55, 24.476 1827.94 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 2048, 55, 27.040 1654.61 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 2048, 55, 27.092 1651.46 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 51200 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 51200, 51200, 51200, 2048, 55, 27.037 1654.79 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 512, 55, 48.078 1608.04 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 512, 55, 48.161 1605.26 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 512, 55, 47.997 1610.76 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 
+# Nb Qcols: 1 +# N: 61440 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 768, 55, 43.187 1790.15 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 768, 55, 42.864 1803.64 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 768, 55, 42.953 1799.91 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1024, 55, 44.498 1737.42 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1024, 55, 44.797 1725.80 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1024, 55, 44.448 1739.35 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1280, 55, 41.662 1855.67 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1280, 55, 42.121 
1835.47 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1280, 55, 42.130 1835.06 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1536, 55, 40.746 1897.38 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1536, 55, 40.560 1906.09 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1536, 55, 41.142 1879.11 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1792, 55, 41.237 1874.79 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1792, 55, 40.837 1893.18 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 1792, 55, 40.286 1919.07 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec 
+# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 2048, 55, 44.008 1756.77 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 2048, 55, 43.671 1770.30 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 61440 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 61440, 61440, 61440, 2048, 55, 42.901 1802.08 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 512, 55, 76.167 1611.80 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 512, 55, 76.124 1612.73 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 512, 55, 75.946 1616.51 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 768, 55, 67.888 1808.38 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 768, 55, 68.398 1794.90 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb 
Qcols: 1 +# N: 71680 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 768, 55, 67.706 1813.23 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1024, 55, 71.524 1716.45 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1024, 55, 70.178 1749.36 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1024, 55, 70.468 1742.16 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1280, 55, 67.186 1827.27 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1280, 55, 67.091 1829.86 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1280, 55, 66.849 1836.49 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1536, 55, 63.155 
1943.91 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1536, 55, 64.179 1912.89 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1536, 55, 64.210 1911.96 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1792, 55, 63.808 1924.01 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1792, 55, 63.513 1932.96 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 1792, 55, 64.049 1916.76 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 2048, 55, 66.885 1835.50 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 2048, 55, 65.639 1870.33 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 71680 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec 
+# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 71680, 71680, 71680, 2048, 55, 65.519 1873.76 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 512, 55, 112.664 1626.56 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 512, 55, 112.969 1622.18 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 512, 55, 112.565 1627.99 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 768, 55, 102.265 1791.96 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 768, 55, 101.512 1805.26 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 768, 55, 100.433 1824.65 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1024, 55, 103.541 1769.88 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb 
Qcols: 1 +# N: 81920 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1024, 55, 104.010 1761.90 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1024, 55, 103.322 1773.63 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1280, 55, 99.731 1837.49 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1280, 55, 99.145 1848.36 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1280, 55, 98.823 1854.38 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1536, 55, 96.162 1905.70 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1536, 55, 96.258 1903.80 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1536, 55, 
95.053 1927.93 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1792, 55, 93.763 1954.45 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1792, 55, 95.114 1926.69 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 1792, 55, 95.591 1917.07 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 2048, 55, 98.702 1856.64 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 2048, 55, 96.816 1892.81 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 81920 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 81920, 81920, 81920, 2048, 55, 98.613 1858.33 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 512, 55, 219.396 1631.38 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 512 +# eps: 1.110223e-16 +# Activated 
runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 512, 55, 218.360 1639.12 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 512, 55, 218.456 1638.40 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 768, 55, 195.930 1826.77 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 768, 55, 195.296 1832.70 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 768 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 768, 55, 198.101 1806.75 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1024, 55, 203.577 1758.15 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1024, 55, 201.278 1778.23 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1024 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1024, 55, 202.620 1766.46 +# +# AL4SAN 1.0.0, ./potrf 
+# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1280, 55, 193.023 1854.28 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1280, 55, 192.895 1855.51 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1280 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1280, 55, 197.082 1816.09 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1536, 55, 190.021 1883.58 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1536, 55, 186.503 1919.11 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1536 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1536, 55, 186.138 1922.87 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1792, 55, 184.126 1943.88 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec 
+# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1792, 55, 182.916 1956.75 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 1792 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 1792, 55, 184.925 1935.48 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 2048, 55, 188.708 1896.68 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 2048, 55, 186.480 1919.35 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 102400 +# NB: 2048 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 102400, 102400, 102400, 2048, 55, 189.379 1889.97 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 512, 55, 379.168 1631.16 +# +# AL4SAN 1.0.0, ./potrf +# Nb threads: 55 +# Nb gpus: 0 +# Nb Prows: 1 +# Nb Qcols: 1 +# N: 122880 +# NB: 512 +# eps: 1.110223e-16 +# Activated runtime: Parsec +# +# M N K/NRHS NB Nb threads seconds Gflop/s +F# 122880, 122880, 122880, 512, 55, \ No newline at end of file diff --git a/example/potrf/potrf.c b/example/potrf/potrf.c index fafcb1f..c2afd7a 100644 --- a/example/potrf/potrf.c +++ b/example/potrf/potrf.c @@ -28,10 +28,6 @@ #include #include #include -#include -#include -#include -#include #include #include "potrf.h" @@ -55,7 +51,7 @@ float get_time(); 
/* Cholesky factorization: * A is replaced by its factorization L or L^T depending on uplo */ -int AL4SAN_cholesky(cham_uplo_t uplo, AL4SAN_desc_t *A) +int AL4SAN_cholesky(al4san_uplo_t uplo, AL4SAN_desc_t *A) { /* @@ -66,7 +62,7 @@ int AL4SAN_cholesky(cham_uplo_t uplo, AL4SAN_desc_t *A) AL4SAN_context_t *al4sanctxt; AL4SAN_sequence_t *sequence = NULL; - AL4SAN_request_t* request = RUNTIME_REQUEST_INITIALIZER; + AL4SAN_request_t* request = AL4SAN_REQUEST_INITIALIZER; AL4SAN_option_t options; int status; @@ -97,9 +93,9 @@ int AL4SAN_cholesky(cham_uplo_t uplo, AL4SAN_desc_t *A) ldak = BLKLDD(A, k); options.priority = 2*A->nt - 2*k; - INSERT_Task_dpotrf( + if(1)Task_dpotrf( &options, - ChamUpper, + Al4sanUpper, tempkm, A->mb, A(k, k), ldak, A->nb*k); @@ -107,9 +103,9 @@ int AL4SAN_cholesky(cham_uplo_t uplo, AL4SAN_desc_t *A) tempnn = n == A->nt-1 ? A->n - n*A->nb : A->nb; options.priority = 2*A->nt - 2*k - n; - INSERT_Task_dtrsm( + if(1)Task_dtrsm( &options, - ChamLeft, ChamUpper, ChamConjTrans, ChamNonUnit, + Al4sanLeft, Al4sanUpper, Al4sanConjTrans, Al4sanNonUnit, A->mb, tempnn, A->mb, zone, A(k, k), ldak, A(k, n), ldak); @@ -121,9 +117,9 @@ int AL4SAN_cholesky(cham_uplo_t uplo, AL4SAN_desc_t *A) ldam = BLKLDD(A, m); options.priority = 2*A->nt - 2*k - m; - INSERT_Task_dsyrk( + if(1)Task_dsyrk( &options, - ChamUpper, ChamConjTrans, + Al4sanUpper, Al4sanConjTrans, tempmm, A->mb, A->mb, -1.0, A(k, m), ldak, 1.0, A(m, m), ldam); @@ -132,9 +128,9 @@ int AL4SAN_cholesky(cham_uplo_t uplo, AL4SAN_desc_t *A) tempnn = n == A->nt-1 ? 
A->n-n*A->nb : A->nb; options.priority = 2*A->nt - 2*k - n - m; - INSERT_Task_dgemm( + if(1)Task_dgemm( &options, - ChamConjTrans, ChamNoTrans, + Al4sanTrans, Al4sanNoTrans, tempmm, tempnn, A->mb, A->mb, mzone, A(k, m), ldak, A(k, n), ldak, @@ -170,11 +166,11 @@ int main(int argc, char* argv[]){ int NRHS; // number of RHS vectors int NCPU; // number of cores to use int NGPU; // number of gpus (cuda devices) to use - int UPLO = ChamUpper; // where is stored L + int UPLO = Al4sanUpper; // where is stored L /* descriptors necessary for calling AL4SAN data descriptor interface */ AL4SAN_desc_t *descA = NULL, *descAC = NULL, *descB = NULL, *descX = NULL; /* declarations to time the program and evaluate performances */ - double fmuls, fadds, flops, gflops, cpu_time; + double fmuls, fadds, flops, gflops, cpu_time= 0.0; /* variable to check the numerical results */ double anorm, bnorm, xnorm, eps, res; @@ -191,8 +187,9 @@ int main(int argc, char* argv[]){ NRHS = iparam[IPARAM_NRHS]; NB = iparam[IPARAM_NB]; /* compute the algorithm complexity to evaluate performances */ - fadds = (double)( FADDS_POTRF(N) + 2 * FADDS_TRSM(N,NRHS) ); - fmuls = (double)( FMULS_POTRF(N) + 2 * FMULS_TRSM(N,NRHS) ); + fadds = (double)( FADDS_POTRF(N)); + fmuls = (double)( FMULS_POTRF(N)); + flops = 1e-9 * (fmuls + fadds); /* initialize the number of thread if not given by the user in argv @@ -204,11 +201,18 @@ int main(int argc, char* argv[]){ NCPU = iparam[IPARAM_THRDNBR]; NGPU = iparam[IPARAM_GPUS]; - /* print informations to user */ - print_header( argv[0], iparam); /* Initialize AL4SAN with main parameters */ - AL4SAN_context_t *al4san = AL4SAN_Init("Starpu", NCPU, NGPU); + AL4SAN_context_t *al4san = AL4SAN_Init(runtime, NCPU, NGPU); + +#if defined(AL4SAN_USE_MPI) + al4san->prows=iparam[IPARAM_P]; + al4san->pcols=iparam[IPARAM_Q]; + AL4SAN_Init_Processor_Grid(al4san->prows, al4san->pcols); +#endif + + /* print informations to user */ + print_header( argv[0], iparam); /* * Allocate memory for 
our data using a C macro (see step2.h) @@ -237,12 +241,12 @@ int main(int argc, char* argv[]){ * AL4SAN_Desc_Create( ... , 0, 0, number of rows, number of columns, 1, 1); * Have a look to the documentation for details about these parameters. */ + AL4SAN_Matrix_Create(&descA, NULL, Al4sanRealDouble, AL4SAN_Col_Major, NB, NB, NB, N, N, N); + /* generate A matrix with random values such that it is spd*/ - /* generate A matrix with random values such that it is spd using chameleon lib*/ - int rc = CHAMELEON_Init( NCPU, NGPU ); - CHAMELEON_dplgsy_Tile( (double)N, ChamUpperLower, (CHAM_desc_t*) descA, 51 ); + dplgsy_Tile( (double)N, Al4sanUpperLower, descA, 51 ); cpu_time = -AL4SAN_timer(); /* Cholesky factorization: @@ -254,15 +258,17 @@ int main(int argc, char* argv[]){ /* print informations to user */ gflops = flops / cpu_time; + + if(AL4SAN_My_Mpi_Rank()==0){ printf( "%9.3f %9.2f\n", cpu_time, gflops); fflush( stdout ); - + } /* deallocate A and associated descriptors descA, ... */ AL4SAN_Desc_Destroy( &descA ); /* Finalize AL4SAN */ AL4SAN_Finalize(); - + return 0; } diff --git a/example/potrf/potrf.h b/example/potrf/potrf.h index 570f851..8445c0d 100644 --- a/example/potrf/potrf.h +++ b/example/potrf/potrf.h @@ -35,18 +35,12 @@ #include #include #include -#include "runtime/al4san_quark.h" -#include "runtime/al4san_starpu.h" -#include "runtime/al4san_parsec.h" +#include "runtime/al4san_runtime.h" #include "control/al4san_descriptor.h" -#include -#include -#include -#include -#include +//#include +#include #include #include - #if defined( _WIN32 ) || defined( _WIN64 ) #include #else /* Non-Windows */ @@ -69,6 +63,53 @@ static void get_thread_count(int *thrdnbr) { #define FMULS_TRSM(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.)) #define FADDS_TRSM(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.)) + +/* + Cholesky Task Headers +*/ + +void Task_dpotrf( const AL4SAN_option_t *options, + al4san_uplo_t uplo, int n, int nb, + const 
AL4SAN_desc_t *A, int Am, int An, int lda, + int iinfo ); +void Task_dgemm( const AL4SAN_option_t *options, + al4san_trans_t transA, al4san_trans_t transB, + int m, int n, int k, int nb, + double alpha, const AL4SAN_desc_t *A, int Am, int An, int lda, + const AL4SAN_desc_t *B, int Bm, int Bn, int ldb, + double beta, const AL4SAN_desc_t *C, int Cm, int Cn, int ldc ); +void TASK_dplgsy( const AL4SAN_option_t *options, + double bump, int m, int n, AL4SAN_desc_t *A, int Am, int An, int lda, + int bigM, int m0, int n0, unsigned long long int seed ); +void Task_dsyrk( const AL4SAN_option_t *options, + al4san_uplo_t uplo, al4san_trans_t trans, + int n, int k, int nb, + double alpha, const AL4SAN_desc_t *A, int Am, int An, int lda, + double beta, const AL4SAN_desc_t *C, int Cm, int Cn, int ldc ); +void Task_dtrsm( const AL4SAN_option_t *options, + al4san_side_t side, al4san_uplo_t uplo, al4san_trans_t transA, al4san_diag_t diag, + int m, int n, int nb, + double alpha, const AL4SAN_desc_t *A, int Am, int An, int lda, + const AL4SAN_desc_t *B, int Bm, int Bn, int ldb ); +void CORE_dplgsy( double bump, int m, int n, double *A, int lda, + int bigM, int m0, int n0, unsigned long long int seed ); + +int dplgsy_Tile( double bump, al4san_uplo_t uplo, + AL4SAN_desc_t *A, + unsigned long long int seed ); + +int dplgsy_Tile_Async( double bump, + al4san_uplo_t uplo, + AL4SAN_desc_t *A, + unsigned long long int seed, + AL4SAN_sequence_t *sequence, + AL4SAN_request_t *request ); + +void pdplgsy( double bump, al4san_uplo_t uplo, AL4SAN_desc_t *A, + unsigned long long int seed, + AL4SAN_sequence_t *sequence, AL4SAN_request_t *request ); + +char runtime[20]; /* Integer parameters for step2 */ enum iparam_step2 { IPARAM_THRDNBR, /* Number of cores */ @@ -76,6 +117,8 @@ enum iparam_step2 { IPARAM_N, /* Number of columns of the matrix */ IPARAM_NRHS, /* Number of RHS */ IPARAM_NB, /* Number of NB */ + IPARAM_P, /* Number of P */ + IPARAM_Q, /* Number of Q */ /* End */ IPARAM_SIZEOF }; @@ 
-91,6 +134,9 @@ static void init_iparam(int iparam[IPARAM_SIZEOF]){ iparam[IPARAM_N ] = 500; iparam[IPARAM_NRHS ] = 1; iparam[IPARAM_NB ] = 10; + iparam[IPARAM_P ] = 1; + iparam[IPARAM_Q ] = 1; + } /** @@ -137,6 +183,12 @@ static void read_args(int argc, char *argv[], int *iparam){ sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_THRDNBR]) ); } else if (startswith( argv[i], "--gpus=" )) { sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_GPUS]) ); + }else if (startswith( argv[i], "--p=" )) { + sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_P]) ); + } else if (startswith( argv[i], "--q=" )) { + sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_Q]) ); + } else if (startswith( argv[i], "--runtime=" )) { + sscanf( strchr( argv[i], '=' ) + 1, "%s", runtime); } else { fprintf( stderr, "Unknown option: %s\n", argv[i] ); } @@ -152,15 +204,18 @@ static void print_header(char *prog_name, int * iparam) { #else double eps = LAPACKE_dlamch_work( 'e' ); #endif + if(AL4SAN_My_Mpi_Rank()==0){ printf( "#\n" "# AL4SAN %d.%d.%d, %s\n" "# Nb threads: %d\n" "# Nb gpus: %d\n" + "# Nb Prows: %d\n" + "# Nb Qcols: %d\n" "# N: %d\n" "# NB: %d\n" - "# IB: %d\n" "# eps: %e\n" + "# Activated runtime: %s\n" "#\n", AL4SAN_VERSION_MAJOR, AL4SAN_VERSION_MINOR, @@ -168,14 +223,17 @@ static void print_header(char *prog_name, int * iparam) { prog_name, iparam[IPARAM_THRDNBR], iparam[IPARAM_GPUS], + iparam[IPARAM_P], + iparam[IPARAM_Q], iparam[IPARAM_N], iparam[IPARAM_NB], - 32, - eps ); + eps, + runtime ); printf( "# M N K/NRHS NB seconds Gflop/s\n"); printf( "#%7d %7d %7d %7d ", iparam[IPARAM_N], iparam[IPARAM_N], iparam[IPARAM_NRHS], iparam[IPARAM_NB]); fflush( stdout ); +} return; } diff --git a/example/potrf_cmake/CMakeLists.txt b/example/potrf_cmake/CMakeLists.txt new file mode 100644 index 0000000..8767000 --- /dev/null +++ b/example/potrf_cmake/CMakeLists.txt @@ -0,0 +1,53 @@ +cmake_minimum_required(VERSION 3.13) +# Defines the source files of the tests. 
Each file generates 1 test +include_directories( ${CMAKE_SOURCE_DIR} ) +include_directories(../../build-parsec/install/include) +include_directories(/project/k1205/omairyrm/dplasma-parsec/parsec/build/install_dir/include/) +set(SRC + ${CMAKE_SOURCE_DIR}/codelets/codelet_dtrsm.c + ${CMAKE_SOURCE_DIR}/codelets/codelet_dpotrf.c + ${CMAKE_SOURCE_DIR}/codelets/codelet_dgemm.c + ${CMAKE_SOURCE_DIR}/codelets/codelet_dsyrk.c + ${CMAKE_SOURCE_DIR}/codelets/codelet_dplgsy.c + ${CMAKE_SOURCE_DIR}/coreblas/core_dplgsy.c + ${CMAKE_SOURCE_DIR}/compute/dplgsy.c + ${CMAKE_SOURCE_DIR}/compute/pdplgsy.c + ${CMAKE_SOURCE_DIR}/potrf.c +) +add_library( potrf_lib ${SRC}) +#file(GLOB tests_files "src/*.cpp") + +add_definitions(-DENABLE_FINE_TIMING) +#add_definitions(-DENABLE_DEBUGGING) +# Uses RUNPATH instead of RPATH +#set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -Wl,--enable-new-dtags") +list( APPEND CMAKE_EXE_LINKER_FLAGS "-Wl,--enable-new-dtags") +# Transform the list into a space separate string +string(REPLACE ";" " " CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS}") + +set( TIMINGS_SRC + ${CMAKE_SOURCE_DIR}/potrf.c + ) +foreach( test_src ${TIMINGS_SRC} ) + get_filename_component( test_exe ${test_src} NAME_WE ) + if( "${test_exe}" STREQUAL "timing" ) + # skip timing.c + continue() + endif() + + add_executable( ${test_exe} ${test_src} ) + if( _build_netlib-scalapack ) + add_dependencies( ${test_exe} NETLIB-SCALAPACK ) + endif() + + target_link_libraries( ${test_exe} potrf_lib + ${MPI_C_LIBRARIES} + ${PARSEC_LIBRARIES} + ${LAPACKE_LIBRARIES} + ${LAPACK_LIBRARIES} + ${BLAS_LIBRARIES} + ${EXTRA_LIBS} + ${AL4SAN_LIBS} + ) + install( TARGETS ${test_exe} DESTINATION lib/timing ) +endforeach() diff --git a/example/potrf_cmake/Make.inc b/example/potrf_cmake/Make.inc new file mode 100644 index 0000000..1987247 --- /dev/null +++ b/example/potrf_cmake/Make.inc @@ -0,0 +1,49 @@ +CC = +LINK = $(CC) + +#LDFLAGS = -O3 +#CFLAGS = -g -ldl -Wall -Wno-unused-function -DAL4SAN_USE_CUDA 
-DAL4SAN_CUDA_ASYNC1 +CFLAGS = -ldl -Wno-unused-parameter -multiply_defined + + +#F77BLASL= -L${MKLROOT}/lib/intel64 -lmkl_intel_lp64 -lmkl_gnu_thread -lmkl_core -fopenmp -lpthread -lm -lgfortran + +#LIBS = -L/usr/lib -lm -llapacke -lblas -lhwloc -lpthread -L/opt/ecrc/cuda/9.0/lib64/ -lcuda -lcudart -lcublas -L/home/omairyrm/al4san-switch/al4san-dev/build/install/lib -lal4san -lal4san_quark -lal4san_starpu -lal4san_parsec -L/home/omairyrm/chameleon/build-starpu-gpu-gcc/install/lib -lchameleon -lchameleon_starpu -lcoreblas -lcudablas -L/home/omairyrm/quark -lquark -L/home/omairyrm/parsec/build/install/lib -lparsec -L/home/omairyrm/starpu-1.3.3.28Oct.2019/install/lib -lstarpu-1.3 + +#starpu mpi +#LIBS = -lpthread -L../../hwloc-install-mpi/lib/ -lhwloc -L/opt/ecrc/openmpi/4.0.1-gcc-8.3.0/ub16/lib -lmpi -L../../build-mpi/install/lib -lal4san -lal4san_starpu -L../../starpu-1.3-install-mpi/lib/ -lstarpu-1.3 -lstarpumpi-1.3 -L${MKLROOT}/lib/intel64 -lm -lmkl_intel_lp64 -lmkl_sequential -lmkl_core + +#starpu +#LIBS = -lpthread -L../../hwloc-install/lib/ -lhwloc -L../../build/install/lib -lal4san -lal4san_starpu -L../../starpu-1.3-install/lib/ -lstarpu-1.3 -L${MKLROOT}/lib/intel64 -lm -lmkl_intel_lp64 -lmkl_sequential -lmkl_core + +#parsec mpi +#LIBS = -lpthread -lhwloc -L/opt/ecrc/openmpi/3.0.0-gcc-5.5.0/ub16/lib -lmpi -L../../build-parsec/install/lib/ -lal4san -lal4san_parsec -L/opt/ecrc/parsec/master-gcc-5.5.0-mkl-openmpi-plasma-2.8.0/ub16/lib -lparsec -L${MKLROOT}/lib/intel64 -lm -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -ldl + +#parsec mpi +LIBS = -lpthread -lhwloc -L/opt/intel/compilers_and_libraries_2019.5.281/linux/mpi/intel64/lib/release -lmpi -L../../build-parsec/install/lib/ -lal4san -lal4san_parsec -L/project/k1205/omairyrm/dplasma-parsec/parsec/build/install_dir/lib64/ -lparsec -L${MKLROOT}/lib/intel64 -lm -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -ldl + +#LIBS = -lpthread -lhwloc -L/opt/ecrc/openmpi/3.0.0-gcc-5.5.0/ub16/lib -lmpi 
-L../../build-parsec/install/lib -lal4san -lal4san_parsec -L/opt/ecrc/parsec/master-gcc-5.5.0-mkl-openmpi-plasma-2.8.0/ub16/lib -lparsec -L${MKLROOT}/lib/intel64 -lm -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -ldl + +#quark + +#LIBS = -lhwloc -lpthread -L/home/omairyrm/quark -lquark -L../../build-quark/install/lib -lal4san -lal4san_quark -L${MKLROOT}/lib/intel64 -lm -lmkl_intel_lp64 -lmkl_sequential -lmkl_core -ldl + +#INCLUDES = -I/home/omairyrm/al4san-switch/al4san-dev/build/install/include -I/home/omairyrm/chameleon/build-starpu-gpu-gcc/install/include -I/opt/ecrc/starpu/1.3.1-gcc-5.5.0-mkl-openmpi-4.0.1/ub16/include/starpu/1.3/ + +#starpu mpi +#INCLUDES = -I${MKLROOT}/include -I../../build-mpi/install/include -I../../starpu-1.3-install-mpi/include/starpu/1.3/ + +#INCLUDES = -I${MKLROOT}/include -I../../build/install/include -I../../starpu-1.3-install/include/starpu/1.3/ + +#parsec mpi +INCLUDES = -I${MKLROOT}/include -I../../build-parsec/install/include -I/opt/intel/compilers_and_libraries_2019.5.281/linux/mpi/intel64/include/ -I/project/k1205/omairyrm/dplasma-parsec/parsec/build/install_dir/include/ -I/project/k1205/omairyrm/dplasma-parsec/parsec/build/install_dir/include/parsec + +#INCLUDES = -I${MKLROOT}/include -I../../build-parsec2/installdir/include -I/home/omairyrm/parsec-last/dplasma/build/installdir/include + +#INCLUDES = -I${MKLROOT}/include -I../../build-quark/install/include -I/home/omairyrm/quark + + +LOADLIBES = $(LIBS) + +%.o: %.c + $(CC) $(CFLAGS) $(INCLUDES) -c $*.c -o $@ diff --git a/example/potrf_cmake/Makefile b/example/potrf_cmake/Makefile new file mode 100644 index 0000000..551859b --- /dev/null +++ b/example/potrf_cmake/Makefile @@ -0,0 +1,26 @@ +# -*- Makefile -*- +include Make.inc + +CSHOBJS = potrf.o\ + compute/dplgsy.o\ + compute/pdplgsy.o\ + coreblas/core_dplgsy.o\ + codelets/codelet_dgemm.o\ + codelets/codelet_dpotrf.o\ + codelets/codelet_dtrsm.o\ + codelets/codelet_dsyrk.o\ + codelets/codelet_dplgsy.o\ + +all: potrf + +lib: + (cd 
lib; make all) + +potrf: $(CSHOBJS) + $(LINK) $(LDFLAGS) $(CSHOBJS) -o $@ $(LIBS) + +.PHONY: clean all potrf + +clean: + (rm -f *.o codelets/*.o compute/*.o coreblas/*.o potrf *~) + diff --git a/example/potrf_cmake/codelets/codelet_dgemm.c b/example/potrf_cmake/codelets/codelet_dgemm.c new file mode 100644 index 0000000..66ae056 --- /dev/null +++ b/example/potrf_cmake/codelets/codelet_dgemm.c @@ -0,0 +1,160 @@ +/** + * + * @file al4san/codelet_zgemm.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * @copyright 2017-2018 King Abdullah University of Science and Technology + * (KAUST). All rights reserved. + * + *** + * + * @brief Chameleon zgemm AL4SAN codelet + * + * @version 1.0.1 + * @author Rabab Alomairy + * @date 2019-02-06 + * @precisions normal z -> c d s + * + */ + +#include "../potrf.h" + + +/* + * Preparing work's function: + * @param[in] First argument is task name. 
+ * @param[in] Second argument cpu user function name
+ * @param[in] Third argument gpu user function name
+*/
+
+AL4SAN_TASK_CPU_GPU(gemm, gemm_cpu_func, gemm_cuda_func)
+/* CPU body of the gemm task: unpacks what Task_dgemm packed and calls cblas_dgemm on one tile. */
+void gemm_cpu_func( AL4SAN_arg_list *al4san_arg)
+{
+
+    int transA;
+    int transB;
+    int m;
+    int n;
+    int k;
+    double alpha;
+    double *A;
+    int lda;
+    double *B;
+    int ldb;
+    double beta;
+    double *C;
+    int ldc;
+
+
+    /*
+     * AL4SAN_Unpack_Arg:
+     * @param[in] First argument: the AL4SAN_arg_list that holds the packed data
+     * @param[in] Remaining arguments: addresses of the locals to fill, in the order Task_dgemm packs them
+     */
+
+
+    AL4SAN_Unpack_Arg(al4san_arg, &transA, &transB, &m, &n, &k, &alpha, &A, &lda, &B, &ldb, &beta, &C, &ldc);
+
+    cblas_dgemm(
+        CblasColMajor,
+        (CBLAS_TRANSPOSE)transA, (CBLAS_TRANSPOSE)transB,
+        m, n, k,
+        (alpha), A, lda,
+        B, ldb,
+        (beta), C, ldc);
+
+}
+/* CUDA body of the gemm task; compiled only when AL4SAN_USE_CUDA is defined. */
+#ifdef AL4SAN_USE_CUDA
+void gemm_cuda_func(AL4SAN_arg_list *al4san_arg)
+{
+    int transA;
+    int transB;
+    int m;
+    int n;
+    int k;
+    double alpha;
+    const double *A;
+    int lda;
+    const double *B;
+    int ldb;
+    double beta;
+    double *C;
+    int ldc;
+
+    /*
+     * AL4SAN_Unpack_Arg:
+     * @param[in] First argument: the AL4SAN_arg_list that holds the packed data
+     * @param[in] Remaining arguments: addresses of the locals to fill, in the order Task_dgemm packs them
+     */
+
+    AL4SAN_Unpack_Arg(al4san_arg, &transA, &transB, &m, &n, &k, &alpha, &A, &lda, &B, &ldb, &beta, &C, &ldc);
+    /* NOTE(review): AL4SAN_getStream presumably declares/fetches `stream`; it is defined outside this file - confirm. */
+    AL4SAN_getStream( stream );
+
+    CUDA_dgemm(
+        transA, transB,
+        m, n, k,
+        &alpha, A, lda,
+        B, ldb,
+        &beta, C, ldc,
+        stream);
+    /* Wait for the stream unless the build defines AL4SAN_CUDA_ASYNC. */
+#ifndef AL4SAN_CUDA_ASYNC
+    cudaStreamSynchronize( stream );
+#endif
+
+    return;
+}
+#endif // defined(AL4SAN_USE_CUDA)
+/* Submits C(Cm,Cn) = alpha*op(A(Am,An))*op(B(Bm,Bn)) + beta*C(Cm,Cn) as an AL4SAN task; nb is unused. */
+void Task_dgemm( const AL4SAN_option_t *options,
+                 al4san_trans_t transA, al4san_trans_t transB,
+                 int m, int n, int k, int nb,
+                 double alpha, const AL4SAN_desc_t *A, int Am, int An, int lda,
+                 const AL4SAN_desc_t *B, int Bm, int Bn, int ldb,
+                 double beta, const AL4SAN_desc_t *C, int Cm, int Cn, int ldc )
+{
+    (void)nb;
+
+    /*
+     * Insert Task function:
+     * @param[in] First argument AL4SAN_TASK macro with task name
+     * @param[in] options argument which holds sequence data structure
+     * @param[in] Parameter list of va_list type to represent data and the dependencies
+     */
+
+    AL4SAN_BEGIN_ACCESS_DECLARATION;
+    AL4SAN_ACCESS_R(A, Am, An);
+    AL4SAN_ACCESS_R(B, Bm, Bn);
+    AL4SAN_ACCESS_RW(C, Cm, Cn);
+    AL4SAN_END_ACCESS_DECLARATION;
+
+    AL4SAN_Insert_Task(AL4SAN_TASK(gemm), (AL4SAN_option_t*)options,
+        AL4SAN_VALUE, &transA, sizeof(int),
+        AL4SAN_VALUE, &transB, sizeof(int),
+        AL4SAN_VALUE, &m, sizeof(int),
+        AL4SAN_VALUE, &n, sizeof(int),
+        AL4SAN_VALUE, &k, sizeof(int),
+        AL4SAN_VALUE, &alpha, sizeof(double),
+        AL4SAN_INPUT, AL4SAN_ADDR(A, double, Am, An), AL4SAN_DEP,
+        AL4SAN_VALUE, &lda, sizeof(int),
+        AL4SAN_INPUT, AL4SAN_ADDR(B, double, Bm, Bn), AL4SAN_DEP,
+        AL4SAN_VALUE, &ldb, sizeof(int),
+        AL4SAN_VALUE, &beta, sizeof(double),
+        AL4SAN_INOUT | AL4SAN_AFFINITY, AL4SAN_ADDR(C, double, Cm, Cn), AL4SAN_DEP,
+        AL4SAN_VALUE, &ldc, sizeof(int),
+#ifdef AL4SAN_USE_CUDA
+
+        AL4SAN_CUDA_FLG, ON, sizeof(int),
+#endif
+        AL4SAN_PRIORITY, options->priority, sizeof(int),
+        AL4SAN_LABEL, "zgemm", sizeof(char),
+//      AL4SAN_COLOR, "yellow", sizeof(char),
+        ARG_END);
+}
+
diff --git a/example/potrf_cmake/codelets/codelet_dplgsy.c b/example/potrf_cmake/codelets/codelet_dplgsy.c
new file mode 100644
index 0000000..05b9af3
--- /dev/null
+++ b/example/potrf_cmake/codelets/codelet_dplgsy.c
@@ -0,0 +1,83 @@
+/**
+ *
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ * Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ * Univ. Bordeaux. All rights reserved.
+ * @copyright 2017-2018 King Abdullah University of Science and Technology (KAUST).
+ * All rights reserved.
+ ***
+ *
+ * @brief Chameleon dplgsy AL4SAN codelet
+ *
+ * @version 1.0.1
+ * @comment This file has been automatically generated
+ * from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Hatem Ltaief
+ * @author Jakub Kurzak
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Rabab Alomairy
+ * @date 2019-02-06
+ * @precisions normal z -> c d s
+ *
+ */
+
+#include "../potrf.h"
+
+
+/*
+ * Preparing work's function:
+ * @param[in] First argument is task name.
+ * @param[in] Second argument user function name
+*/
+
+
+AL4SAN_TASK_CPU(plgsy, plgsy_cpu_fun)
+
+/* Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */
+/* CPU body of the plgsy task: unpacks in the order TASK_dplgsy packs, then calls CORE_dplgsy. */
+void plgsy_cpu_fun(AL4SAN_arg_list *al4san_arg)
+{
+    double bump;
+    int m;
+    int n;
+    double *A;
+    int lda;
+    int bigM;
+    int m0;
+    int n0;
+    unsigned long long int seed;
+
+    AL4SAN_Unpack_Arg(al4san_arg, &bump, &m, &n, &A, &lda, &bigM, &m0, &n0, &seed );
+    CORE_dplgsy( bump, m, n, A, lda, bigM, m0, n0, seed );
+}
+
+/* Submits the generation of tile A(Am, An) as an AL4SAN task; the tile is declared write-only. */
+void TASK_dplgsy( const AL4SAN_option_t *options,
+                  double bump, int m, int n, AL4SAN_desc_t *A, int Am, int An, int lda,
+                  int bigM, int m0, int n0, unsigned long long int seed )
+{
+
+    AL4SAN_BEGIN_ACCESS_DECLARATION;
+    AL4SAN_ACCESS_W(A, Am, An);
+    AL4SAN_END_ACCESS_DECLARATION;
+
+    AL4SAN_Insert_Task(AL4SAN_TASK(plgsy), (AL4SAN_option_t * )options,
+        AL4SAN_VALUE, &bump, sizeof(double),
+        AL4SAN_VALUE, &m, sizeof(int),
+        AL4SAN_VALUE, &n, sizeof(int),
+        AL4SAN_OUTPUT | AL4SAN_AFFINITY, AL4SAN_ADDR(A, double, Am, An), AL4SAN_DEP,
+        AL4SAN_VALUE, &lda, sizeof(int),
+        AL4SAN_VALUE, &bigM, sizeof(int),
+        AL4SAN_VALUE, &m0, sizeof(int),
+        AL4SAN_VALUE, &n0, sizeof(int),
+        AL4SAN_VALUE, &seed, sizeof(unsigned long long int),
+        AL4SAN_PRIORITY, options->priority, sizeof(int),
+        AL4SAN_LABEL, "dplgsy", sizeof(char),
+        ARG_END);
+}
+
+
diff --git a/example/potrf_cmake/codelets/codelet_dpotrf.c b/example/potrf_cmake/codelets/codelet_dpotrf.c
new file mode
100644 index 0000000..a5221dc --- /dev/null +++ b/example/potrf_cmake/codelets/codelet_dpotrf.c @@ -0,0 +1,101 @@ +/** + * + * @file al4san/codelet_zpotrf.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * @copyright 2017-2018 King Abdullah University of Science and Technology (KAUST). + * All rights reserved. + *** + * + * @brief Chameleon zpotrf AL4SAN codelet + * + * @version 1.0.1 + * @comment This file has been automatically generated + * from Plasma 2.5.0 for CHAMELEON 1.0.0 + * @author Hatem Ltaief + * @author Jakub Kurzak + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Rabab Alomairy + * @date 2019-02-06 + * @precisions normal z -> c d s + * + */ + +#include "../potrf.h" + + +/* + * Preparing work's function: + * @param[in] First argument is task name. 
+ * @param[in] Second argument user function name
+*/
+
+AL4SAN_TASK_CPU(potrf, potrf_cpu_fun)
+
+/* Submits the Cholesky factorization of tile A(Am, An) as an AL4SAN task; nb is unused. */
+void Task_dpotrf( const AL4SAN_option_t *options,
+                  al4san_uplo_t uplo, int n, int nb,
+                  const AL4SAN_desc_t *A, int Am, int An, int lda,
+                  int iinfo )
+{
+    (void)nb;
+    /*
+     * Insert Task function:
+     * @param[in] First argument AL4SAN_TASK macro with task name
+     * @param[in] options argument which holds sequence data structure
+     * @param[in] Parameter list of va_list type to represent data and the dependencies
+     */
+
+    AL4SAN_BEGIN_ACCESS_DECLARATION;
+    AL4SAN_ACCESS_RW(A, Am, An);
+    AL4SAN_END_ACCESS_DECLARATION;
+
+    AL4SAN_Insert_Task(AL4SAN_TASK(potrf), (AL4SAN_option_t * )options,
+        AL4SAN_VALUE, &uplo, sizeof(int),
+        AL4SAN_VALUE, &n, sizeof(int),
+        AL4SAN_INOUT | AL4SAN_AFFINITY, AL4SAN_ADDR(A, double, Am, An), AL4SAN_DEP,
+        AL4SAN_VALUE, &lda, sizeof(int),
+        AL4SAN_VALUE, &(options->sequence), sizeof(AL4SAN_sequence_t*),
+        AL4SAN_VALUE, &(options->request), sizeof(AL4SAN_request_t*),
+        AL4SAN_VALUE, &iinfo, sizeof(int),
+        AL4SAN_PRIORITY, options->priority, sizeof(int),
+        AL4SAN_LABEL, "zpotrf", sizeof(char),
+//      AL4SAN_COLOR, "green", sizeof(char),
+        ARG_END);
+
+}
+
+/* CPU body of the potrf task: factorizes one tile in place; terminates the program with a failure status on error. */
+void potrf_cpu_fun(AL4SAN_arg_list *al4san_arg)
+{
+    int uplo;
+    int n;
+    double *A;
+    int lda;
+    int iinfo;
+    int info = 0;
+    AL4SAN_sequence_t* sequence;
+    AL4SAN_request_t* request;
+
+    /*
+     * Unpack in the order Task_dpotrf packs the values. uplo, sequence, request
+     * and iinfo are unpacked but not used below: the factorization is always
+     * done on the upper triangle ('U'), regardless of the uplo that was packed.
+     */
+
+    AL4SAN_Unpack_Arg(al4san_arg, &uplo, &n, &A, &lda, &sequence, &request, &iinfo);
+    info = LAPACKE_dpotrf_work(
+        LAPACK_COL_MAJOR,
+        'U',
+        n, A, lda );
+    if (info != 0) {
+        /* Report on stderr and exit non-zero so callers and scripts can detect the failure. */
+        fprintf(stderr, "\nMatrix is not SPD:%d\n", info);
+        exit(EXIT_FAILURE);
+    }
+}
diff --git a/example/potrf_cmake/codelets/codelet_dsyrk.c b/example/potrf_cmake/codelets/codelet_dsyrk.c
new file mode 100644
index 0000000..78f82cf
--- /dev/null
+++ b/example/potrf_cmake/codelets/codelet_dsyrk.c
@@ -0,0 +1,146 @@
+/**
+ *
+ * @file al4san/codelet_zsyrk.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ * Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ * Univ. Bordeaux. All rights reserved.
+ * @copyright 2017-2018 King Abdullah University of Science and Technology (KAUST).
+ * All rights reserved.
+ ***
+ *
+ * @brief Chameleon zsyrk AL4SAN codelet
+ *
+ * @version 1.0.1
+ * @comment This file has been automatically generated
+ * from Plasma 2.5.0 for CHAMELEON 1.0.0
+ * @author Hatem Ltaief
+ * @author Jakub Kurzak
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Rabab Alomairy
+ * @date 2019-02-06
+ *
+ */
+
+#include "../potrf.h"
+
+/*
+ * Preparing work's function:
+ * @param[in] First argument is task name.
+ * @param[in] Second argument cpu user function name
+ * @param[in] Third argument gpu user function name
+*/
+
+AL4SAN_TASK_CPU_GPU(syrk, syrk_cpu_func, syrk_cuda_func)
+/* Submits the symmetric rank-k update of tile C(Cm,Cn) with tile A(Am,An) as an AL4SAN task; nb is unused. */
+void Task_dsyrk( const AL4SAN_option_t *options,
+                 al4san_uplo_t uplo, al4san_trans_t trans,
+                 int n, int k, int nb,
+                 double alpha, const AL4SAN_desc_t *A, int Am, int An, int lda,
+                 double beta, const AL4SAN_desc_t *C, int Cm, int Cn, int ldc )
+{
+    (void)nb;
+    /*
+     * Insert Task function:
+     * @param[in] First argument AL4SAN_TASK macro with task name
+     * @param[in] options argument which holds sequence data structure
+     * @param[in] Parameter list of va_list type to represent data and the dependencies
+     */
+    AL4SAN_BEGIN_ACCESS_DECLARATION;
+    AL4SAN_ACCESS_R(A, Am, An);
+    AL4SAN_ACCESS_RW(C, Cm, Cn);
+    AL4SAN_END_ACCESS_DECLARATION;
+
+    AL4SAN_Insert_Task(AL4SAN_TASK(syrk), (AL4SAN_option_t * )options,
+        AL4SAN_VALUE, &uplo, sizeof(int),
+        AL4SAN_VALUE, &trans, sizeof(int),
+        AL4SAN_VALUE, &n, sizeof(int),
+        AL4SAN_VALUE, &k, sizeof(int),
+        AL4SAN_VALUE, &alpha, sizeof(double),
+        AL4SAN_INPUT, AL4SAN_ADDR(A, double, Am, An), AL4SAN_DEP,
+        AL4SAN_VALUE, &lda, sizeof(int),
+        AL4SAN_VALUE, &beta, sizeof(double),
+        AL4SAN_INOUT | AL4SAN_AFFINITY, AL4SAN_ADDR(C, double, Cm, Cn), AL4SAN_DEP,
+        AL4SAN_VALUE, &ldc, sizeof(int),
+#ifdef AL4SAN_USE_CUDA
+        AL4SAN_CUDA_FLG, ON, sizeof(int),
+#endif
+        AL4SAN_PRIORITY, options->priority, sizeof(int),
+        AL4SAN_LABEL, "zsyrk", sizeof(char),
+//      AL4SAN_COLOR, "red", sizeof(char),
+        ARG_END);
+
+}
+/* CPU body of the syrk task: unpacks what Task_dsyrk packed and calls cblas_dsyrk on one tile. */
+void syrk_cpu_func(AL4SAN_arg_list *al4san_arg)
+{
+    int uplo;
+    int trans;
+    int n;
+    int k;
+    double alpha;
+    double *A;
+    int lda;
+    double beta;
+    double *C;
+    int ldc;
+
+    /*
+     * AL4SAN_Unpack_Arg:
+     * @param[in] First argument: the AL4SAN_arg_list that holds the packed data
+     * @param[in] Remaining arguments: addresses of the locals to fill, in the order Task_dsyrk packs them
+     */
+
+    AL4SAN_Unpack_Arg(al4san_arg, &uplo, &trans, &n, &k, &alpha, &A, &lda, &beta, &C, &ldc);
+
+    cblas_dsyrk(
+        CblasColMajor,
+        (CBLAS_UPLO)uplo, (CBLAS_TRANSPOSE)trans,
+        n, k,
+        (alpha), A, lda,
+        (beta), C, ldc);
+
+}
+/* CUDA body of the syrk task; compiled only when AL4SAN_USE_CUDA is defined. */
+#ifdef AL4SAN_USE_CUDA
+void syrk_cuda_func(AL4SAN_arg_list *al4san_arg)
+{
+    int uplo;
+    int trans;
+    int n;
+    int k;
+    double alpha;
+    const double *A;
+    int lda;
+    double beta;
+    double *C;
+    int ldc;
+
+    /*
+     * AL4SAN_Unpack_Arg:
+     * @param[in] First argument: the AL4SAN_arg_list that holds the packed data
+     * @param[in] Remaining arguments: addresses of the locals to fill, in the order Task_dsyrk packs them
+     */
+
+    AL4SAN_Unpack_Arg(al4san_arg, &uplo, &trans, &n, &k, &alpha, &A, &lda, &beta, &C, &ldc);
+    /* NOTE(review): AL4SAN_getStream presumably declares/fetches `stream`; it is defined outside this file - confirm. */
+    AL4SAN_getStream(stream);
+
+    CUDA_dsyrk(
+        uplo, trans,
+        n, k,
+        &alpha, A, lda,
+        &beta, C, ldc,
+        stream);
+    /* Wait for the stream unless the build defines AL4SAN_CUDA_ASYNC. */
+#ifndef AL4SAN_CUDA_ASYNC
+    cudaStreamSynchronize( stream );
+#endif
+
+    return;
+}
+#endif /* AL4SAN_USE_CUDA */
+
diff --git a/example/potrf_cmake/codelets/codelet_dtrsm.c b/example/potrf_cmake/codelets/codelet_dtrsm.c
new file mode 100644
index 0000000..e73f330
--- /dev/null
+++ b/example/potrf_cmake/codelets/codelet_dtrsm.c
@@ -0,0 +1,145 @@
+/**
+ *
+ * @file al4san/codelet_ztrsm.c
+ *
+ * @copyright
2009-2014 The University of Tennessee and The University of
+ * Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ * Univ. Bordeaux. All rights reserved.
+ * @copyright 2017-2018 King Abdullah University of Science and Technology (KAUST).
+ * All rights reserved.
+ ***
+ *
+ * @brief Chameleon ztrsm AL4SAN codelet
+ *
+ * @version 1.0.1
+ * @comment This file has been automatically generated
+ * from Plasma 2.5.0 for AL4SAN 1.0.1
+ * @author Hatem Ltaief
+ * @author Jakub Kurzak
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Rabab Alomairy
+ * @date 2019-02-06
+ *
+ */
+
+#include "../potrf.h"
+/*
+ * Preparing work's function:
+ * @param[in] First argument is task name.
+ * @param[in] Second argument cpu user function name
+ * @param[in] Third argument gpu user function name
+*/
+AL4SAN_TASK_CPU_GPU(trsm, trsm_cpu_func, trsm_cuda_func)
+
+/* Submits the triangular solve of tile B(Bm,Bn) against tile A(Am,An) as an AL4SAN task; nb is not used. */
+void Task_dtrsm( const AL4SAN_option_t *options,
+                 al4san_side_t side, al4san_uplo_t uplo, al4san_trans_t transA, al4san_diag_t diag,
+                 int m, int n, int nb,
+                 double alpha, const AL4SAN_desc_t *A, int Am, int An, int lda,
+                 const AL4SAN_desc_t *B, int Bm, int Bn, int ldb )
+{
+
+    /*
+     * Insert Task function:
+     * @param[in] First argument AL4SAN_TASK macro with task name
+     * @param[in] options argument which holds sequence data structure
+     * @param[in] Parameter list of va_list type to represent data and the dependencies
+     */
+
+    AL4SAN_BEGIN_ACCESS_DECLARATION;
+    AL4SAN_ACCESS_R(A, Am, An);
+    AL4SAN_ACCESS_RW(B, Bm, Bn);
+    AL4SAN_END_ACCESS_DECLARATION;
+
+    AL4SAN_Insert_Task(AL4SAN_TASK(trsm), (AL4SAN_option_t * )options,
+        AL4SAN_VALUE, &side, sizeof(int),
+        AL4SAN_VALUE, &uplo, sizeof(int),
+        AL4SAN_VALUE, &transA, sizeof(int),
+        AL4SAN_VALUE, &diag, sizeof(int),
+        AL4SAN_VALUE, &m, sizeof(int),
+        AL4SAN_VALUE, &n, sizeof(int),
+        AL4SAN_VALUE, &alpha, sizeof(double),
+        AL4SAN_INPUT, AL4SAN_ADDR(A, double, Am, An), AL4SAN_DEP,
+        AL4SAN_VALUE, &lda, sizeof(int),
+        AL4SAN_INOUT | AL4SAN_AFFINITY, AL4SAN_ADDR(B, double, Bm, Bn), AL4SAN_DEP,
+        AL4SAN_VALUE, &ldb, sizeof(int),
+#ifdef AL4SAN_USE_CUDA
+        AL4SAN_CUDA_FLG, ON, sizeof(int),
+#endif
+        AL4SAN_PRIORITY, options->priority, sizeof(int),
+        AL4SAN_LABEL, "ztrsm", sizeof(char),
+//      AL4SAN_COLOR, "yellow", sizeof(char),
+        ARG_END);
+}
+/* CPU body of the trsm task: unpacks what Task_dtrsm packed and calls cblas_dtrsm on one tile. */
+void trsm_cpu_func(AL4SAN_arg_list *al4san_arg)
+{
+    int side;
+    int uplo;
+    int transA;
+    int diag;
+    int m;
+    int n;
+    double alpha;
+    double *A;
+    int lda;
+    double *B;
+    int ldb;
+
+    /*
+     * AL4SAN_Unpack_Arg:
+     * @param[in] First argument: the AL4SAN_arg_list that holds the packed data
+     * @param[in] Remaining arguments: addresses of the locals to fill, in the order Task_dtrsm packs them
+     */
+
+    AL4SAN_Unpack_Arg(al4san_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &A, &lda, &B, &ldb);
+
+    cblas_dtrsm(
+        CblasColMajor,
+        (CBLAS_SIDE)side, (CBLAS_UPLO)uplo,
+        (CBLAS_TRANSPOSE)transA, (CBLAS_DIAG)diag,
+        m, n,
+        (alpha), A, lda,
+        B, ldb);
+
+}
+#ifdef AL4SAN_USE_CUDA
+void trsm_cuda_func(AL4SAN_arg_list *al4san_arg)
+{
+    int side;
+    int uplo;
+    int transA;
+    int diag;
+    int m;
+    int n;
+    double alpha;
+    const double *A;
+    int lda;
+    double *B;
+    int ldb;
+    /*
+     * CUDA body of the trsm task (built only with AL4SAN_USE_CUDA).
+     * AL4SAN_Unpack_Arg fills the locals above from the packed argument list,
+     * in the order Task_dtrsm packs them.
+     */
+    AL4SAN_Unpack_Arg(al4san_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha, &A, &lda, &B, &ldb);
+    /* NOTE(review): AL4SAN_getStream presumably declares/fetches `stream`; it is defined outside this file - confirm. */
+    AL4SAN_getStream(stream);
+
+    CUDA_dtrsm(
+        side, uplo, transA, diag,
+        m, n,
+        &alpha, A, lda,
+        B, ldb,
+        stream);
+    /* Wait for the stream unless the build defines AL4SAN_CUDA_ASYNC. */
+#ifndef AL4SAN_CUDA_ASYNC
+    cudaStreamSynchronize( stream );
+#endif
+
+    return;
+}
+#endif /* AL4SAN_USE_CUDA */
diff --git a/example/potrf_cmake/compute/dplgsy.c b/example/potrf_cmake/compute/dplgsy.c
new file mode 100644
index 0000000..7496c4b
--- /dev/null
+++ b/example/potrf_cmake/compute/dplgsy.c
@@ -0,0 +1,180 @@
+/**
+ *
+ * @file dplgsy.c
+ *
+ * @copyright
2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon dplgsy wrappers
+ *
+ * @version 0.9.2
+ * @comment This file is a copy of dplgsy.c,
+ *          which has been automatically generated
+ *          from Plasma 2.5.0 for AL4SAN 0.9.2
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Rade Mathis
+ * @author Florent Pruvost
+ * @date 2014-11-16
+ * @generated from /home/omairyrm/modified-al4san/chameleon/compute/zplgsy.c, normal z -> d, Sun Jun 14 16:04:55 2020
+ *
+ */
+
+#include "../potrf.h"
+
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup double_Tile
+ *
+ *  dplgsy_Tile - Generate a random symmetric (positive definite if 'bump' is
+ *  large enough) half-matrix by tiles.
+ *  Blocking wrapper: creates its own sequence, submits the generation through
+ *  dplgsy_Tile_Async(), flushes the descriptor, waits for the sequence and
+ *  destroys it.
+ *  Operates on matrices stored by tiles.
+ *  All matrices are passed through descriptors.
+ *  All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] bump
+ *          The value to add to the diagonal to be sure
+ *          to have a positive definite matrix.
+ *
+ * @param[in] uplo
+ *          The half of the matrix that will be generated.
+ *
+ * @param[out] A
+ *          On exit, the random symmetric matrix A generated.
+ *
+ * @param[in] seed
+ *          The seed used in the random generation.
+ *
+ *******************************************************************************
+ *
+ * @retval AL4SAN_ERR_NOT_INITIALIZED if no AL4SAN context is available
+ * @retval otherwise the status stored in the internal sequence after the wait
+ *         (AL4SAN_SUCCESS on successful exit)
+ *
+ *******************************************************************************
+ *
+ * @sa dplgsy_Tile_Async
+ * @sa pdplgsy
+ *
+ */
+int dplgsy_Tile( double bump, al4san_uplo_t uplo,
+                 AL4SAN_desc_t *A,
+                 unsigned long long int seed )
+{
+    AL4SAN_context_t *chamctxt;
+    AL4SAN_sequence_t *sequence = NULL;
+    AL4SAN_request_t request = AL4SAN_REQUEST_INITIALIZER;
+    int status;
+
+    chamctxt = al4san_context_self();
+    if (chamctxt == NULL) {
+        al4san_fatal_error("dplgsy_Tile", "AL4SAN not initialized");
+        return AL4SAN_ERR_NOT_INITIALIZED;
+    }
+    sequence = AL4SAN_Sequence_Create(); /* NOTE(review): result is used without a NULL check */
+
+    dplgsy_Tile_Async( bump, uplo, A, seed, sequence, &request ); /* return value ignored; the sequence status is read below */
+
+    AL4SAN_Desc_Flush( A, sequence );
+
+
+    AL4SAN_Sequence_Wait(sequence);
+    status = sequence->status; /* read the status before the sequence is destroyed */
+
+    AL4SAN_Sequence_Destroy( sequence );
+
+
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup double_Tile_Async
+ *
+ *  dplgsy_Tile_Async - Generate a random symmetric (positive definite if
+ *  'bump' is large enough) half-matrix by tiles.
+ *  Non-blocking equivalent of dplgsy_Tile(): validates its arguments and
+ *  hands the work to pdplgsy() without waiting on the sequence.
+ *  May return before the computation is finished.
+ *  Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] bump
+ *          The value to add to the diagonal (forwarded to pdplgsy()).
+ *
+ * @param[in] uplo
+ *          The half of the matrix that will be generated (forwarded to pdplgsy()).
+ *
+ * @param[out] A
+ *          Descriptor of the matrix to generate. It must pass
+ *          al4san_desc_check() and use square tiles (A->nb == A->mb).
+ *
+ * @param[in] seed
+ *          The seed used in the random generation.
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @retval AL4SAN_SUCCESS on submission, or when min(A->m, A->n) == 0
+ * @retval AL4SAN_ERR_NOT_INITIALIZED if no AL4SAN context is available
+ * @retval AL4SAN_ERR_UNALLOCATED if sequence or request is NULL
+ * @retval the value returned by al4san_request_fail() when the sequence is
+ *         already in error, the descriptor is invalid, or tiles are not square
+ *
+ *******************************************************************************
+ *
+ * @sa dplgsy_Tile
+ * @sa pdplgsy
+ *
+ */
+int dplgsy_Tile_Async( double bump,
+                       al4san_uplo_t uplo,
+                       AL4SAN_desc_t *A,
+                       unsigned long long int seed,
+                       AL4SAN_sequence_t *sequence,
+                       AL4SAN_request_t *request )
+{
+    AL4SAN_context_t *al4sanctxt;
+
+    al4sanctxt = al4san_context_self();
+    if (al4sanctxt == NULL) {
+        al4san_fatal_error("AL4SAN_dplgsy_Tile", "AL4SAN not initialized");
+        return AL4SAN_ERR_NOT_INITIALIZED;
+    }
+    if (sequence == NULL) {
+        al4san_fatal_error("AL4SAN_dplgsy_Tile", "NULL sequence");
+        return AL4SAN_ERR_UNALLOCATED;
+    }
+    if (request == NULL) {
+        al4san_fatal_error("AL4SAN_dplgsy_Tile", "NULL request");
+        return AL4SAN_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if (sequence->status == AL4SAN_SUCCESS) {
+        request->status = AL4SAN_SUCCESS;
+    }
+    else {
+        return al4san_request_fail(sequence, request, AL4SAN_ERR_SEQUENCE_FLUSHED);
+    }
+
+    /* Check descriptors for correctness */
+    if (al4san_desc_check(A) != AL4SAN_SUCCESS) {
+        al4san_error("AL4SAN_dplgsy_Tile", "invalid descriptor");
+        return al4san_request_fail(sequence, request, AL4SAN_ERR_ILLEGAL_VALUE);
+    }
+    /* Check input arguments */
+    if (A->nb != A->mb) {
+        al4san_error("AL4SAN_dplgsy_Tile", "only square tiles supported");
+        return al4san_request_fail(sequence, request, AL4SAN_ERR_ILLEGAL_VALUE);
+    }
+
+    /* Quick return */
+    if (al4san_min( A->m, A->n ) == 0)
+        return AL4SAN_SUCCESS;
+
+    pdplgsy( bump, uplo, A, seed, sequence, request ); /* submits one generation task per tile */
+
+    return AL4SAN_SUCCESS;
+}
+
diff --git a/example/potrf_cmake/compute/pdplgsy.c b/example/potrf_cmake/compute/pdplgsy.c
new file mode 100644
index 0000000..7949630
--- /dev/null
+++ b/example/potrf_cmake/compute/pdplgsy.c
@@ -0,0 +1,99 @@
+/**
+ *
+ * @file pdplgsy.c
+ *
+
* @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon dplgsy parallel algorithm
+ *
+ * @version 0.9.2
+ * @comment This file is a copy of pdplgsy.c,
+ *          which has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 0.9.2
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Rade Mathis
+ * @author Florent Pruvost
+ * @date 2014-11-16
+ * @generated from /home/omairyrm/modified-chameleon/chameleon/compute/pzplgsy.c, normal z -> d, Sun Jun 14 16:04:55 2020
+ *
+ */
+#include "../potrf.h"
+#define A(m,n) A, m, n /* expands to the (descriptor, tile row, tile column) argument triple */
+#define BLKLDD(A, k) A->get_blkldd( A, k ) /* leading dimension of tile row k, as reported by the descriptor */
+
+/**
+ *  pdplgsy - Generate a random symmetric (positive definite if 'bump' is large
+ *  enough) half-matrix by tiles.
+ *
+ *  Submits one TASK_dplgsy per tile of the part selected by uplo:
+ *  tiles with n <= m for Al4sanLower, tiles with n >= m for Al4sanUpper,
+ *  and every tile otherwise. Returns immediately, submitting nothing, when
+ *  the sequence is already in error.
+ *
+ *  @param[in]  bump      value forwarded to every tile task
+ *  @param[in]  uplo      part of the matrix to generate
+ *  @param[out] A         descriptor of the tiled matrix to fill
+ *  @param[in]  seed      seed forwarded to every tile task
+ *  @param[in]  sequence  sequence the tasks are attached to (dereferenced, must not be NULL)
+ *  @param[in]  request   request handed to AL4SAN_Options_Init()
+ */
+void pdplgsy( double bump, al4san_uplo_t uplo, AL4SAN_desc_t *A,
+              unsigned long long int seed,
+              AL4SAN_sequence_t *sequence, AL4SAN_request_t *request )
+{
+    AL4SAN_context_t *chamctxt;
+    AL4SAN_option_t options;
+
+    int m, n;
+    int ldam;
+    int tempmm, tempnn;
+
+    chamctxt = al4san_context_self(); /* NOTE(review): assigned but not used afterwards in this function */
+    if (sequence->status != AL4SAN_SUCCESS) {
+        return;
+    }
+    AL4SAN_Options_Init(&options, sequence, request);
+
+    for (m = 0; m < A->mt; m++) {
+        tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; /* last tile row may be partial */
+        ldam = BLKLDD(A, m);
+
+        /*
+         *  Al4sanLower: tiles on and below the diagonal
+         */
+        if (uplo == Al4sanLower) {
+            for (n = 0; n <= m; n++) {
+                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; /* last tile column may be partial */
+
+                options.priority = m + n;
+                TASK_dplgsy(
+                    &options,
+                    bump, tempmm, tempnn, A(m, n), ldam,
+                    A->m, m*A->mb, n*A->nb, seed );
+            }
+        }
+        /*
+         *  Al4sanUpper: tiles on and above the diagonal
+         */
+        else if (uplo == Al4sanUpper) {
+            for (n = m; n < A->nt; n++) {
+                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+
+                options.priority = m + n;
+                TASK_dplgsy(
+                    &options,
+                    bump, tempmm, tempnn, A(m, n), ldam,
+                    A->m, m*A->mb, n*A->nb, seed );
+            }
+        }
+        /*
+         *  Al4sanUpperLower: every tile.
+         *  NOTE(review): unlike the two branches above, options.priority is
+         *  not set here, so tasks use whatever AL4SAN_Options_Init() left
+         *  in it - confirm this is intended.
+         */
+        else {
+            for (n = 0; n < A->nt; n++) {
+                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+
+                TASK_dplgsy(
+                    &options,
+                    bump, tempmm, tempnn, A(m, n), ldam,
+                    A->m, m*A->mb, n*A->nb, seed );
+            }
+        }
+    }
+    AL4SAN_Options_Finalize(&options);
+
+}
diff --git a/example/potrf_cmake/coreblas/core_dplgsy.c b/example/potrf_cmake/coreblas/core_dplgsy.c
new file mode 100644
index 0000000..0924190
--- /dev/null
+++ b/example/potrf_cmake/coreblas/core_dplgsy.c
@@ -0,0 +1,147 @@
+/**
+ *
+ * @file core_dplgsy.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon core_dplgsy CPU kernel
+ *
+ * @version 0.9.2
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for CHAMELEON 0.9.2
+ * @author Piotr Luszczek
+ * @author Pierre Lemarinier
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @date 2014-11-16
+ * @generated from /home/omairyrm/al4san-dev/modified-chameleon/chameleon/coreblas/compute/core_zplgsy.c, normal z -> d, Sat Jun 27 18:00:10 2020
+ *
+ */
+#include "../potrf.h"
+
+/*
+ Rnd64seed is a global variable but it doesn't spoil thread safety. All matrix
+ generating threads only read Rnd64seed. It is safe to set Rnd64seed before
+ and after any calls to create_tile(). The only problem can be caused if
+ Rnd64seed is changed during the matrix generation time.
+ */
+
+//static unsigned long long int Rnd64seed = 100;
+#define Rnd64_A 6364136223846793005ULL
+#define Rnd64_C 1ULL
+#define RndF_Mul 5.4210108624275222e-20f
+#define RndD_Mul 5.4210108624275222e-20
+
+#if defined(PRECISION_z) || defined(PRECISION_c)
+#define NBELEM 2
+#else
+#define NBELEM 1
+#endif
+
+/*
+ * Rnd64_jump - advance the linear congruential generator
+ * x -> Rnd64_A * x + Rnd64_C by n steps starting from seed, using
+ * square-and-multiply over the bits of n, and return the resulting state.
+ */
+static unsigned long long int
+Rnd64_jump(unsigned long long int n, unsigned long long int seed ) {
+    unsigned long long int a_k, c_k, ran;
+    int i;
+
+    a_k = Rnd64_A;
+    c_k = Rnd64_C;
+
+    ran = seed;
+    for (i = 0; n; n >>= 1, i++) {
+        if (n & 1)
+            ran = a_k * ran + c_k;
+        c_k *= (a_k + 1);
+        a_k *= a_k;
+    }
+
+    return ran;
+}
+
+
+/*
+ * CORE_dplgsy - Generate a tile for random symmetric (positive definite if
+ * 'bump' is large enough) matrix.
+ *
+ *   bump   value added to the diagonal entries of a diagonal tile
+ *   m, n   number of rows / columns of the tile
+ *   A      tile storage, column-major with leading dimension lda
+ *   bigM   number of rows of the full matrix (stride of the random stream)
+ *   m0, n0 global row / column offset of the tile's first entry
+ *   seed   seed of the random stream
+ *
+ * A tile below the diagonal (m0 > n0) is filled column by column, a tile
+ * above it (m0 < n0) is filled from the transposed position of the stream,
+ * and a diagonal tile (m0 == n0) generates its lower triangle and mirrors it.
+ */
+void CORE_dplgsy( double bump, int m, int n, double *A, int lda,
+                  int bigM, int m0, int n0, unsigned long long int seed )
+{
+    double *tmp = A;
+    int64_t i, j;
+    unsigned long long int ran, jump;
+
+    jump = (unsigned long long int)m0 + (unsigned long long int)n0 * (unsigned long long int)bigM;
+
+    /*
+     * Tile diagonal
+     */
+    if ( m0 == n0 ) {
+        for (j = 0; j < n; j++) {
+            ran = Rnd64_jump( NBELEM * jump, seed );
+
+            for (i = j; i < m; i++) {
+                *tmp = 0.5f - ran * RndF_Mul;
+                ran = Rnd64_A * ran + Rnd64_C;
+#if defined(PRECISION_z) || defined(PRECISION_c)
+                *tmp += I*(0.5f - ran * RndF_Mul);
+                ran = Rnd64_A * ran + Rnd64_C;
+#endif
+                tmp++;
+            }
+            tmp += (lda - i + j + 1);
+            jump += bigM + 1;
+        }
+
+        /* NOTE(review): this loop body and the "Lower part" header below were
+         * truncated in the reviewed text; restored from the upstream Chameleon
+         * core_zplgsy kernel - confirm against the original file. */
+        for (j = 0; j < n; j++) {
+            A[j+j*lda] += bump;
+
+            /* mirror the generated lower triangle into the upper triangle */
+            for (i=0; i<j; i++) {
+                A[lda*j+i] = A[lda*i+j];
+            }
+        }
+    }
+    /*
+     * Lower part
+     */
+    else if ( m0 > n0 ) {
+        for (j = 0; j < n; j++) {
+            ran = Rnd64_jump( NBELEM * jump, seed );
+
+            for (i = 0; i < m; i++) {
+                *tmp = 0.5f - ran * RndF_Mul;
+                ran = Rnd64_A * ran + Rnd64_C;
+#if defined(PRECISION_z) || defined(PRECISION_c)
+                *tmp += I*(0.5f - ran * RndF_Mul);
+                ran = Rnd64_A * ran + Rnd64_C;
+#endif
+                tmp++;
+            }
+            tmp += (lda - i);
+            jump += bigM;
+        }
+    }
+    /*
+     * Upper part
+     */
+    else if ( m0 < n0 ) {
+        /* Overwrite jump */
+        jump = (unsigned long long int)n0 + (unsigned long long int)m0 * (unsigned long long int)bigM;
+
+        for (i = 0; i < m; i++) {
+            ran = Rnd64_jump( NBELEM * jump, seed );
+
+            for (j = 0; j < n; j++) {
+                A[j*lda+i] = 0.5f - ran * RndF_Mul;
+                ran = Rnd64_A * ran + Rnd64_C;
+#if defined(PRECISION_z) || defined(PRECISION_c)
+                A[j*lda+i] += I*(0.5f - ran * RndF_Mul);
+                ran = Rnd64_A * ran + Rnd64_C;
+#endif
+            }
+            jump += bigM;
+        }
+    }
+}
diff --git a/example/potrf_cmake/potrf.c b/example/potrf_cmake/potrf.c
new file mode 100644
index 0000000..c2ddac4
--- /dev/null
+++ b/example/potrf_cmake/potrf.c
@@ -0,0 +1,271 @@
+/**
+ *
+ * @file cholesky.c
+ *
+ * @copyright 2018 King Abdullah University of Science and Technology (KAUST).
+ *                      All rights reserved.
+ **/
+/**
+ * @file cholesky.c
+ * @brief Cholesky Example
+ *
+ * AL4SAN is a software package provided by King Abdullah University of Science and Technology (KAUST)
+ *
+ * @version 1.0.1
+ * @author Rabab Alomairy
+ * @date 2019-02-06
+ *
+ **/
+
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "potrf.h"
+
+float get_time();
+/*
+ * @brief This example only factorizes using Cholesky using the C interface of
+ * BLAS/LAPACK.
+ * @details This program solves a linear system AX=B with matrix A symmetric
+ * positive definite.
+ * The matrix A is first factorized using the Cholesky factorization, A = LL^T.
+ * Then the solution X is calculated thanks to forward and back substitutions.
+ * We use the C interface of BLAS and LAPACK, that is, CBLAS and LAPACKE, to solve this problem.
+ * This program is either sequential if it is linked with a sequential LAPACK library
+ * The precision is: double
+ */
+
+#define A(m,n) A, m, n
+#define BLKLDD(A, k) A->get_blkldd( A, k )
+
+/**
+ * AL4SAN_cholesky - tiled Cholesky factorization submitted as AL4SAN tasks.
+ *
+ * A is overwritten tile by tile with its factor. For every panel k the
+ * routine submits a dpotrf on the diagonal tile, a dtrsm on every tile to its
+ * right, then dsyrk/dgemm updates of the trailing tiles, and finally waits on
+ * a private sequence.
+ *
+ * @param[in]     uplo  accepted for interface symmetry; the body below always
+ *                      submits the Al4sanUpper variant of each kernel.
+ * @param[in,out] A     descriptor of the tiled matrix to factorize.
+ *
+ * @retval AL4SAN_ERR_NOT_INITIALIZED if no AL4SAN context is available
+ * @retval otherwise the status of the sequence after all tasks completed
+ *         (AL4SAN_SUCCESS on success)
+ */
+int AL4SAN_cholesky(al4san_uplo_t uplo, AL4SAN_desc_t *A)
+{
+
+    /*
+     * Define AL4SAN handle for sequence to manage a group of tasks.
+     * Define AL4SAN handle for options to set global task options and set the sequence handle.
+     * Define AL4SAN handle for request status.
+     */
+
+    AL4SAN_context_t *al4sanctxt;
+    AL4SAN_sequence_t *sequence = NULL;
+    /* The request is a value (as in dplgsy_Tile) and is passed by address;
+     * it used to be a pointer brace-initialised like a struct. */
+    AL4SAN_request_t request = AL4SAN_REQUEST_INITIALIZER;
+    AL4SAN_option_t options;
+
+    int status;
+    int k, m, n;
+    int ldak, ldam;
+    int tempkm, tempmm, tempnn;
+
+    double zone = (double) 1.0;
+    double mzone = (double)-1.0;
+
+    (void)uplo; /* only the upper variant is implemented below */
+
+    al4sanctxt = al4san_context_self();
+    if (al4sanctxt == NULL) {
+        al4san_fatal_error("AL4SAN_cholesky", "AL4SAN not initialized");
+        return AL4SAN_ERR_NOT_INITIALIZED;
+    }
+
+
+    /*
+     * Create sequence data structure
+     * Init task global options data structure
+     */
+    sequence = AL4SAN_Sequence_Create();
+    AL4SAN_Options_Init(&options, sequence, &request);
+
+    for (k = 0; k < A->nt; k++) {
+        tempkm = k == A->nt-1 ? A->n-k*A->nb : A->nb;
+        ldak = BLKLDD(A, k);
+
+        /* factorize the diagonal tile of panel k */
+        options.priority = 2*A->nt - 2*k;
+        Task_dpotrf(
+            &options,
+            Al4sanUpper,
+            tempkm, A->mb,
+            A(k, k), ldak, A->nb*k);
+
+        /* solve against the tiles to the right of the diagonal tile */
+        for (n = k+1; n < A->nt; n++) {
+            tempnn = n == A->nt-1 ? A->n - n*A->nb : A->nb;
+
+            options.priority = 2*A->nt - 2*k - n;
+            Task_dtrsm(
+                &options,
+                Al4sanLeft, Al4sanUpper, Al4sanConjTrans, Al4sanNonUnit,
+                A->mb, tempnn, A->mb,
+                zone, A(k, k), ldak,
+                A(k, n), ldak);
+        }
+        AL4SAN_Data_Flush( sequence, A(k, k) );
+
+        /* update the trailing tiles */
+        for (m = k+1; m < A->mt; m++) {
+            tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb;
+            ldam = BLKLDD(A, m);
+
+            options.priority = 2*A->nt - 2*k - m;
+            Task_dsyrk(
+                &options,
+                Al4sanUpper, Al4sanConjTrans,
+                tempmm, A->mb, A->mb,
+                mzone, A(k, m), ldak,
+                zone, A(m, m), ldam);
+
+            for (n = m+1; n < A->nt; n++) {
+                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+
+                options.priority = 2*A->nt - 2*k - n - m;
+                Task_dgemm(
+                    &options,
+                    Al4sanTrans, Al4sanNoTrans,
+                    tempmm, tempnn, A->mb, A->mb,
+                    mzone, A(k, m), ldak,
+                    A(k, n), ldak,
+                    zone, A(m, n), ldam);
+            }
+            AL4SAN_Data_Flush( sequence, A(k, m) );
+        }
+
+    }
+
+    /*
+     * Finalize options data
+     */
+    AL4SAN_Options_Finalize(&options);
+
+    AL4SAN_Desc_Flush( A, sequence );
+
+    /*
+     * Use sequence for sync, keep its status, then destroy it
+     */
+
+    AL4SAN_Sequence_Wait(sequence);
+    status = sequence->status;
+    AL4SAN_Sequence_Destroy( sequence );
+
+
+    return status;
+}
+
+/*
+ * Driver: reads the command line, initializes AL4SAN, generates an N x N
+ * matrix with dplgsy_Tile(), times AL4SAN_cholesky() and prints the elapsed
+ * time and Gflop/s on MPI rank 0.
+ */
+int main(int argc, char* argv[]){
+    size_t N;    // matrix order
+    int NB;      // number of rows and columns in tiles
+    int NRHS;    // number of RHS vectors
+    int NCPU;    // number of cores to use
+    int NGPU;    // number of gpus (cuda devices) to use
+    al4san_uplo_t UPLO = Al4sanUpper; // where is stored L
+    /* descriptors necessary for calling AL4SAN data descriptor interface */
+    AL4SAN_desc_t *descA = NULL, *descAC = NULL, *descB = NULL, *descX = NULL;
+    /* declarations to time the program and evaluate performances */
+    double fmuls, fadds, flops, gflops, cpu_time= 0.0;
+
+    /* variable to check the numerical results */
+    double anorm, bnorm, xnorm, eps, res;
+    int hres;
+
+    /* initialize some parameters with default values */
+    int iparam[IPARAM_SIZEOF];
+    memset(iparam, 0, IPARAM_SIZEOF*sizeof(int));
+    init_iparam(iparam);
+
+    /* read arguments */
+    read_args(argc, argv, iparam);
+    N = iparam[IPARAM_N];
+    NRHS = iparam[IPARAM_NRHS];
+    NB = iparam[IPARAM_NB];
+    /* compute the algorithm complexity to evaluate performances */
+    fadds = (double)( FADDS_POTRF(N));
+    fmuls = (double)( FMULS_POTRF(N));
+
+    flops = 1e-9 * (fmuls + fadds);
+
+    /* initialize the number of thread if not given by the user in argv
+     * It makes sense only if this program is linked with pthread and
+     * multithreaded BLAS and LAPACK */
+    if ( iparam[IPARAM_THRDNBR] == -1 ) {
+        get_thread_count( &(iparam[IPARAM_THRDNBR]) );
+    }
+    NCPU = iparam[IPARAM_THRDNBR];
+    NGPU = iparam[IPARAM_GPUS];
+
+
+    /* Initialize AL4SAN with main parameters */
+    AL4SAN_context_t *al4san = AL4SAN_Init(runtime, NCPU, NGPU);
+
+#if defined(AL4SAN_USE_MPI)
+    al4san->prows=iparam[IPARAM_P];
+    al4san->pcols=iparam[IPARAM_Q];
+    AL4SAN_Init_Processor_Grid(al4san->prows, al4san->pcols);
+#endif
+
+    /* print information to user */
+    print_header( argv[0], iparam);
+
+    /*
+     * Allocate memory for our data (see potrf.h)
+     *     - matrix A : size N x N
+     */
+
+    /*
+     * Initialize the structure required for AL4SAN data interface
+     * AL4SAN_desc_t is a structure wrapping your data allowing AL4SAN to get
+     * pointers to tiles. A tile is a data subset of your matrix on which we
+     * apply some optimized CPU/GPU kernels.
+     * Notice that this routine supposes your matrix is a contiguous vector of
+     * data (1D array), as a data you would give to BLAS/LAPACK.
+     * Main arguments:
+     *     - descA is a pointer to a descriptor, you need to give the address
+     *     of this pointer
+     *     - if you want to give your allocated matrix give its address,
+     *     if not give a NULL pointer, the routine will allocate the memory
+     *     and you access the matrix data with descA->mat
+     *     - give the data type (Al4sanByte, Al4sanInteger, Al4sanRealFloat,
+     *     Al4sanRealDouble, Al4sanComplexFloat, Al4sanComplexDouble)
+     *     - number of rows in a block (tile)
+     *     - number of columns in a block (tile)
+     *     - number of elements in a block (tile)
+     * The other parameters are specific, use:
+     * AL4SAN_Desc_Create( ... , 0, 0, number of rows, number of columns, 1, 1);
+     * Have a look to the documentation for details about these parameters.
+     */
+
+    AL4SAN_Matrix_Create(&descA, NULL, Al4sanRealDouble,
+                         AL4SAN_Col_Major, NB, NB, NB, N, N, N);
+    /* generate A matrix with random values such that it is spd */
+    dplgsy_Tile( (double)N, Al4sanUpperLower, descA, 51 );
+
+    cpu_time = -AL4SAN_timer();
+    /* Cholesky factorization:
+     * A is replaced by its factorization L or L^T depending on uplo */
+    AL4SAN_cholesky(UPLO, descA);
+
+    cpu_time += AL4SAN_timer();
+
+    /* print timing and performance to user */
+    gflops = flops / cpu_time;
+
+    if(AL4SAN_My_Mpi_Rank()==0){
+        printf( "%9.3f %9.2f\n", cpu_time, gflops);
+        fflush( stdout );
+    }
+
+    /* deallocate A and associated descriptors descA, ... */
+    AL4SAN_Desc_Destroy( &descA );
+
+    /* Finalize AL4SAN */
+    AL4SAN_Finalize();
+    return 0;
+}
+
diff --git a/example/potrf_cmake/potrf.h b/example/potrf_cmake/potrf.h
new file mode 100644
index 0000000..7be3bb9
--- /dev/null
+++ b/example/potrf_cmake/potrf.h
@@ -0,0 +1,248 @@
+/**
+ *
+ * @file potrf.h
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon step2 example header
+ *
+ * @version 1.0.1
+ * @author Florent Pruvost
+ * @date 2019-02-06
+ *
+ */
+#ifndef _potrf_h_
+#define _potrf_h_
+
+/* Common include for all steps of the tutorial */
+
+
+
+/* Specific includes for potrf */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "control/al4san_descriptor.h"
+#include "runtime/al4san_runtime.h"
+//#include
+#include
+#include
+#include
+#if defined( _WIN32 ) || defined( _WIN64 )
+#include
+#else /* Non-Windows */
+#include
+#include
+#endif
+
+/* Common functions for all steps of the tutorial */
+
+/**
+ * Store the number of online processors in *thrdnbr.
+ * Falls back to 1 when the platform query fails, so the caller never
+ * receives a non-positive or uninitialized count.
+ */
+static void get_thread_count(int *thrdnbr) {
+#if defined WIN32 || defined WIN64
+    const char *nproc = getenv( "NUMBER_OF_PROCESSORS" );
+    /* getenv() returns NULL when the variable is unset */
+    if ( nproc == NULL || sscanf( nproc, "%d", thrdnbr ) != 1 || *thrdnbr < 1 ) {
+        *thrdnbr = 1;
+    }
+#else
+    long nproc = sysconf(_SC_NPROCESSORS_ONLN);
+    /* sysconf() returns -1 on error */
+    *thrdnbr = (nproc > 0) ? (int)nproc : 1;
+#endif
+}
+
+/* define complexity of algorithms - see Lawn 41 page 120 */
+#define FMULS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) + 0.5) * (double)(__n) + (1. / 3.)))
+#define FADDS_POTRF(__n) ((double)(__n) * (((1. / 6.) * (double)(__n) ) * (double)(__n) - (1. / 6.)))
+#define FMULS_TRSM(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)+1.))
+#define FADDS_TRSM(__m, __n) (0.5 * (double)(__n) * (double)(__m) * ((double)(__m)-1.))
+
+
+/*
+ Cholesky Task Headers
+*/
+
+void Task_dpotrf( const AL4SAN_option_t *options,
+                  al4san_uplo_t uplo, int n, int nb,
+                  const AL4SAN_desc_t *A, int Am, int An, int lda,
+                  int iinfo );
+void Task_dgemm( const AL4SAN_option_t *options,
+                 al4san_trans_t transA, al4san_trans_t transB,
+                 int m, int n, int k, int nb,
+                 double alpha, const AL4SAN_desc_t *A, int Am, int An, int lda,
+                 const AL4SAN_desc_t *B, int Bm, int Bn, int ldb,
+                 double beta, const AL4SAN_desc_t *C, int Cm, int Cn, int ldc );
+void TASK_dplgsy( const AL4SAN_option_t *options,
+                  double bump, int m, int n, AL4SAN_desc_t *A, int Am, int An, int lda,
+                  int bigM, int m0, int n0, unsigned long long int seed );
+void Task_dsyrk( const AL4SAN_option_t *options,
+                 al4san_uplo_t uplo, al4san_trans_t trans,
+                 int n, int k, int nb,
+                 double alpha, const AL4SAN_desc_t *A, int Am, int An, int lda,
+                 double beta, const AL4SAN_desc_t *C, int Cm, int Cn, int ldc );
+void Task_dtrsm( const AL4SAN_option_t *options,
+                 al4san_side_t side, al4san_uplo_t uplo, al4san_trans_t transA, al4san_diag_t diag,
+                 int m, int n, int nb,
+                 double alpha, const AL4SAN_desc_t *A, int Am, int An, int lda,
+                 const AL4SAN_desc_t *B, int Bm, int Bn, int ldb );
+void CORE_dplgsy( double bump, int m, int n, double *A, int lda,
+                  int bigM, int m0, int n0, unsigned long long int seed );
+
+int dplgsy_Tile( double bump, al4san_uplo_t uplo,
+                 AL4SAN_desc_t *A,
+                 unsigned long long int seed );
+int dplgsy_Tile_Async( double bump,
+                       al4san_uplo_t uplo,
+                       AL4SAN_desc_t *A,
+                       unsigned long long int seed,
+                       AL4SAN_sequence_t *sequence,
+                       AL4SAN_request_t *request );
+
+void pdplgsy( double bump, al4san_uplo_t uplo, AL4SAN_desc_t *A,
+              unsigned long long int seed,
+              AL4SAN_sequence_t *sequence, AL4SAN_request_t *request );
+void gemm_cpu_func( AL4SAN_arg_list *al4san_arg);
+void potrf_cpu_fun(AL4SAN_arg_list *al4san_arg);
+void trsm_cpu_func(AL4SAN_arg_list *al4san_arg);
+void syrk_cpu_func(AL4SAN_arg_list *al4san_arg);
+void plgsy_cpu_fun(AL4SAN_arg_list *al4san_arg);
+
+/* Name of the runtime selected with --runtime= (at most 19 characters).
+ * NOTE(review): this is an object definition in a header; every source file
+ * including potrf.h defines it, which fails to link under -fno-common.
+ * Consider an extern declaration here plus one definition in potrf.c. */
+char runtime[20];
+/* Integer parameters for the potrf example */
+enum iparam_step2 {
+    IPARAM_THRDNBR, /* Number of cores */
+    IPARAM_GPUS,    /* Number of gpus */
+    IPARAM_N,       /* Number of columns of the matrix */
+    IPARAM_NRHS,    /* Number of RHS */
+    IPARAM_NB,      /* Tile size NB */
+    IPARAM_P,       /* Number of rows of the process grid (P) */
+    IPARAM_Q,       /* Number of columns of the process grid (Q) */
+    /* End */
+    IPARAM_SIZEOF
+};
+
+/* Specific routines used in the potrf.c main program */
+
+/**
+ * Initialize integer parameters with their default values
+ * (-1 for the thread and GPU counts means "not given by the user").
+ */
+static void init_iparam(int iparam[IPARAM_SIZEOF]){
+    iparam[IPARAM_THRDNBR ] = -1;
+    iparam[IPARAM_GPUS ] = -1;
+    iparam[IPARAM_N ] = 500;
+    iparam[IPARAM_NRHS ] = 1;
+    iparam[IPARAM_NB ] = 10;
+    iparam[IPARAM_P ] = 1;
+    iparam[IPARAM_Q ] = 1;
+
+}
+
+/**
+ * Print how to use the program
+ */
+static void show_help(char *prog_name) {
+    printf( "Usage:\n%s [options]\n\n", prog_name );
+    printf( "Options are:\n"
+            " --help Show this help\n"
+            "\n"
+            " --n=X dimension (N). (default: 500)\n"
+            " --nrhs=X number of RHS. (default: 1)\n"
+            " --nb=X tile size (NB). (default: 10)\n"
+            " --p=X number of rows of the process grid (P). (default: 1)\n"
+            " --q=X number of columns of the process grid (Q). (default: 1)\n"
+            "\n"
+            " --threads=X Number of CPU workers (default: _SC_NPROCESSORS_ONLN)\n"
+            " --gpus=X Number of GPU workers\n"
+            " --runtime=NAME Runtime system to activate (at most 19 characters)\n"
+            "\n");
+}
+
+
+/**
+ * Return 1 when string s begins with prefix, 0 otherwise.
+ */
+static int startswith(const char *s, const char *prefix) {
+    size_t n = strlen( prefix );
+    if (strncmp( s, prefix, n ))
+        return 0;
+    return 1;
+}
+
+/**
+ * Read arguments following the program call.
+ * Recognized options overwrite the matching iparam entry (or the runtime
+ * name); unknown options are reported on stderr and skipped.
+ */
+static void read_args(int argc, char *argv[], int *iparam){
+    int i;
+    for (i = 1; i < argc && argv[i]; ++i) {
+        if ( startswith( argv[i], "--help") || startswith( argv[i], "-help") ||
+             startswith( argv[i], "--h") || startswith( argv[i], "-h") ) {
+            show_help( argv[0] );
+            exit(0);
+        } else if (startswith( argv[i], "--n=" )) {
+            sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_N]) );
+        } else if (startswith( argv[i], "--nrhs=" )) {
+            sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NRHS]) );
+        } else if (startswith( argv[i], "--nb=" )) {
+            sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_NB]) );
+        } else if (startswith( argv[i], "--threads=" )) {
+            sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_THRDNBR]) );
+        } else if (startswith( argv[i], "--gpus=" )) {
+            sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_GPUS]) );
+        }else if (startswith( argv[i], "--p=" )) {
+            sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_P]) );
+        } else if (startswith( argv[i], "--q=" )) {
+            sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_Q]) );
+        } else if (startswith( argv[i], "--runtime=" )) {
+            /* field width bounds the copy to runtime[20] (19 chars + NUL) */
+            sscanf( strchr( argv[i], '=' ) + 1, "%19s", runtime);
+        } else {
+            fprintf( stderr, "Unknown option: %s\n", argv[i] );
+        }
+    }
+}
+
+/**
+ * Print a header message to summarize main parameters (MPI rank 0 only)
+ */
+static void print_header(char *prog_name, int * iparam) {
+#if defined(CHAMELEON_SIMULATION)
+    double eps = 0.;
+#else
+    double eps = LAPACKE_dlamch_work( 'e' );
+#endif
+    if(AL4SAN_My_Mpi_Rank()==0){
+
+        printf( "#\n"
+                "# AL4SAN %d.%d.%d, %s\n"
+                "# Nb threads: %d\n"
+                "# Nb gpus: %d\n"
+                "# Nb Prows: %d\n"
+                "# Nb Qcols: %d\n"
+                "# N: %d\n"
+                "# NB: %d\n"
+                "# eps: %e\n"
+                "# Activated runtime: %s\n"
+                "#\n",
+                AL4SAN_VERSION_MAJOR,
+                AL4SAN_VERSION_MINOR,
+                AL4SAN_VERSION_MICRO,
+                prog_name,
+                iparam[IPARAM_THRDNBR],
+                iparam[IPARAM_GPUS],
+                iparam[IPARAM_P],
+                iparam[IPARAM_Q],
+                iparam[IPARAM_N],
+                iparam[IPARAM_NB],
+                eps,
+                runtime );
+
+        printf( "# M N K/NRHS NB seconds Gflop/s\n");
+        printf( "#%7d %7d %7d %7d ", iparam[IPARAM_N], iparam[IPARAM_N], iparam[IPARAM_NRHS], iparam[IPARAM_NB]);
+        fflush( stdout );
+    }
+    return;
+}
+
+
+
+#endif /* _potrf_h_ */
+
diff --git a/example/potrf_cmake/shaheen-modules.sh b/example/potrf_cmake/shaheen-modules.sh
new file mode 100644
index 0000000..f6b1b5e
--- /dev/null
+++ b/example/potrf_cmake/shaheen-modules.sh
@@ -0,0 +1,6 @@
+module unload cray-libsci1
+module switch PrgEnv-cray/6.0.5 PrgEnv-intel
+module load cmake/3.13.4
+export CRAYPE_LINK_TYPE=dynamic
+export PKG_CONFIG_PATH=/lustre/project/k1205/akbudak/codes/plasma-installer_2.8.0/install-intel/lib/pkgconfig:$PKG_CONFIG_PATH
+export LD_LIBRARY_PATH=/project/k1205/omairyrm/dplasma-parsec/parsec/build/install_dir/lib64/:$LD_LIBRARY_PATH
diff --git a/example/potrf_cmake/shaheen_build.sh b/example/potrf_cmake/shaheen_build.sh
new file mode 100644
index 0000000..09b1174
--- /dev/null
+++ b/example/potrf_cmake/shaheen_build.sh
@@ -0,0 +1 @@
+cmake ..
-DCMAKE_INSTALL_PREFIX=$PWD/installdir -DBLAS_LIBRARIES="-Wl,--no-as-needed;-L${MKLROOT}/lib;-lmkl_core;-lmkl_sequential;-lmkl_intel_lp64;-lpthread;-lm;-ldl" -DMPI_C_LIBRARIES="-L/opt/cray/pe/mpt/7.7.11/gni/mpich-cray/90/;-lmpich" -DPARSEC_LIBRARIES="-L/project/k1205/omairyrm/dplasma-parsec/parsec/build/install_dir/lib64;-lparsec" -DAL4SAN_LIBS=" -L/project/k1205/omairyrm/al4san-parsec/al4san-dev/build-parsec/install/lib;-lal4san;-lal4san_parsec" diff --git a/include/al4san.h b/include/al4san.h index 1f34d6d..b1e307a 100644 --- a/include/al4san.h +++ b/include/al4san.h @@ -177,7 +177,7 @@ int AL4SAN_Desc_Acquire (AL4SAN_desc_t *desc); int AL4SAN_Desc_Release (AL4SAN_desc_t *desc); void AL4SAN_Flush(); int AL4SAN_Desc_Flush (AL4SAN_desc_t *desc, AL4SAN_sequence_t *sequence); -//void AL4SAN_Data_flush( const AL4SAN_sequence_t *sequence, const AL4SAN_desc_t *A, int Am, int An ); +void AL4SAN_Data_Flush( const AL4SAN_sequence_t *sequence, const AL4SAN_desc_t *A, int Am, int An ); void AL4SAN_Matrix_Flush( const AL4SAN_sequence_t *sequence, const AL4SAN_desc_t *A, int Am, int An ); void AL4SAN_Vector_Flush( const AL4SAN_sequence_t *sequence, const AL4SAN_desc_t *A, int Am); void AL4SAN_Scaler_Flush( const AL4SAN_sequence_t *sequence, const AL4SAN_desc_t *A); @@ -216,6 +216,7 @@ int AL4SAN_Options_Workspace_Free(AL4SAN_option_t *options ); #define PRAGMA(x) _Pragma(#x) #define AL4SAN_Sequence_Create() \ malloc(sizeof(AL4SAN_sequence_t)); \ + (*sequence)->status = AL4SAN_SUCCESS; \ PRAGMA(omp parallel) \ { \ PRAGMA(omp master) \ diff --git a/include/al4san/task.h b/include/al4san/task.h index cc8752b..834b9da 100644 --- a/include/al4san/task.h +++ b/include/al4san/task.h @@ -111,9 +111,10 @@ typedef struct al4san_arg_list_s #define GET_MACRO_ADDR(_1,_2,_3,_4, NAME,...) NAME #define AL4SAN_ADDR(...) 
GET_MACRO_ADDR(__VA_ARGS__, AL4SAN_ADDR4, AL4SAN_ADDR3, AL4SAN_ADDR2)(__VA_ARGS__) -#define AL4SAN_ADDR4( desc, type, m, n ) ( (type*)AL4SAN_Data_getaddr( desc, m, n ) ) -#define AL4SAN_ADDR3( desc, type, m) ( (type*)AL4SAN_Data_getaddr( desc, m, 0 ) ) -#define AL4SAN_ADDR2( desc, type) ( (type*)AL4SAN_Data_getaddr( desc, 0, 0 ) ) + +#define AL4SAN_ADDR4( desc, type, m, n ) ( (type*)AL4SAN_Data_getaddr( desc, m, n ) ), desc->schedopt +#define AL4SAN_ADDR3( desc, type, m) ( (type*)AL4SAN_Data_getaddr( desc, m, 0 ) ), desc->schedopt +#define AL4SAN_ADDR2( desc, type) ( (type*)AL4SAN_Data_getaddr( desc, 0, 0 ) ), desc->schedopt #define AL4SAN_TASK_HEADER(name)\ AL4SAN_QUARK_TASK_HEADER(name)\ @@ -135,6 +136,21 @@ void *OPENMP_func, *PARSEC_func; #define AL4SAN_TASK(name) QUARK_##name##_func, &cl_##name, PARSEC_func, OPENMP_func #endif +#if !defined(AL4SAN_SCHED_STARPU) && !defined(AL4SAN_SCHED_QUARK) && defined(AL4SAN_SCHED_PARSEC) && !defined(AL4SAN_SCHED_OPENMP) +void *QUARK_func, *STARPU_func, *OPENMP_func; +#define AL4SAN_TASK(name) QUARK_func, STARPU_func, PARSEC_##name##_func, OPENMP_func +#endif + +#if !defined(AL4SAN_SCHED_STARPU) && defined(AL4SAN_SCHED_QUARK) && !defined(AL4SAN_SCHED_PARSEC) && !defined(AL4SAN_SCHED_OPENMP) +void *OPENMP_func, *STARPU_func, *PARSEC_func; +#define AL4SAN_TASK(name) QUARK_##name##_func, STARPU_func, PARSEC_func, OPENMP_func +#endif + +#if !defined(AL4SAN_SCHED_STARPU) && !defined(AL4SAN_SCHED_QUARK) && !defined(AL4SAN_SCHED_PARSEC) && defined(AL4SAN_SCHED_OPENMP) +void *QUARK_func, *STARPU_func, *PARSEC_func; +#define AL4SAN_TASK(name) QUARK_func, STARPU_func, PARSEC_func, OPENMP_##name##_func +#endif + #if defined(AL4SAN_SCHED_STARPU) && defined(AL4SAN_SCHED_QUARK) && defined(AL4SAN_SCHED_PARSEC) && defined(AL4SAN_SCHED_OPENMP) #define AL4SAN_TASK(name) QUARK_##name##_func, &cl_##name, PARSEC_##name##_func, OPENMP_##name##_func #endif diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index ed9e8bd..88c34a0 100644 
--- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -52,6 +52,25 @@ if( AL4SAN_SCHED_OPENMP) add_subdirectory(openmp) endif() +# Define the list of headers +# -------------------------- +set(AL4SAN_RUNTIME_HDRS + ${CMAKE_CURRENT_SOURCE_DIR}/al4san_runtime.h + ) + + +# Force generation of headers +# --------------------------- +add_custom_target( + runtime_include + ALL SOURCES ${AL4SAN_RUNTIME_HDRS}) + +# Installation +# ------------ +install( + FILES ${AL4SAN_RUNTIME_HDRS} + DESTINATION include/runtime ) + # Put doxygen input to parent scope set(DOXYGEN_INPUT ${DOXYGEN_INPUT} PARENT_SCOPE ) diff --git a/runtime/al4san_runtime.h b/runtime/al4san_runtime.h new file mode 100644 index 0000000..dcb1144 --- /dev/null +++ b/runtime/al4san_runtime.h @@ -0,0 +1,44 @@ +#ifndef _AL4SAN_RUNTIME_HEADER_ +#define _AL4SAN_RUNTIME_HEADER_ + +BEGIN_C_DECLS + + +#if defined(AL4SAN_SCHED_STARPU) && !defined(AL4SAN_SCHED_QUARK) && !defined(AL4SAN_SCHED_PARSEC) && !defined(AL4SAN_SCHED_OPENMP) +#include "starpu/include/al4san_starpu.h" +#endif + +#if defined(AL4SAN_SCHED_STARPU) && defined(AL4SAN_SCHED_QUARK) && defined(AL4SAN_SCHED_PARSEC) && !defined(AL4SAN_SCHED_OPENMP) +#include "starpu/include/al4san_starpu.h" +#include "parsec/include/al4san_parsec.h" +#include "quark/include/al4san_quark.h" +#endif + +#if defined(AL4SAN_SCHED_STARPU) && defined(AL4SAN_SCHED_QUARK) && !defined(AL4SAN_SCHED_PARSEC) && !defined(AL4SAN_SCHED_OPENMP) +#include "starpu/include/al4san_starpu.h" +#include "quark/include/al4san_quark.h" +#endif + +#if !defined(AL4SAN_SCHED_STARPU) && !defined(AL4SAN_SCHED_QUARK) && defined(AL4SAN_SCHED_PARSEC) && !defined(AL4SAN_SCHED_OPENMP) +#include "parsec/include/al4san_parsec.h" +#endif + +#if !defined(AL4SAN_SCHED_STARPU) && defined(AL4SAN_SCHED_QUARK) && !defined(AL4SAN_SCHED_PARSEC) && !defined(AL4SAN_SCHED_OPENMP) +#include "quark/include/al4san_quark.h" +#endif + +#if !defined(AL4SAN_SCHED_STARPU) && !defined(AL4SAN_SCHED_QUARK) && 
!defined(AL4SAN_SCHED_PARSEC) && defined(AL4SAN_SCHED_OPENMP) +#include "openmp/include/al4san_openmp.h" +#endif + +#if defined(AL4SAN_SCHED_STARPU) && defined(AL4SAN_SCHED_QUARK) && defined(AL4SAN_SCHED_PARSEC) && defined(AL4SAN_SCHED_OPENMP) +#include "starpu/include/al4san_starpu.h" +#include "parsec/include/al4san_parsec.h" +#include "quark/include/al4san_quark.h" +#include "openmp/include/al4san_openmp.h" +#endif + +END_C_DECLS + +#endif /*_AL4SAN_RUNTIME_HEADER_*/ + diff --git a/runtime/openmp/CMakeLists.txt b/runtime/openmp/CMakeLists.txt index fb6c169..1ec878e 100644 --- a/runtime/openmp/CMakeLists.txt +++ b/runtime/openmp/CMakeLists.txt @@ -50,7 +50,7 @@ add_custom_target(runtime_openmp_include ALL SOURCES ${AL4SAN_RUNTIME_HDRS}) # installation # ------------ install(FILES ${AL4SAN_RUNTIME_HDRS} - DESTINATION include/runtime) + DESTINATION include/runtime/openmp/include) set(AL4SAN_RUNTIME_COMMON diff --git a/runtime/openmp/control/openmp_task.c b/runtime/openmp/control/openmp_task.c index 53c85d3..fce09d5 100644 --- a/runtime/openmp/control/openmp_task.c +++ b/runtime/openmp/control/openmp_task.c @@ -26,6 +26,7 @@ void AL4SAN_Openmp_task_option_init() { AL4SAN_DEP = 101; ARG_END = 0; + AL4SAN_INPUT=(1<<0); AL4SAN_OUTPUT=(1<<1); AL4SAN_INOUT=(AL4SAN_INPUT|AL4SAN_OUTPUT); AL4SAN_VALUE=(1<<2); @@ -114,25 +115,38 @@ void al4san_openmp_task_info(AL4SAN_Openmp_task_t* al4san_task, int *codelet_buf va_copy(varg_list_copy, varg_list); while ((arg_type = va_arg(varg_list_copy, int))!=ARG_END) { - arg_ptr = va_arg(varg_list_copy, void *); - ptr_size = va_arg(varg_list_copy, int); if (((arg_type & AL4SAN_OPENMP_UNDEFINED_MASK)== AL4SAN_INPUT) || ((arg_type & AL4SAN_OPENMP_UNDEFINED_MASK)==AL4SAN_OUTPUT) || - ((arg_type & AL4SAN_OPENMP_UNDEFINED_MASK)==AL4SAN_INOUT) || - ((arg_type & AL4SAN_OPENMP_UNDEFINED_MASK) ==AL4SAN_SCRATCH)){ - + ((arg_type & AL4SAN_OPENMP_UNDEFINED_MASK)==AL4SAN_INOUT)){ + + arg_ptr = va_arg(varg_list_copy, void *); + 
(void*)va_arg(varg_list_copy, void *); + ptr_size = va_arg(varg_list_copy, int); + al4san_task->arg_depenency[num_arg]=(arg_type & AL4SAN_OPENMP_UNDEFINED_MASK); + al4san_task->arg_size[num_arg]=ptr_size; + num_arg++; + (*codelet_buffers)++; + } + else if ((arg_type & AL4SAN_OPENMP_UNDEFINED_MASK) ==AL4SAN_SCRATCH){ + arg_ptr = va_arg(varg_list_copy, void *); + ptr_size = va_arg(varg_list_copy, int); al4san_task->arg_depenency[num_arg]=(arg_type & AL4SAN_OPENMP_UNDEFINED_MASK); + al4san_task->arg_size[num_arg]=ptr_size; + num_arg++; + (*codelet_buffers)++; + } + else if (arg_type==AL4SAN_VALUE) + { + arg_ptr = va_arg(varg_list_copy, void *); + ptr_size = va_arg(varg_list_copy, int); + al4san_task->arg_depenency[num_arg]=arg_type; al4san_task->arg_size[num_arg]=ptr_size; num_arg++; - (*codelet_buffers)++; + }else{ + arg_ptr = va_arg(varg_list_copy, void *); + ptr_size = va_arg(varg_list_copy, int); } -else if (arg_type==AL4SAN_VALUE) -{ - al4san_task->arg_depenency[num_arg]=arg_type; -al4san_task->arg_size[num_arg]=ptr_size; -num_arg++; -} } al4san_task->num_arg=num_arg; va_end(varg_list_copy); @@ -206,10 +220,11 @@ kmp_depend_info_t dep_info[NUM_SHAREDS]; while ((arg_dep=va_arg(varg_list, int))!=0) { - ptr=va_arg(varg_list, void *); - arg_size=va_arg(varg_list, int); if((arg_dep & AL4SAN_OPENMP_UNDEFINED_MASK)==AL4SAN_INPUT ) { + ptr=va_arg(varg_list, void *); + (void*)va_arg(varg_list, void *); + arg_size=va_arg(varg_list, int); dep_info[dep_count].base_addr=ptr; dep_info[dep_count].len=arg_size; dep_info[dep_count].flags.in = 1; @@ -221,6 +236,9 @@ while ((arg_dep=va_arg(varg_list, int))!=0) } else if((arg_dep & AL4SAN_OPENMP_UNDEFINED_MASK)==AL4SAN_OUTPUT) { + ptr=va_arg(varg_list, void *); + (void*)va_arg(varg_list, void *); + arg_size=va_arg(varg_list, int); dep_info[dep_count].base_addr=ptr; dep_info[dep_count].len=arg_size; dep_info[dep_count].flags.in = 0; @@ -232,6 +250,9 @@ while ((arg_dep=va_arg(varg_list, int))!=0) } else if((arg_dep& 
AL4SAN_OPENMP_UNDEFINED_MASK) ==AL4SAN_INOUT) { + ptr=va_arg(varg_list, void *); + (void*)va_arg(varg_list, void *); + arg_size=va_arg(varg_list, int); dep_info[dep_count].base_addr=ptr; dep_info[dep_count].len=arg_size; dep_info[dep_count].flags.in = 1; @@ -243,6 +264,8 @@ while ((arg_dep=va_arg(varg_list, int))!=0) } else if((arg_dep& AL4SAN_OPENMP_UNDEFINED_MASK) ==AL4SAN_SCRATCH) { + ptr=va_arg(varg_list, void *); + arg_size=va_arg(varg_list, int); if (ptr==NULL) { ptr=malloc(arg_size); @@ -257,13 +280,28 @@ while ((arg_dep=va_arg(varg_list, int))!=0) } } else if(arg_dep==AL4SAN_VALUE){ - al4san_openmp_pack_arg(&state, ptr, arg_size); + ptr=va_arg(varg_list, void *); + arg_size=va_arg(varg_list, int); + al4san_openmp_pack_arg(&state, ptr, arg_size); } - else if (arg_dep!=AL4SAN_OPENMP_UNDEFINED) - { + else if(arg_dep==AL4SAN_PRIORITY || + arg_dep==AL4SAN_LABEL || + arg_dep==AL4SAN_OPENMP_UNDEFINED) + { + ptr = va_arg(varg_list, void *); + arg_size = va_arg(varg_list, int); + } + else if(arg_dep!=AL4SAN_PRIORITY && + arg_dep!=AL4SAN_LABEL && + arg_dep!=AL4SAN_OPENMP_UNDEFINED) + { fprintf(stderr,"Unrecognized argument, did you perhaps forget to end arguments with ARG_END?\n"); abort(); } + else{ + ptr=va_arg(varg_list, void *); + arg_size=va_arg(varg_list, int); + } } if (state.nargs>0) { diff --git a/runtime/openmp/include/al4san_openmp.h b/runtime/openmp/include/al4san_openmp.h index 384a0aa..96a1282 100644 --- a/runtime/openmp/include/al4san_openmp.h +++ b/runtime/openmp/include/al4san_openmp.h @@ -75,7 +75,7 @@ typedef int (al4san_openmp_codelet) (kmp_int32, kmp_task_t *); #define AL4SAN_OPENMP_getStream(_stream_) -int al4san_openmp_workspace_destroy(AL4SAN_Openmp_arg_list *al4san_arg); +int al4san_openmp_workspace_destroy(AL4SAN_arg_list *al4san_arg); END_C_DECLS #endif /* _AL4SAN_OPENMP_H_ */ diff --git a/runtime/parsec/CMakeLists.txt b/runtime/parsec/CMakeLists.txt index 3bd94e6..53e3938 100644 --- a/runtime/parsec/CMakeLists.txt +++ 
b/runtime/parsec/CMakeLists.txt @@ -61,7 +61,7 @@ add_custom_target( # ------------ install( FILES ${AL4SAN_RUNTIME_HDRS} - DESTINATION include/runtime ) + DESTINATION include/runtime/parsec/include) # Generate the Al4san common for all possible precisions # --------------------------------------------------------- diff --git a/runtime/parsec/control/parsec_async.c b/runtime/parsec/control/parsec_async.c index 14cad39..00d3bd1 100644 --- a/runtime/parsec/control/parsec_async.c +++ b/runtime/parsec/control/parsec_async.c @@ -53,7 +53,8 @@ int AL4SAN_Parsec_sequence_destroy( AL4SAN_context_t *al4san, parsec_taskpool_t *parsec_dtd_tp = (parsec_taskpool_t *)(sequence->schedopt); assert( parsec_dtd_tp ); - parsec_dtd_taskpool_wait( parsec, parsec_dtd_tp ); +// parsec_dtd_taskpool_wait( parsec, parsec_dtd_tp ); + parsec_dtd_taskpool_wait( parsec_dtd_tp ); parsec_taskpool_free( parsec_dtd_tp ); sequence->schedopt = NULL; @@ -70,8 +71,8 @@ int AL4SAN_Parsec_sequence_wait( AL4SAN_context_t *al4san, parsec_taskpool_t *parsec_dtd_tp = (parsec_taskpool_t *) sequence->schedopt; assert( parsec_dtd_tp ); - parsec_dtd_taskpool_wait( parsec, parsec_dtd_tp ); - +// parsec_dtd_taskpool_wait( parsec, parsec_dtd_tp ); + parsec_dtd_taskpool_wait( parsec_dtd_tp ); return AL4SAN_SUCCESS; } diff --git a/runtime/parsec/control/parsec_descriptor.c b/runtime/parsec/control/parsec_descriptor.c index 50af5be..4d4e84c 100644 --- a/runtime/parsec/control/parsec_descriptor.c +++ b/runtime/parsec/control/parsec_descriptor.c @@ -247,7 +247,7 @@ void AL4SAN_Parsec_desc_create( AL4SAN_desc_t *mdesc ) (void)rc; } #endif - data_collection->memory_registration_status = MEMORY_STATUS_UNREGISTERED; + data_collection->memory_registration_status = PARSEC_MEMORY_STATUS_UNREGISTERED; pdesc->data_map = calloc( mdesc->lmt * mdesc->lnt, sizeof(parsec_data_t*) ); @@ -297,7 +297,10 @@ void AL4SAN_Parsec_desc_create( AL4SAN_desc_t *mdesc ) } /* Register the new arena */ - parsec_matrix_add2arena( 
parsec_dtd_arenas[i], datatype, matrix_UpperLower, 1, + //parsec_matrix_add2arena( parsec_dtd_arenas[i], datatype, matrix_UpperLower, 1, + // mdesc->mb, mdesc->nb, mdesc->mb, PARSEC_ARENA_ALIGNMENT_SSE, -1 ); + //parsec_arena_datatype_t **adt; + parsec_matrix_add2arena( &parsec_dtd_arenas_datatypes[i], datatype, matrix_UpperLower, 1, mdesc->mb, mdesc->nb, mdesc->mb, PARSEC_ARENA_ALIGNMENT_SSE, -1 ); arena->size = size; pdesc->arena_index = i; @@ -380,13 +383,17 @@ void AL4SAN_Parsec_desc_flush( const AL4SAN_desc_t *desc, void AL4SAN_Parsec_data_flush( const AL4SAN_sequence_t *sequence, const AL4SAN_desc_t *A, int Am, int An ) { - /* - * For now, we do nothing in this function as in PaRSEC, once the data is - * flushed it cannot be reused in the same sequence, when this issue will be - * fixed, we will uncomment this function - */ - /* parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(sequence->schedopt); */ - /* parsec_dtd_data_flush( PARSEC_dtd_taskpool, RTBLKADDR( A, AL4SAN_Complex64_t, Am, An ) ); */ + parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(sequence->schedopt); + + if( A->dtyp == Al4sanComplexDouble ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, Am, An)); + } else if ( A->dtyp == Al4sanComplexFloat ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, Am, An)); + } else if ( A->dtyp == Al4sanRealDouble ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, Am, An)); + } else if ( A->dtyp == Al4sanRealFloat ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, Am, An)); + } (void)sequence; (void)A; (void)Am; (void)An; return; @@ -395,14 +402,17 @@ void AL4SAN_Parsec_data_flush( const AL4SAN_sequence_t *sequence, void AL4SAN_Parsec_matrix_flush( const AL4SAN_sequence_t *sequence, const AL4SAN_desc_t *A, int Am, int An ) { - /* - * For now, we do nothing in this function as in PaRSEC, once the data is - * flushed it 
cannot be reused in the same sequence, when this issue will be - * fixed, we will uncomment this function - */ - /* parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(sequence->schedopt); */ - /* parsec_dtd_data_flush( PARSEC_dtd_taskpool, RTBLKADDR( A, AL4SAN_Complex64_t, Am, An ) ); */ + parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(sequence->schedopt); + if( A->dtyp == Al4sanComplexDouble ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, Am, An)); + } else if ( A->dtyp == Al4sanComplexFloat ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, Am, An)); + } else if ( A->dtyp == Al4sanRealDouble ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, Am, An)); + } else if ( A->dtyp == Al4sanRealFloat ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, Am, An)); + } (void)sequence; (void)A; (void)Am; (void)An; return; } @@ -410,14 +420,17 @@ void AL4SAN_Parsec_matrix_flush( const AL4SAN_sequence_t *sequence, void AL4SAN_Parsec_vector_flush( const AL4SAN_sequence_t *sequence, const AL4SAN_desc_t *A, int Am) { - /* - * For now, we do nothing in this function as in PaRSEC, once the data is - * flushed it cannot be reused in the same sequence, when this issue will be - * fixed, we will uncomment this function - */ - /* parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(sequence->schedopt); */ - /* parsec_dtd_data_flush( PARSEC_dtd_taskpool, RTBLKADDR( A, AL4SAN_Complex64_t, Am, An ) ); */ + parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(sequence->schedopt); + if( A->dtyp == Al4sanComplexDouble ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, Am, 0)); + } else if ( A->dtyp == Al4sanComplexFloat ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, Am, 0)); + } else if ( A->dtyp == Al4sanRealDouble ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, 
AL4SAN_Parsec_data_getaddr( A, Am, 0)); + } else if ( A->dtyp == Al4sanRealFloat ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, Am, 0)); + } (void)sequence; (void)A; (void)Am; return; } @@ -425,14 +438,17 @@ void AL4SAN_Parsec_vector_flush( const AL4SAN_sequence_t *sequence, void AL4SAN_Parsec_scaler_flush( const AL4SAN_sequence_t *sequence, const AL4SAN_desc_t *A) { - /* - * For now, we do nothing in this function as in PaRSEC, once the data is - * flushed it cannot be reused in the same sequence, when this issue will be - * fixed, we will uncomment this function - */ - /* parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(sequence->schedopt); */ - /* parsec_dtd_data_flush( PARSEC_dtd_taskpool, RTBLKADDR( A, AL4SAN_Complex64_t, Am, An ) ); */ + parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(sequence->schedopt); + if( A->dtyp == Al4sanComplexDouble ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, 0, 0)); + } else if ( A->dtyp == Al4sanComplexFloat ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, 0, 0)); + } else if ( A->dtyp == Al4sanRealDouble ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, 0, 0)); + } else if ( A->dtyp == Al4sanRealFloat ){ + parsec_dtd_data_flush( PARSEC_dtd_taskpool, AL4SAN_Parsec_data_getaddr( A, 0, 0)); + } (void)sequence; (void)A; return; } diff --git a/runtime/parsec/control/parsec_task.c b/runtime/parsec/control/parsec_task.c index d3c5517..8579104 100644 --- a/runtime/parsec/control/parsec_task.c +++ b/runtime/parsec/control/parsec_task.c @@ -119,16 +119,19 @@ al4san_parsec_arg_iterator(va_list args, parsec_dtd_arg_cb *cb, void *cb_data) /* We always expect three arguments to come a set */ int arg_type, arg_size; void *arg_ptr; + void *ptr; while(PARSEC_DTD_ARG_END != (arg_type = va_arg(args, int))) { - arg_ptr = va_arg(args, void *); - arg_size = va_arg(args, int); if(((arg_type & 
AL4SAN_PARSEC_UNDEFINED_MASK) & GET_OP_TYPE) == AL4SAN_INPUT || ((arg_type & AL4SAN_PARSEC_UNDEFINED_MASK) & GET_OP_TYPE) == AL4SAN_INOUT || ((arg_type & AL4SAN_PARSEC_UNDEFINED_MASK) & GET_OP_TYPE) == AL4SAN_OUTPUT ) { + arg_ptr = va_arg(args, void *); + ptr = va_arg(args, void *); + int arena_index = ((al4san_parsec_desc_t *)ptr)->arena_index; + arg_size = va_arg(args, int); arg_size=PASSED_BY_REF; - cb(arg_size, arg_ptr, (arg_type & AL4SAN_PARSEC_UNDEFINED_MASK), cb_data); + cb(arg_size, arg_ptr, arena_index | (arg_type & AL4SAN_PARSEC_UNDEFINED_MASK), cb_data); } else if(arg_type==AL4SAN_VALUE || @@ -138,12 +141,23 @@ al4san_parsec_arg_iterator(va_list args, parsec_dtd_arg_cb *cb, void *cb_data) arg_type==AL4SAN_DONT_TRACK || arg_type==AL4SAN_PASSED_BY_REF) { + arg_ptr = va_arg(args, void *); + arg_size = va_arg(args, int); cb(arg_size, arg_ptr, arg_type, cb_data); } - else if(arg_type!=AL4SAN_PRIORITY && + else if(arg_type==AL4SAN_PRIORITY || + arg_type==AL4SAN_LABEL || + arg_type==AL4SAN_PARSEC_UNDEFINED) + { + arg_ptr = va_arg(args, void *); + arg_size = va_arg(args, int); + } + else if(arg_type!=AL4SAN_PRIORITY && arg_type!=AL4SAN_LABEL && arg_type!=AL4SAN_PARSEC_UNDEFINED) { + arg_ptr = va_arg(args, void *); + arg_size = va_arg(args, int); parsec_fatal("Unrecognized argument, did you perhaps forget to end arguments with ARG_END?\n"); } @@ -161,135 +175,50 @@ void al4san_parsec_flags_decode(AL4SAN_Parsec_Task_Flags *task_flag, va_list arg task_flag->task_label=NULL; while(PARSEC_DTD_ARG_END != (arg_type = va_arg(args_for_flags, int))) { - arg_ptr = va_arg(args_for_flags, void *); - arg_size = va_arg(args_for_flags, int); if( arg_type == AL4SAN_PRIORITY) { + arg_ptr = va_arg(args_for_flags, void *); + arg_size = va_arg(args_for_flags, int); task_flag->task_priority=(intptr_t)arg_ptr; } else if(arg_type == AL4SAN_LABEL) { + arg_ptr = va_arg(args_for_flags, void *); + arg_size = va_arg(args_for_flags, int); task_flag->task_label=(char *)arg_ptr; } + else 
if(((arg_type & AL4SAN_PARSEC_UNDEFINED_MASK) & GET_OP_TYPE) == AL4SAN_INPUT || + ((arg_type & AL4SAN_PARSEC_UNDEFINED_MASK) & GET_OP_TYPE) == AL4SAN_INOUT || + ((arg_type & AL4SAN_PARSEC_UNDEFINED_MASK) & GET_OP_TYPE) == AL4SAN_OUTPUT ) + { + (void)va_arg(args_for_flags, void *); + (void)va_arg(args_for_flags, void *); + (void)va_arg(args_for_flags, int); + } + else if(arg_type==AL4SAN_VALUE || + arg_type==AL4SAN_REF || + arg_type==AL4SAN_SCRATCH || + arg_type==AL4SAN_AFFINITY || + arg_type==AL4SAN_DONT_TRACK || + arg_type==AL4SAN_PASSED_BY_REF) + { + arg_ptr = va_arg(args_for_flags, void *); + arg_size = va_arg(args_for_flags, int); + } + else if(arg_type!=AL4SAN_PRIORITY && + arg_type!=AL4SAN_LABEL && + arg_type!=AL4SAN_PARSEC_UNDEFINED) + { + arg_ptr = va_arg(args_for_flags, void *); + arg_size = va_arg(args_for_flags, int); + parsec_fatal("Unrecognized argument, did you perhaps forget to end arguments with ARG_END?\n"); + } } + return; } -/*void al4san_insert_task( al4san_parsec_codelet *codelet, AL4SAN_option_t *options, ...) -{ - //Defines a DAG of tasks - parsec_taskpool_t *tp = (parsec_taskpool_t *) (options->sequence->schedopt); - - parsec_dtd_taskpool_t *dtd_tp = (parsec_dtd_taskpool_t *)tp; - - if( tp->context == NULL ) { - parsec_fatal( "Sorry! You can not insert task wihtout enqueuing the taskpool to parsec_context" - " first. Please make sure you call parsec_enqueue(parsec_context, taskpool) before" - " you try inserting task in PaRSEC\n" ); - } - - if( dtd_tp == NULL ) { - parsec_fatal( "Wait! You need to pass a correct parsec taskpool in order to insert task. " - "Please use \"parsec_dtd_taskpool_new()\" to create new taskpool" - "and then try to insert task. 
Thank you\n" ); - } - - - va_list args, args_for_size, args_for_rank, args_for_flags; - parsec_dtd_common_args_t common_args; - AL4SAN_Parsec_Task_Flags task_flag; - - common_args.rank = -1; common_args.write_flow_count = 1; - common_args.flow_count_of_template = 0; common_args.dtd_tp = dtd_tp; - common_args.count_of_params_sent_by_user = 0; - common_args.size_of_params = 0; common_args.flow_index = 0; - - va_start(args, options); -#if defined(PARSEC_PROF_TRACE) - parsec_profiling_trace(dtd_tp->super.context->virtual_processes[0]->execution_streams[0]->es_profile, - insert_task_trace_keyin, 0, dtd_tp->super.taskpool_id, NULL ); -#endif - //extracting the flags of the task - va_copy(args_for_flags, args); - al4san_flags_decode(&task_flag, args_for_flags); - va_end(args_for_flags); - - //extracting the rank of the task - va_copy(args_for_rank, args); - al4san_parsec_arg_iterator(args_for_rank, parsec_dtd_iterator_arg_get_rank, (void*)&common_args); - va_end(args_for_rank); - - uint64_t fkey = (uint64_t)(uintptr_t)codelet+ common_args.flow_count_of_template; - //Creating master function structures - //Hash table lookup to check if the function structure exists or not - parsec_task_class_t *tc = (parsec_task_class_t *) - parsec_dtd_find_task_class(dtd_tp, fkey); - - if( NULL == tc ) { - va_copy(args_for_size, args); - al4san_parsec_arg_iterator(args_for_size, parsec_dtd_iterator_arg_get_size, (void*)&common_args); - va_end(args_for_size); - - tc = parsec_dtd_create_task_class(dtd_tp, (parsec_dtd_funcptr_t*) codelet, task_flag.task_label, - common_args.count_of_params_sent_by_user, - common_args.size_of_params, common_args.flow_count_of_template); - -#if defined(PARSEC_PROF_TRACE) - parsec_dtd_add_profiling_info((parsec_taskpool_t *)dtd_tp, tc->task_class_id, task_flag.task_label); -#endif // defined(PARSEC_PROF_TRACE) - } - -#if defined(DISTRIBUTED) - if( tp->context->nb_nodes > 1 ) { - if( (-1 == common_args.rank) && (common_args.write_flow_count > 1) ) { - 
parsec_fatal( "You inserted a task with out indicating where the task should be executed(using AFFINITY flag)." - "This will result in executing this task on all nodes and the outcome might be not be what you want." - "So we are exiting for now. Please see the usage of AFFINITY flag.\n" ); - } else if( common_args.rank == -1 && common_args.write_flow_count == 1 ) { - // we have tasks with no real data as parameter so we are safe to execute it in each mpi process - common_args.rank = tp->context->my_rank; - } - } else { - common_args.rank = 0; - } -#else - common_args.rank = 0; -#endif - - parsec_dtd_task_t *this_task = parsec_dtd_create_and_initialize_task(dtd_tp, tc, common_args.rank); - this_task->super.priority = task_flag.task_priority; - common_args.task = this_task; - - if( parsec_dtd_task_is_local(this_task) ) { - parsec_object_t *object = (parsec_object_t *)this_task; - // retaining the local task as many write flows as - // it has and one to indicate when we have executed the task - (void)parsec_atomic_fetch_add_int32( &object->obj_reference_count, (common_args.write_flow_count) ); - - common_args.tmp_param = NULL; - - // Getting the pointer to allocated memory by mempool - common_args.head_of_param_list = GET_HEAD_OF_PARAM_LIST(this_task); - common_args.current_param = common_args.head_of_param_list; - common_args.value_block = GET_VALUE_BLOCK(common_args.head_of_param_list, ((parsec_dtd_task_class_t*)tc)->count_of_params); - common_args.current_val = common_args.value_block; - - al4san_parsec_arg_iterator(args, parsec_dtd_iterator_arg_set_param_local, (void*)&common_args); - - if( common_args.tmp_param != NULL ) - common_args.tmp_param->next = NULL; - } else { - al4san_parsec_arg_iterator(args, parsec_dtd_iterator_arg_set_param_remote, (void*)&common_args); - } - va_end(args); - -#if defined(DISTRIBUTED) - assert(this_task->rank != -1); -#endif - - parsec_insert_dtd_task( (parsec_task_t *)this_task ); -}*/ int AL4SAN_Parsec_insert_task(AL4SAN_codelet 
codelet, AL4SAN_option_t *options, va_list args) { @@ -409,37 +338,6 @@ int AL4SAN_Parsec_insert_task(AL4SAN_codelet codelet, AL4SAN_option_t *options, } -/*void al4san_unpack_arg(AL4SAN_Parsec_arg_list *al4san_arg, ...) -{ - parsec_dtd_task_t *current_task = (parsec_dtd_task_t *)al4san_arg->this_task; - parsec_dtd_task_param_t *current_param = GET_HEAD_OF_PARAM_LIST(current_task); - int i = 0; - void *tmp_val; void **tmp_ref; - va_list varg_list; - - va_start(varg_list, al4san_arg); - while(current_param != NULL) { - if((current_param->op_type & GET_OP_TYPE) == VALUE) { - tmp_val = va_arg(varg_list, void*); - memcpy(tmp_val, current_param->pointer_to_tile, current_param->arg_size); - } else if((current_param->op_type & GET_OP_TYPE) == SCRATCH || - (current_param->op_type & GET_OP_TYPE) == REF) { - tmp_ref = va_arg(varg_list, void**); - *tmp_ref = current_param->pointer_to_tile; - } else if((current_param->op_type & GET_OP_TYPE) == INPUT || - (current_param->op_type & GET_OP_TYPE) == INOUT || - (current_param->op_type & GET_OP_TYPE) == OUTPUT) { - tmp_ref = va_arg(varg_list, void**); - *tmp_ref = PARSEC_DATA_COPY_GET_PTR(al4san_arg->this_task->data[i].data_in); - i++; - } else { - parsec_warning("/!\\ Flag is not recognized in parsec_dtd_unpack_args /!\\.\n"); - assert(0); - } - current_param = current_param->next; - } - va_end(varg_list); -}*/ int AL4SAN_Parsec_unpack_arg(AL4SAN_arg_list* al4san_arg, va_list varg_list) diff --git a/runtime/quark/CMakeLists.txt b/runtime/quark/CMakeLists.txt index 345a7ea..0fe5732 100644 --- a/runtime/quark/CMakeLists.txt +++ b/runtime/quark/CMakeLists.txt @@ -63,7 +63,7 @@ add_custom_target( # ------------ install( FILES ${AL4SAN_RUNTIME_HDRS} - DESTINATION include/runtime ) + DESTINATION include/runtime/quark/include) # Generate the Al4san common for all possible precisions # --------------------------------------------------------- diff --git a/runtime/quark/control/quark_task.c b/runtime/quark/control/quark_task.c index 
44f9f54..3a51c0e 100644 --- a/runtime/quark/control/quark_task.c +++ b/runtime/quark/control/quark_task.c @@ -121,31 +121,40 @@ void al4san_quark_task_info(AL4SAN_Quark_task_t *al4san, va_list varg_list){ while ((arg_type = va_arg(varg_list_copy, int))!=ARG_END) { - arg_ptr = va_arg(varg_list_copy, void *); - arg_size = va_arg(varg_list_copy, int); quark_direction_t arg_direction = (quark_direction_t) ((arg_type & AL4SAN_QUARK_UNDEFINED_MASK) & QUARK_DIRECTION_BITMASK); if( arg_direction==AL4SAN_INPUT || arg_direction==AL4SAN_OUTPUT || arg_direction==AL4SAN_INOUT ) { + arg_ptr = va_arg(varg_list_copy, void *); + (void *)va_arg(varg_list_copy, void *); + arg_size = va_arg(varg_list_copy, int); al4san->arg_depenency[narg]=arg_direction; al4san->arg_size[narg]=sizeof(char *); narg++; } else if( arg_type==AL4SAN_SCRATCH || arg_type==AL4SAN_NODEP){ + arg_ptr = va_arg(varg_list_copy, void *); + arg_size = va_arg(varg_list_copy, int); al4san->arg_depenency[narg]=arg_type; - al4san->arg_size[narg]=sizeof(char *); + al4san->arg_size[narg]=sizeof(char *); narg++; } else if( arg_type==AL4SAN_VALUE ) { mask=( arg_type & QUARK_QUARK_VALUE_FLAGS_BITMASK ); + arg_ptr = va_arg(varg_list_copy, void *); + arg_size = va_arg(varg_list_copy, int); if(mask==0){ al4san->arg_depenency[narg]=arg_type; al4san->arg_size[narg]=arg_size; narg++; } - } + } + else{ + arg_ptr = va_arg(varg_list_copy, void *); + arg_size = va_arg(varg_list_copy, int); + } } al4san->num_arg=narg; @@ -167,59 +176,78 @@ int al4san_quark_task_create(Quark *quark, Quark_Task *task, va_list varg_list, arg_type=AL4SAN_VALUE; arg_ptr= (void *)&al4san; arg_size=sizeof(AL4SAN_Quark_task_t); + QUARK_Task_Pack_Arg( quark, task, arg_size, arg_ptr, arg_type); - do{ + while((arg_type = va_arg(varg_list, int)) != ARG_END){ quark_direction_t arg_direction = (quark_direction_t) ((arg_type & AL4SAN_QUARK_UNDEFINED_MASK) & QUARK_DIRECTION_BITMASK); if( arg_direction==AL4SAN_INPUT || arg_direction==AL4SAN_OUTPUT || 
arg_direction==AL4SAN_INOUT ) { - // QUARK_Task_Pack_Arg( quark, task, arg_size, arg_ptr, (arg_type & AL4SAN_QUARK_UNDEFINED_MASK)); + + arg_ptr = va_arg(varg_list, void *); + (void*)va_arg(varg_list, void *); + arg_size = va_arg(varg_list, int); QUARK_Task_Pack_Arg( quark, task, AL4SAN_DEP, arg_ptr, (arg_type & AL4SAN_QUARK_UNDEFINED_MASK)); } else if (arg_type==AL4SAN_VALUE || arg_type==AL4SAN_SCRATCH || arg_type==AL4SAN_NODEP) { - QUARK_Task_Pack_Arg( quark, task, arg_size, arg_ptr, arg_type); + arg_ptr = va_arg(varg_list, void *); + arg_size = va_arg(varg_list, int); + QUARK_Task_Pack_Arg( quark, task, arg_size, arg_ptr, arg_type); } //Task flags (add support to task option inside insert task API) else if (arg_type==AL4SAN_PRIORITY){ - QUARK_Task_Flag_Set(task_flags, AL4SAN_PRIORITY, (intptr_t)arg_ptr); - nflags++; + arg_ptr = va_arg(varg_list, void *); + arg_size = va_arg(varg_list, int); + QUARK_Task_Flag_Set(task_flags, AL4SAN_PRIORITY, (intptr_t)arg_ptr); + nflags++; } else if (arg_type==AL4SAN_LOCK_TO_THREAD){ - QUARK_Task_Flag_Set(task_flags, AL4SAN_LOCK_TO_THREAD, (intptr_t)arg_ptr); - nflags++; + arg_ptr = va_arg(varg_list, void *); + arg_size = va_arg(varg_list, int); + QUARK_Task_Flag_Set(task_flags, AL4SAN_LOCK_TO_THREAD, (intptr_t)arg_ptr); + nflags++; } else if (arg_type==AL4SAN_SEQUENCE){ - QUARK_Task_Flag_Set(task_flags, AL4SAN_SEQUENCE, (intptr_t)arg_ptr); - nflags++; + arg_ptr = va_arg(varg_list, void *); + arg_size = va_arg(varg_list, int); + QUARK_Task_Flag_Set(task_flags, AL4SAN_SEQUENCE, (intptr_t)arg_ptr); + nflags++; } else if (arg_type==AL4SAN_THREAD_COUNT){ - QUARK_Task_Flag_Set(task_flags, AL4SAN_THREAD_COUNT, (intptr_t)arg_ptr); - nflags++; + arg_ptr = va_arg(varg_list, void *); + arg_size = va_arg(varg_list, int); + QUARK_Task_Flag_Set(task_flags, AL4SAN_THREAD_COUNT, (intptr_t)arg_ptr); + nflags++; } else if (arg_type==AL4SAN_THREAD_SET_TO_MANUAL_SCHEDULING){ - QUARK_Task_Flag_Set(task_flags, AL4SAN_THREAD_SET_TO_MANUAL_SCHEDULING, 
(intptr_t)arg_ptr); - nflags++; + arg_ptr = va_arg(varg_list, void *); + arg_size = va_arg(varg_list, int); + QUARK_Task_Flag_Set(task_flags, AL4SAN_THREAD_SET_TO_MANUAL_SCHEDULING, (intptr_t)arg_ptr); + nflags++; } else if (arg_type==AL4SAN_LOCK_TO_THREAD_MASK){ - QUARK_Task_Flag_Set(task_flags, AL4SAN_LOCK_TO_THREAD_MASK, (intptr_t)strdup((unsigned char *)arg_ptr)); - nflags++; + arg_ptr = va_arg(varg_list, void *); + arg_size = va_arg(varg_list, int); + QUARK_Task_Flag_Set(task_flags, AL4SAN_LOCK_TO_THREAD_MASK, (intptr_t)strdup((unsigned char *)arg_ptr)); + nflags++; } else if (arg_type==AL4SAN_LABEL){ - QUARK_Task_Flag_Set(task_flags, AL4SAN_LABEL, (intptr_t)strdup((char *)arg_ptr)); - nflags++; + arg_ptr = va_arg(varg_list, void *); + arg_size = va_arg(varg_list, int); + QUARK_Task_Flag_Set(task_flags, AL4SAN_LABEL, (intptr_t)strdup((char *)arg_ptr)); + nflags++; } else if (arg_type==AL4SAN_COLOR){ - QUARK_Task_Flag_Set(task_flags, AL4SAN_COLOR, (intptr_t)strdup((char *)arg_ptr)); - nflags++; + arg_ptr = va_arg(varg_list, void *); + arg_size = va_arg(varg_list, int); + QUARK_Task_Flag_Set(task_flags, AL4SAN_COLOR, (intptr_t)strdup((char *)arg_ptr)); + nflags++; } else if (arg_type!=AL4SAN_QUARK_UNDEFINED){ fprintf(stderr,"Unrecognized argument, did you perhaps forget to end arguments with ARG_END?\n"); abort(); } - arg_type = va_arg(varg_list, int); - arg_ptr = va_arg(varg_list, void *); - arg_size = va_arg(varg_list, int); -}while (arg_type != ARG_END); + } if (nflags!=0){ quark_set_task_flags_in_task_structure( quark, task, task_flags ); diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt index b9c763b..5f36d1c 100644 --- a/runtime/starpu/CMakeLists.txt +++ b/runtime/starpu/CMakeLists.txt @@ -68,7 +68,7 @@ add_custom_target( # ------------ install( FILES ${AL4SAN_RUNTIME_HDRS} - DESTINATION include/runtime ) + DESTINATION include/runtime/starpu/include ) # Generate the Al4san common for all possible precisions # 
--------------------------------------------------------- diff --git a/runtime/starpu/control/starpu_descriptor.c b/runtime/starpu/control/starpu_descriptor.c index fdc3c2a..d1f54a2 100644 --- a/runtime/starpu/control/starpu_descriptor.c +++ b/runtime/starpu/control/starpu_descriptor.c @@ -42,8 +42,8 @@ /* Take 24 bits for the tile id, and 7 bits for descriptor id. These values can be changed through the call AL4SAN_User_Tag_Size(int tag_width, int tag_sep) */ #define TAG_WIDTH_MIN 20 -static int tag_width = 31; -static int tag_sep = 24; +static int tag_width = 64; +static int tag_sep = 54; static int _tag_mpi_initialized_ = 0; int diff --git a/runtime/starpu/control/starpu_task.c b/runtime/starpu/control/starpu_task.c index 7b10a6b..574736c 100644 --- a/runtime/starpu/control/starpu_task.c +++ b/runtime/starpu/control/starpu_task.c @@ -120,12 +120,13 @@ void al4san_starpu_task_info(AL4SAN_Starpu_task_t* al4san_task, int *codelet_buf va_copy(varg_list_copy, varg_list); while ((arg_type = va_arg(varg_list_copy, int))!=ARG_END) { - arg_ptr = va_arg(varg_list_copy, void *); - ptr_size = va_arg(varg_list_copy, int); if ((arg_type & AL4SAN_STARPU_UNDEFINED_MASK)== AL4SAN_INPUT || (arg_type & AL4SAN_STARPU_UNDEFINED_MASK)==AL4SAN_OUTPUT || (arg_type & AL4SAN_STARPU_UNDEFINED_MASK)==AL4SAN_INOUT){ + arg_ptr = va_arg(varg_list_copy, void *); + (void*)va_arg(varg_list_copy, void *); + ptr_size = va_arg(varg_list_copy, int); al4san_task->arg_depenency[num_arg]=(arg_type & AL4SAN_STARPU_UNDEFINED_MASK); al4san_task->arg_size[num_arg]=1; num_arg++; @@ -133,20 +134,28 @@ void al4san_starpu_task_info(AL4SAN_Starpu_task_t* al4san_task, int *codelet_buf } else if (arg_type==AL4SAN_SCRATCH || arg_type==AL4SAN_REDUX){ + arg_ptr = va_arg(varg_list_copy, void *); + ptr_size = va_arg(varg_list_copy, int); al4san_task->arg_depenency[num_arg]=arg_type; - al4san_task->arg_size[num_arg]=1; - num_arg++; - (*codelet_buffers)++; + al4san_task->arg_size[num_arg]=1; + num_arg++; + 
(*codelet_buffers)++; } else if (arg_type==AL4SAN_VALUE || arg_type==AL4SAN_DATA_ARRAY || arg_type==AL4SAN_DATA_MODE_ARRAY || arg_type==AL4SAN_CL_ARGS || arg_type==AL4SAN_CL_ARGS_NFREE){ + arg_ptr = va_arg(varg_list_copy, void *); + ptr_size = va_arg(varg_list_copy, int); al4san_task->arg_depenency[num_arg]=arg_type; al4san_task->arg_size[num_arg]=ptr_size; num_arg++; } + else{ + arg_ptr = va_arg(varg_list_copy, void *); + ptr_size = va_arg(varg_list_copy, int); + } } al4san_task->num_arg=num_arg; va_end(varg_list_copy); @@ -188,8 +197,15 @@ int al4san_starpu_task_create(struct starpu_codelet *cl, struct starpu_task *tas if ((arg_type & AL4SAN_STARPU_UNDEFINED_MASK)==AL4SAN_INPUT || (arg_type & AL4SAN_STARPU_UNDEFINED_MASK)==AL4SAN_OUTPUT || (arg_type & AL4SAN_STARPU_UNDEFINED_MASK)==AL4SAN_INOUT || - (arg_type & AL4SAN_STARPU_UNDEFINED_MASK)==AL4SAN_SCRATCH || (arg_type & AL4SAN_STARPU_UNDEFINED_MASK)==AL4SAN_REDUX) + { + /* We have an access mode : we expect to find a handle */ + starpu_data_handle_t handle = va_arg(varg_list, starpu_data_handle_t); + starpu_task_insert_data_process_arg(cl, task, &allocated_buffers, ¤t_buffer, (arg_type & AL4SAN_STARPU_UNDEFINED_MASK), handle); + (void*)va_arg(varg_list, void *); + ptr_size=va_arg(varg_list, int); + } + else if((arg_type & AL4SAN_STARPU_UNDEFINED_MASK)==AL4SAN_SCRATCH) { /* We have an access mode : we expect to find a handle */ starpu_data_handle_t handle = va_arg(varg_list, starpu_data_handle_t); @@ -488,7 +504,7 @@ int al4san_starpu_mpi_task_decode(struct starpu_codelet *codelet, int me, int nb } (void)va_arg(varg_list_copy, int); } - else if (arg_type_nocommute==AL4SAN_INPUT || arg_type_nocommute==AL4SAN_OUTPUT || arg_type_nocommute==AL4SAN_INOUT || arg_type==AL4SAN_SCRATCH || arg_type==AL4SAN_REDUX) + else if (arg_type_nocommute==AL4SAN_INPUT || arg_type_nocommute==AL4SAN_OUTPUT || arg_type_nocommute==AL4SAN_INOUT || arg_type==AL4SAN_REDUX) { starpu_data_handle_t data = va_arg(varg_list_copy, 
starpu_data_handle_t); enum starpu_data_access_mode mode = (enum starpu_data_access_mode) (arg_type & AL4SAN_STARPU_UNDEFINED_MASK); @@ -511,9 +527,34 @@ int al4san_starpu_mpi_task_decode(struct starpu_codelet *codelet, int me, int nb descrs[nb_data].handle = data; descrs[nb_data].mode = mode; nb_data ++; - + (void*)va_arg(varg_list_copy, void *); (void)va_arg(varg_list_copy, int); } + else if (arg_type==AL4SAN_SCRATCH) + { + starpu_data_handle_t data = va_arg(varg_list_copy, starpu_data_handle_t); + enum starpu_data_access_mode mode = (enum starpu_data_access_mode) (arg_type & AL4SAN_STARPU_UNDEFINED_MASK); + if (node_selected == 0) + { + int ret = _starpu_mpi_find_executee_node(data, mode, me, do_execute, &inconsistent_execute, xrank); + if (ret == -EINVAL) + { + free(descrs); + va_end(varg_list_copy); + _STARPU_TRACE_TASK_MPI_DECODE_END(); + return ret; + } + } + if (nb_data >= nb_allocated_data) + { + nb_allocated_data *= 2; + _STARPU_MPI_REALLOC(descrs, nb_allocated_data * sizeof(struct starpu_data_descr)); + } + descrs[nb_data].handle = data; + descrs[nb_data].mode = mode; + nb_data ++; + (void)va_arg(varg_list_copy, int); + } else if (arg_type == AL4SAN_DATA_ARRAY) { starpu_data_handle_t *datas = va_arg(varg_list_copy, starpu_data_handle_t *); @@ -703,12 +744,27 @@ int al4san_starpu_mpi_task_decode(struct starpu_codelet *codelet, int me, int nb select_node_policy = va_arg(varg_list_copy, int); (void)va_arg(varg_list_copy, int); } - else if (arg_type==AL4SAN_CUDA_FLG) - { - (void)va_arg(varg_list_copy, unsigned); - (void)va_arg(varg_list_copy, int); - } - else if (arg_type!=AL4SAN_STARPU_UNDEFINED) + + else if(arg_type==AL4SAN_CUDA_FLG) + { + #ifdef AL4SAN_USE_CUDA + if ( va_arg(varg_list, int)== ON){ + cl->cuda_flags[0]=AL4SAN_CUDA_ASYNC; + (void)va_arg(varg_list, int); + } + else + (void)va_arg(varg_list, int); + #else + (void)va_arg(varg_list, int); + (void)va_arg(varg_list, int); + #endif + } + else if (arg_type==AL4SAN_STARPU_UNDEFINED) + { + 
(void)va_arg(varg_list, int); + (void)va_arg(varg_list, int); + } + else if (arg_type!=AL4SAN_STARPU_UNDEFINED) { STARPU_ABORT_MSG("Unrecognized argument %d, did you perhaps forget to end arguments with 0?\n", arg_type); } @@ -792,6 +848,9 @@ int al4san_starpu_mpi_task_decode(struct starpu_codelet *codelet, int me, int nb *task = starpu_task_create(); (*task)->cl_arg_free = 1; + (*task)->callback_arg_free = 1; + (*task)->prologue_callback_arg_free = 1; + (*task)->prologue_callback_pop_arg_free = 1; va_copy(varg_list_copy, varg_list); al4san_starpu_task_create(codelet, *task, varg_list_copy); diff --git a/runtime/starpu/include/al4san_starpu.h.in b/runtime/starpu/include/al4san_starpu.h.in index 8dbf2bf..567929a 100644 --- a/runtime/starpu/include/al4san_starpu.h.in +++ b/runtime/starpu/include/al4san_starpu.h.in @@ -142,7 +142,7 @@ typedef struct al4san_starpu_arg_list_s //void AL4SAN_Starpu_set_reduction_methods(starpu_data_handle_t handle, al4san_flttype_t dtyp); -/*#define GET_MACRO_STARPU_ACCESS_WRITE_CACHED(_1,_2,_3, NAME,...) NAME +#define GET_MACRO_STARPU_ACCESS_WRITE_CACHED(_1,_2,_3, NAME,...) NAME #define AL4SAN_STARPU_ACCESS_WRITE_CACHED(...) 
GET_MACRO_STARPU_ACCESS_WRITE_CACHED(__VA_ARGS__, AL4SAN_STARPU_ACCESS_WRITE_CACHED3, AL4SAN_STARPU_ACCESS_WRITE_CACHED2, AL4SAN_STARPU_ACCESS_WRITE_CACHED1)(__VA_ARGS__) #if defined(AL4SAN_USE_MPI) && defined(HAVE_STARPU_MPI_CACHED_RECEIVE) @@ -273,7 +273,7 @@ al4san_starpu_data_iscached(const AL4SAN_desc_t *A, int m, int n) AL4SAN_STARPU_PRUNING_STATS_RANK_CHANGED(rank) #define AL4SAN_STARPU_END_ACCESS_DECLARATION \ - AL4SAN_STARPU_PRUNING_STATS_END_ACCESS_DECLARATION;*/ + AL4SAN_STARPU_PRUNING_STATS_END_ACCESS_DECLARATION; END_C_DECLS diff --git a/script/build-distmpi.sh b/script/build-distmpi.sh new file mode 100644 index 0000000..c0f661f --- /dev/null +++ b/script/build-distmpi.sh @@ -0,0 +1,126 @@ +#!/bin/bash +repo=al4san-dev +hwloc_install_dir=$HOME/al4san-dev/hwloc-install-mpi +starpu_install_dir=$HOME/al4san-dev/starpu-1.2-install-mpi + + +currentdir=`pwd` +run_clone=0 +set_pkgconfig_runtime_libs=1 +run_update_submodules=0 +run_module_setup=1 +compile_hwloc=1 +compile_starpu=1 +compile_cham=0 +compile_starsh=0 +compile_starshcore=0 +compile_al4san=1 +compile_hcore=1 +pause_info(){ + echo "Please press enter key to proceed"; read + echo "=================" +} +pause_step(){ + echo "Please press enter key to proceed"; read + #echo "=================" +} + +if [ $run_clone -eq 1 ]; then +if [ ! -d $repo ]; then + git clone git@github.com:ecrc/$repo.git + pause_step +else + echo "Echo \"$repo\" exists so I am not cloning from github" + pause_info +fi +fi +if [ $set_pkgconfig_runtime_libs -eq 1 ]; then + export PKG_CONFIG_PATH=$hwloc_install_dir/lib/pkgconfig:$PKG_CONFIG_PATH + export PKG_CONFIG_PATH=$starpu_install_dir/lib/pkgconfig:$PKG_CONFIG_PATH +fi +cd $currentdir/$repo +if [ $run_update_submodules -eq 1 ];then + git submodule update --init --recursive +fi +if [ $run_module_setup -eq 1 ]; then + . script/modules-ecrc.sh + . script/modules-ecrc-mpi.sh + pause_info +fi +if [ $compile_hwloc -eq 1 ];then + if [ ! 
-f "hwloc-1.11.13.tar.gz" ]; then + wget https://download.open-mpi.org/release/hwloc/v1.11/hwloc-1.11.13.tar.gz + tar -zxvf hwloc-1.11.13.tar.gz + fi + if [ -d "hwloc-1.11.13" ]; then + rm -rf hwloc-1.11.13 + fi + tar -zxvf hwloc-1.11.13.tar.gz + cd hwloc-1.11.13 + [[ -d $hwloc_install_dir ]] || mkdir -p $hwloc_install_dir + ./configure --prefix=$hwloc_install_dir --disable-libxml2 --disable-pci --enable-shared=yes --enable-static=yes + make -j + make -j install + if [ -d "$hwloc_install_dir/lib/pkgconfig" ]; then + export PKG_CONFIG_PATH=$hwloc_install_dir/lib/pkgconfig:$PKG_CONFIG_PATH + fi + pause_info +fi +cd $currentdir/$repo +if [ $compile_starpu -eq 1 ];then + if [ ! -f "starpu-1.2.10.tar.gz" ]; then + wget http://starpu.gforge.inria.fr/files/starpu-1.2.10/starpu-1.2.10.tar.gz + fi + if [ -d "starpu-1.2.10" ]; then + rm -rf starpu-1.2.10 + fi + tar -zxvf starpu-1.2.10.tar.gz + cd starpu-1.2.10 + [[ -d $starpu_install_dir ]] || mkdir -p $starpu_install_dir + ./configure --prefix=$starpu_install_dir --disable-cuda --disable-opencl --disable-mpi-check --disable-build-doc --disable-export-dynamic --with-mpicc=`which mpicc` + #--disable-mpi-check + make -j + make -j install + if [ -d "$starpu_install_dir/lib/pkgconfig" ]; then + export PKG_CONFIG_PATH=$starpu_install_dir/lib/pkgconfig:$PKG_CONFIG_PATH + fi + pause_info +fi +cd $currentdir/$repo +if [ $compile_cham -eq 1 ];then + git clone https://github.com/Rabab53/modified-chameleon.git + cd modified-chameleon + cd chameleon + git checkout rabab/newchameleon + if [ -d build-mpi ]; then + rm -rf build-mpi + fi + mkdir build-mpi + cd build-mpi +# cmake .. 
-DCMAKE_INSTALL_PREFIX=$currentdir/$repo/modified-chameleon/chameleon/build-mpi/install -DCMAKE_COLOR_MAKEFILE:BOOL=ON -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DBUILD_SHARED_LIBS=OFF -DCHAMELEON_ENABLE_EXAMPLE=ON -DCHAMELEON_ENABLE_TESTING=ON -DCHAMELEON_ENABLE_TIMING=ON -DCHAMELEON_USE_MPI=ON -DCHAMELEON_USE_CUDA=OFF -DCHAMELEON_SCHED_QUARK=OFF -DCHAMELEON_SCHED_STARPU=ON \ +# -DBLAS_LIBRARIES="-Wl,--no-as-needed;-L${MKLROOT}/lib;-lmkl_intel_lp64;-lmkl_core;-lmkl_sequential;-lpthread;-lm;-ldl" -DBLAS_COMPILER_FLAGS="-m64;-I${MKLROOT}/include" -DLAPACK_LIBRARIES="-Wl,--no-as-needed;-L${MKLROOT}/lib;-lmkl_intel_lp64;-lmkl_core;-lmkl_sequential;-lpthread;-lm;-ldl" -DCBLAS_DIR="${MKLROOT}" -DLAPACKE_DIR="${MKLROOT}" -DTMG_DIR="${MKLROOT}" \ +# -DMPI_C_COMPILER=`which mpicc` + + cmake .. -DCMAKE_INSTALL_PREFIX=$currentdir/$repo/modified-chameleon/chameleon/build-mpi/install -DCMAKE_COLOR_MAKEFILE:BOOL=ON -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DBUILD_SHARED_LIBS=OFF -DCHAMELEON_ENABLE_EXAMPLE=ON -DCHAMELEON_ENABLE_TESTING=ON -DCHAMELEON_ENABLE_TIMING=ON -DCHAMELEON_USE_MPI=ON -DCHAMELEON_USE_CUDA=OFF -DCHAMELEON_SCHED_QUARK=OFF -DCHAMELEON_SCHED_STARPU=ON + make -j install + if [ -d $currentdir/$repo/modified-chameleon/chameleon/build-mpi/install ]; then + export PKG_CONFIG_PATH=$currentdir/$repo/modified-chameleon/chameleon/build-mpi/install/lib/pkgconfig/:$PKG_CONFIG_PATH + fi + pause_info +fi +cd $currentdir/$repo +if [ $compile_al4san -eq 1 ];then + rm -rf build-mpi + mkdir build-mpi + cd build-mpi + cmake .. 
-DCMAKE_INSTALL_PREFIX=$currentdir/$repo/build-mpi/install -DAL4SAN_SCHED_STARPU=ON -DAL4SAN_USE_MPI=ON -DAL4SAN_ENABLE_CUDA=OFF -DAL4SAN_USE_CUDA=OFF + make -j + make install + if [ -d $currentdir/$repo/build-mpi/install ]; then + export PKG_CONFIG_PATH=$currentdir/$repo/build-mpi/install/lib/pkgconfig/:$PKG_CONFIG_PATH + fi + +fi + +echo "Everything has finished, returning to initial folder \"$currentdir\"" +cd $currentdir diff --git a/script/build-shared.sh b/script/build-shared.sh new file mode 100644 index 0000000..86515d3 --- /dev/null +++ b/script/build-shared.sh @@ -0,0 +1,123 @@ +#!/bin/bash +repo=al4san-dev +currentdir=`pwd` +hwloc_install_dir=$currentdir/al4san-dev/hwloc-install +starpu_install_dir=$currentdir/al4san-dev/starpu-1.2-install + + +currentdir=`pwd` +run_clone=0 +set_pkgconfig_runtime_libs=1 +run_update_submodules=0 +run_module_setup=1 +compile_hwloc=1 +compile_starpu=1 +compile_cham=0 +compile_starsh=0 +compile_starshcore=0 +compile_al4san=1 +compile_hcore=1 +pause_info(){ + echo "Please press enter key to proceed"; read + echo "=================" +} +pause_step(){ + echo "Please press enter key to proceed"; read + #echo "=================" +} + +if [ $run_clone -eq 1 ]; then +if [ ! -d $repo ]; then + git clone git@github.com:ecrc/$repo.git + pause_step +else + echo "Echo \"$repo\" exists so I am not cloning from github" + pause_info +fi +fi +if [ $set_pkgconfig_runtime_libs -eq 1 ]; then + export PKG_CONFIG_PATH=$hwloc_install_dir/lib/pkgconfig:$PKG_CONFIG_PATH + export PKG_CONFIG_PATH=$starpu_install_dir/lib/pkgconfig:$PKG_CONFIG_PATH +fi +cd $currentdir/$repo +if [ $run_update_submodules -eq 1 ];then + git submodule update --init --recursive +fi +if [ $run_module_setup -eq 1 ]; then + . script/modules-ecrc.sh + pause_info +fi +if [ $compile_hwloc -eq 1 ];then + if [ ! 
-f "hwloc-1.11.13.tar.gz" ]; then + wget https://download.open-mpi.org/release/hwloc/v1.11/hwloc-1.11.13.tar.gz + tar -zxvf hwloc-1.11.13.tar.gz + fi + if [ -d "hwloc-1.11.13" ]; then + rm -rf hwloc-1.11.13 + fi + tar -zxvf hwloc-1.11.13.tar.gz + cd hwloc-1.11.13 + [[ -d $hwloc_install_dir ]] || mkdir -p $hwloc_install_dir + ./configure --prefix=$hwloc_install_dir --disable-libxml2 --disable-pci --enable-shared=yes --enable-static=yes + make -j + make -j install + if [ -d "$hwloc_install_dir/lib/pkgconfig" ]; then + export PKG_CONFIG_PATH=$hwloc_install_dir/lib/pkgconfig:$PKG_CONFIG_PATH + fi + pause_info +fi +cd $currentdir/$repo +if [ $compile_starpu -eq 1 ];then + if [ ! -f "starpu-1.2.10.tar.gz" ]; then + wget http://starpu.gforge.inria.fr/files/starpu-1.2.10/starpu-1.2.10.tar.gz + fi + if [ -d "starpu-1.2.10" ]; then + rm -rf starpu-1.2.10 + fi + tar -zxvf starpu-1.2.10.tar.gz + cd starpu-1.2.10 + [[ -d $starpu_install_dir ]] || mkdir -p $starpu_install_dir + ./configure --prefix=$starpu_install_dir --disable-cuda --disable-opencl --disable-build-doc --disable-export-dynamic --without-mpicc + #--disable-mpi-check + make -j + make -j install + if [ -d "$starpu_install_dir/lib/pkgconfig" ]; then + export PKG_CONFIG_PATH=$starpu_install_dir/lib/pkgconfig:$PKG_CONFIG_PATH + export LD_LIBRARY_PATH=$starpu_install_dir/lib:$LD_LIBRARY_PATH + + fi + pause_info +fi +cd $currentdir/$repo +if [ $compile_cham -eq 1 ];then + git clone https://github.com/Rabab53/modified-chameleon.git + cd modified-chameleon + cd chameleon + git checkout rabab/newchameleon + if [ -d build ]; then + rm -rf build + fi + mkdir build + cd build + cmake .. 
-DCMAKE_INSTALL_PREFIX=$currentdir/$repo/modified-chameleon/chameleon/build/install -DCMAKE_COLOR_MAKEFILE:BOOL=ON -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DBUILD_SHARED_LIBS=OFF -DCHAMELEON_ENABLE_EXAMPLE=ON -DCHAMELEON_ENABLE_TESTING=OFF -DCHAMELEON_ENABLE_TIMING=ON -DCHAMELEON_USE_MPI=OFF -DCHAMELEON_USE_CUDA=OFF -DCHAMELEON_SCHED_QUARK=OFF -DCHAMELEON_SCHED_STARPU=ON + make -j install + if [ -d $currentdir/$repo/modified-chameleon/chameleon/build/install ]; then + export PKG_CONFIG_PATH=$currentdir/$repo/modified-chameleon/chameleon/build/install/lib/pkgconfig/:$PKG_CONFIG_PATH + fi + pause_info +fi +cd $currentdir/$repo +if [ $compile_al4san -eq 1 ];then + rm -rf build + mkdir build + cd build + cmake .. -DCMAKE_INSTALL_PREFIX=$currentdir/$repo/build/install -DAL4SAN_SCHED_STARPU=ON -DAL4SAN_USE_MPI=OFF -DAL4SAN_ENABLE_CUDA=OFF -DAL4SAN_USE_CUDA=OFF -DCMAKE_COLOR_MAKEFILE:BOOL=ON -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON + make -j + make install + if [ -d $currentdir/$repo/build/install ]; then + export PKG_CONFIG_PATH=$currentdir/$repo/build/install/lib/pkgconfig/:$PKG_CONFIG_PATH + fi +fi + +echo "Everything has finished, returning to initial folder \"$currentdir\"" +cd $currentdir diff --git a/script/compile-al4san.sh b/script/compile-al4san.sh index 3b8c632..c5403bc 100644 --- a/script/compile-al4san.sh +++ b/script/compile-al4san.sh @@ -4,7 +4,7 @@ module load mkl/2018-initial module load gcc/5.5.0 module load cmake/3.9.6 module load hwloc/1.11.8-gcc-5.5.0 -#module load openmpi/3.0.0-gcc-5.5.0 +module load openmpi/3.0.0-gcc-5.5.0 module load cuda/9.0 module load ecrc-extras diff --git a/script/modules-ecrc-mpi.sh b/script/modules-ecrc-mpi.sh new file mode 100644 index 0000000..c726a98 --- /dev/null +++ b/script/modules-ecrc-mpi.sh @@ -0,0 +1,2 @@ +#module load openmpi/4.0.1-gcc-8.3.0 +module load openmpi/3.0.0-gcc-7.2.0 diff --git a/script/modules-ecrc.sh b/script/modules-ecrc.sh new file mode 100644 index 0000000..1d1df04 --- /dev/null +++ b/script/modules-ecrc.sh @@ 
-0,0 +1,13 @@ +#module load mkl/2018-initial +module load ecrc-extras +module load mkl/2019-update-5 +#module load gcc/5.5.0 +module load cmake/3.9.6 +module load gcc/7.2.0 +#module load intel +#module load likwid/5.0.0-gcc-8.3.0 +#module load hwloc/1.11.8-gcc-5.5.0 +#module load starpu/1.2.3-gcc-5.5.0-mkl-openmpi-3.0.0 +#module load starpu/1.2.4-gcc-5.5.0-mkl-openmpi-3.0.0 +#module load starpu/1.2.6-gcc-5.5.0-mkl-openmpi-3.0.0 +#module load gsl/2.4-gcc-5.5.0