diff --git a/runtime/flang/fortDt.h b/runtime/flang/fortDt.h index 1eff4a5ee12..71d477b84ca 100644 --- a/runtime/flang/fortDt.h +++ b/runtime/flang/fortDt.h @@ -303,14 +303,9 @@ typedef __INT_T dtype; /* * data type representing the number of elements passed to - * ENTF90(ALLOC04, alloc04), etc. * It's either a 64-bit type, or __INT_T - * which can be either a 64-bit or 32-bit type depending on DESC_I8 + * ENTF90(ALLOC04, alloc04), etc. */ -#if defined(TARGET_X8664) #define __NELEM_T __INT8_T -#else -#define __NELEM_T __INT_T -#endif #endif /*_PGHPF_TYPES_H_*/ diff --git a/runtime/flang/gather_cmplx16.F95 b/runtime/flang/gather_cmplx16.F95 index f08169f1d66..18666268cc4 100644 --- a/runtime/flang/gather_cmplx16.F95 +++ b/runtime/flang/gather_cmplx16.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -21,11 +21,8 @@ subroutine ftn_gather_cmplx16( ta, a, lda, alpha, buffer, bufrows, bufcols ) implicit none -#ifdef TARGET_X8664 + integer*8 lda -#else - integer lda -#endif complex*16 :: a( lda,* ), alpha integer :: bufrows, bufcols integer :: i, j, ndx, ndxsave diff --git a/runtime/flang/gather_cmplx8.F95 b/runtime/flang/gather_cmplx8.F95 index 0697b8c00d4..711b0d72301 100644 --- a/runtime/flang/gather_cmplx8.F95 +++ b/runtime/flang/gather_cmplx8.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -21,11 +21,8 @@ subroutine ftn_gather_cmplx8( ta, a, lda, alpha, buffer, bufrows, bufcols ) implicit none -#ifdef TARGET_X8664 + integer*8 lda -#else - integer lda -#endif complex*8 :: a( lda,* ), alpha integer :: bufrows, bufcols integer :: i, j, ndx, ndxsave diff --git a/runtime/flang/gather_real4.F95 b/runtime/flang/gather_real4.F95 index 9d49ce6605d..bc19bf8c4a8 100644 --- a/runtime/flang/gather_real4.F95 +++ b/runtime/flang/gather_real4.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -20,11 +20,8 @@ subroutine ftn_gather_real4( a, lda, alpha, buffer, bufrows, bufcols ) implicit none -#ifdef TARGET_X8664 + integer*8 lda -#else - integer lda -#endif real*4 :: a( lda,* ), alpha integer :: bufrows, bufcols integer i, j, ndx, ndxsave diff --git a/runtime/flang/gather_real8.F95 b/runtime/flang/gather_real8.F95 index e232ff1542a..2654b25e511 100644 --- a/runtime/flang/gather_real8.F95 +++ b/runtime/flang/gather_real8.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -20,11 +20,8 @@ subroutine ftn_gather_real8( a, lda, alpha, buffer, bufrows, bufcols ) implicit none -#ifdef TARGET_X8664 + integer*8 lda -#else - integer lda -#endif real*8 :: a( lda,* ), alpha integer :: bufrows, bufcols integer i, j, ndx, ndxsave diff --git a/runtime/flang/iso_c_bind.F95 b/runtime/flang/iso_c_bind.F95 index 24f18e0bb84..fe68f8d4a52 100644 --- a/runtime/flang/iso_c_bind.F95 +++ b/runtime/flang/iso_c_bind.F95 @@ -153,11 +153,6 @@ module ISO_C_BINDING interface operator (.ne.) module procedure compare_ne_cfunptrs end interface -#if defined TARGET_LINUX_X8664 || defined TARGET_LLVM_64 - ! added this to make the data-initializd module common block be at 32 bytes long - ! maybe only needed for linux86-64 - integer*8 :: __iso_bind_private_i = 11, __iso_bind_private_j = 23 -#endif contains logical function compare_eq_cptrs(a,b) diff --git a/runtime/flang/miscsup_com.c b/runtime/flang/miscsup_com.c index 60d602af76c..4f5e7c0da7f 100644 --- a/runtime/flang/miscsup_com.c +++ b/runtime/flang/miscsup_com.c @@ -4833,11 +4833,7 @@ ENTF90(SPACINGD, spacingd)(__REAL8_T *d) #ifndef DESC_I8 -#if defined(TARGET_X8664) typedef __INT8_T SZ_T; -#else -typedef __INT4_T SZ_T; -#endif #undef _MZERO #define _MZERO(n, t) \ diff --git a/runtime/flang/mvmul_cmplx16.F95 b/runtime/flang/mvmul_cmplx16.F95 index 03471e4d1a8..b18c9d9ad22 100644 --- a/runtime/flang/mvmul_cmplx16.F95 +++ b/runtime/flang/mvmul_cmplx16.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -19,22 +19,15 @@ subroutine ftn_mvmul_cmplx16( ta, tb, m, k, alpha, a, lda, b, beta, c ) implicit none -#ifdef TARGET_X8664 + integer*8 :: m, k, lda -#else - integer :: m, k, lda -#endif complex*16 :: alpha, beta complex*16, dimension( lda, * ) :: a complex*16, dimension( * ) :: b, c integer :: ta, tb ! Local variables -#ifdef TARGET_X8664 integer*8 :: i, j, kk -#else - integer :: i, j, kk -#endif complex*16 :: temp ! print *, "#### In mvmul ####" diff --git a/runtime/flang/mvmul_cmplx8.F95 b/runtime/flang/mvmul_cmplx8.F95 index 06848fd6294..d0cf0264d2c 100644 --- a/runtime/flang/mvmul_cmplx8.F95 +++ b/runtime/flang/mvmul_cmplx8.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -19,22 +19,15 @@ subroutine ftn_mvmul_cmplx8( ta, tb, m, k, alpha, a, lda, b, beta, c ) implicit none -#ifdef TARGET_X8664 + integer*8 :: m, k, lda -#else - integer :: m, k, lda -#endif complex*8 :: alpha, beta complex*8, dimension( lda, * ) :: a complex*8, dimension( * ) :: b, c integer :: ta, tb ! Local variables -#ifdef TARGET_X8664 integer*8 :: i, j, kk -#else - integer :: i, j, kk -#endif complex*8 :: temp ! print *, "#### In mvmul ####" diff --git a/runtime/flang/mvmul_real4.F95 b/runtime/flang/mvmul_real4.F95 index a698ec7e9a0..5f9b46b1eca 100644 --- a/runtime/flang/mvmul_real4.F95 +++ b/runtime/flang/mvmul_real4.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -20,22 +20,15 @@ subroutine ftn_mvmul_real4( ta, m, k, alpha, a, lda, b, beta, c ) implicit none -#ifdef TARGET_X8664 + integer*8 :: m, k, lda -#else - integer :: m, k, lda -#endif real*4 :: alpha, beta real*4, dimension( lda, * ) :: a real*4, dimension( * ) :: b, c integer :: ta ! Local variables -#ifdef TARGET_X8664 integer*8 :: i, j, kk -#else - integer :: i, j, kk -#endif real*4 :: temp if( ta .eq. 0 )then ! normally oriented a matrix diff --git a/runtime/flang/mvmul_real8.F95 b/runtime/flang/mvmul_real8.F95 index 99b8e37e534..d12404b575b 100644 --- a/runtime/flang/mvmul_real8.F95 +++ b/runtime/flang/mvmul_real8.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -20,22 +20,15 @@ subroutine ftn_mvmul_real8( ta, m, k, alpha, a, lda, b, beta, c ) implicit none -#ifdef TARGET_X8664 + integer*8 :: m, k, lda -#else - integer :: m, k, lda -#endif real*8 :: alpha, beta real*8, dimension( lda, * ) :: a real*8, dimension( * ) :: b, c integer :: ta ! Local variables -#ifdef TARGET_X8664 integer*8 :: i, j, kk -#else - integer :: i, j, kk -#endif real*8 :: temp if( ta .eq. 0 )then ! normally oriented a matrix diff --git a/runtime/flang/pgf90_mmul_cmplx16.h b/runtime/flang/pgf90_mmul_cmplx16.h index 1a17302970b..b4256f6b428 100644 --- a/runtime/flang/pgf90_mmul_cmplx16.h +++ b/runtime/flang/pgf90_mmul_cmplx16.h @@ -17,11 +17,7 @@ ! ! Global variables ! -#ifdef TARGET_X8664 integer*8 :: mra, ncb, kab, lda, ldb, ldc -#else - integer :: mra, ncb, kab, lda, ldb, ldc -#endif complex*16, dimension( lda, * )::a complex*16, dimension( ldb, * )::b complex*16, dimension( ldc, * )::c @@ -29,8 +25,7 @@ character*1 :: ca, cb ! ! local variables - ! -#ifdef TARGET_X8664 + ! integer*8 :: colsa, rowsa, rowsb, colsb integer*8 :: i, j, jb, k, ak, bk, jend integer*8 :: ar, ar_sav, ac, ac_sav, br, bc @@ -41,18 +36,6 @@ integer*8 :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end integer*8 :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end integer*8 :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav -#else - integer :: colsa, rowsa, rowsb, colsb - integer :: i, j, jb, k, ak, bk, jend - integer :: ar, ar_sav, ac, ac_sav, br, bc - integer :: ndxa, ndxasav - integer :: ndxb, ndxbsav, ndxb0, ndxb1, ndxb2, ndxb3 - integer :: colachunk, colachunks, colbchunk, colbchunks - integer :: rowchunk, rowchunks - integer :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end - integer :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end - integer :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav -#endif integer :: ta, tb complex*16 :: temp, temp0, temp1, temp2, temp3 real*8 :: temprr0, temprr1, temprr2, temprr3 diff --git a/runtime/flang/pgf90_mmul_cmplx8.h b/runtime/flang/pgf90_mmul_cmplx8.h index 8e79488872f..c3c29557792 100644 --- a/runtime/flang/pgf90_mmul_cmplx8.h +++ b/runtime/flang/pgf90_mmul_cmplx8.h @@ -17,11 +17,7 @@ ! ! Global variables ! -#ifdef TARGET_X8664 integer*8 :: mra, ncb, kab, lda, ldb, ldc -#else - integer :: mra, ncb, kab, lda, ldb, ldc -#endif complex*8, dimension( lda, * )::a complex*8, dimension( ldb, * )::b complex*8, dimension( ldc, * )::c @@ -29,8 +25,7 @@ character*1 :: ca, cb ! ! local variables - ! -#ifdef TARGET_X8664 + ! integer*8 :: colsa, rowsa, rowsb, colsb integer*8 :: i, j, jb, k, ak, bk, jend integer*8 :: ar, ar_sav, ac, ac_sav, br, bc @@ -41,18 +36,6 @@ integer*8 :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end integer*8 :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end integer*8 :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav -#else - integer :: colsa, rowsa, rowsb, colsb - integer :: i, j, jb, k, ak, bk, jend - integer :: ar, ar_sav, ac, ac_sav, br, bc - integer :: ndxa, ndxasav - integer :: ndxb, ndxbsav, ndxb0, ndxb1, ndxb2, ndxb3 - integer :: colachunk, colachunks, colbchunk, colbchunks - integer :: rowchunk, rowchunks - integer :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end - integer :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end - integer :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav -#endif integer :: ta, tb complex*8 :: temp, temp0, temp1, temp2, temp3 real*4 :: temprr0, temprr1, temprr2, temprr3 diff --git a/runtime/flang/pgf90_mmul_real4.h b/runtime/flang/pgf90_mmul_real4.h index f1fa52253a2..a3aa1ca0f8e 100644 --- a/runtime/flang/pgf90_mmul_real4.h +++ b/runtime/flang/pgf90_mmul_real4.h @@ -21,11 +21,7 @@ ! ! Global variables ! -#ifdef TARGET_X8664 integer*8 :: mra, ncb, kab, lda, ldb, ldc -#else - integer :: mra, ncb, kab, lda, ldb, ldc -#endif real*4, dimension( lda, * )::a real*4, dimension( ldb, * )::b real*4, dimension( ldc, * )::c @@ -34,7 +30,6 @@ ! ! local variables ! -#ifdef TARGET_X8664 integer*8 :: colsa, rowsa, rowsb, colsb integer*8 :: i, j, jb, k, ak, bk, jend integer*8 :: ar, ar_sav, ac, ac_sav, br, bc @@ -45,18 +40,6 @@ integer*8 :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end integer*8 :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end integer*8 :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav -#else - integer :: colsa, rowsa, rowsb, colsb - integer :: i, j, jb, k, ak, bk, jend - integer :: ar, ar_sav, ac, ac_sav, br, bc - integer :: ndxa, ndxasav - integer :: ndxb, ndxbsav, ndxb0, ndxb1, ndxb2, ndxb3 - integer :: colachunk, colachunks, colbchunk, colbchunks - integer :: rowchunk, rowchunks - integer :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end - integer :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end - integer :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav -#endif real*4 :: temp, temp0, temp1, temp2, temp3 real*4 :: bufatemp, bufbtemp real*8 :: time_start, time_end, ttime, all_time diff --git a/runtime/flang/pgf90_mmul_real8.h b/runtime/flang/pgf90_mmul_real8.h index 9b13327ac1a..9e078cc5949 100644 --- a/runtime/flang/pgf90_mmul_real8.h +++ b/runtime/flang/pgf90_mmul_real8.h @@ -20,11 +20,7 @@ ! ! Global variables ! -#ifdef TARGET_X8664 integer*8 :: mra, ncb, kab, lda, ldb, ldc -#else - integer :: mra, ncb, kab, lda, ldb, ldc -#endif real*8, dimension( lda, * )::a real*8, dimension( ldb, * )::b real*8, dimension( ldc, * )::c @@ -33,7 +29,6 @@ ! ! local variables ! -#ifdef TARGET_X8664 integer*8 :: colsa, rowsa, rowsb, colsb integer*8 :: i, j, jb, k, ak, bk, jend integer*8 :: ar, ar_sav, ac, ac_sav, br, bc @@ -44,18 +39,6 @@ integer*8 :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end integer*8 :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end integer*8 :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav -#else - integer :: colsa, rowsa, rowsb, colsb - integer :: i, j, jb, k, ak, bk, jend - integer :: ar, ar_sav, ac, ac_sav, br, bc - integer :: ndxa, ndxasav - integer :: ndxb, ndxbsav, ndxb0, ndxb1, ndxb2, ndxb3 - integer :: colachunk, colachunks, colbchunk, colbchunks - integer :: rowchunk, rowchunks - integer :: colsb_chunk, colsb_chunks, colsb_strt, colsb_end - integer :: colsa_chunk, colsa_chunks, colsa_strt, colsa_end - integer :: bufr, bufr_sav, bufca, bufca_sav, bufcb, bufcb_sav -#endif real*8 :: temp, temp0, temp1, temp2, temp3 real*8 :: bufatemp, bufbtemp real*8 :: time_start, time_end, ttime, all_time diff --git a/runtime/flang/rdst.c b/runtime/flang/rdst.c index 8fc708ead09..9c74dce320b 100644 --- a/runtime/flang/rdst.c +++ b/runtime/flang/rdst.c @@ -220,6 +220,14 @@ I8(local_copy)(char *db, F90_Desc *dd, __INT_T doffset, char *ab, } } +/** \brief check if a descriptor is associated with a non-contiguous + * section. + * + * \param a is the descriptor we are checking. + * \param dim is the rank of the array we are checking. + * + * \returns 0 if contiguous, else the dimension that is non-contiguous. + */ __INT_T I8(is_nonsequential_section)(F90_Desc *a, __INT_T dim) { @@ -233,7 +241,7 @@ I8(is_nonsequential_section)(F90_Desc *a, __INT_T dim) for (i = 0; i < dim; i++) { SET_DIM_PTRS(ad, a, i); if (F90_DPTR_LSTRIDE_G(ad) != tmp_lstride || F90_DPTR_SSTRIDE_G(ad) != 1) { - is_nonseq_section = 1; + is_nonseq_section = i + 1; break; } tmp_lstride *= F90_DPTR_EXTENT_G(ad); @@ -2744,3 +2752,60 @@ ENTF90(IS_CONTIGUOUS, is_contiguous)(char *ab, F90_Desc *ad) return GET_DIST_TRUE_LOG; } +/** \brief Print a contiguous error message and abort. + * + * This function will also call is_nonsequential_section() to get the + * first dimension of the array that is non-contiguous and include it in the + * error message. + * + * \param ptr is the pointer we are checking. + * \param pd is the descriptor we are checking. + * \param lineno is the source line number we are checking. + * \param ptrnam is the name of pointer, null-terminated string. + * \param srcfil is the name of source file, null-terminated string. + * \param flags is currently 1 when ptr is an optional argument, else 0. + */ +void +ENTF90(CONTIGERROR, contigerror)(void *ptr, F90_Desc *pd, __INT_T lineno, + char *ptrnam, char *srcfil, __INT_T flags) +{ + char str[200]; + int dim; + + if (flags == 1 && ptr == NULL) { + /* ignore non-present optional argument */ + return; + } + dim = I8(is_nonsequential_section)(pd, F90_RANK_G(pd)); + sprintf(str, "Runtime Error at %s, line %d: Pointer assignment of " + "noncontiguous target (dimension %d) to CONTIGUOUS pointer " + "%s\n", srcfil, lineno, dim, ptrnam); + __fort_abort(str); +} + +/** \brief Check whether a pointer is associated with a contiguous array object. + * + * If the pointer is not associated with a contiguous array object, then a + * message is printed to stderr and the user program aborts. + * + * \param ptr is the pointer we are checking. + * \param pd is the descriptor we are checking. + * \param lineno is the source line number we are checking. + * \param ptrnam is the name of pointer, null-terminated string. + * \param srcfil is the name of source file, null-terminated string. + * \param flags is currently 1 when ptr is an optional argument, else 0. + */ +void +ENTF90(CONTIGCHK, contigchk)(void *ptr, F90_Desc *pd, __INT_T lineno, + char *ptrnam, char *srcfil, __INT_T flags) +{ + if (flags == 1 && ptr == NULL) { + /* ignore non-present optional argument */ + return; + } + + if (!(ENTF90(IS_CONTIGUOUS, is_contiguous)(ptr, pd))) { + ENTF90(CONTIGERROR, contigerror)(ptr, pd, lineno, ptrnam, srcfil, flags); + } +} + diff --git a/runtime/flang/transpose_cmplx16.F95 b/runtime/flang/transpose_cmplx16.F95 index bacd9764052..ffc3496b2c9 100644 --- a/runtime/flang/transpose_cmplx16.F95 +++ b/runtime/flang/transpose_cmplx16.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -21,11 +21,7 @@ subroutine ftn_transpose_cmplx16( ta, a, lda, alpha, buffer, bufrows, bufcols ) implicit none -#ifdef TARGET_X8664 integer*8 lda -#else - integer lda -#endif integer :: bufrows, bufcols integer i, j, ndx, ndxsave complex*16 :: a( lda, * ), alpha diff --git a/runtime/flang/transpose_cmplx8.F95 b/runtime/flang/transpose_cmplx8.F95 index 3196f3a93fe..93c064f3a1b 100644 --- a/runtime/flang/transpose_cmplx8.F95 +++ b/runtime/flang/transpose_cmplx8.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -21,11 +21,7 @@ subroutine ftn_transpose_cmplx8( ta, a, lda, alpha, buffer, bufrows, bufcols ) implicit none -#ifdef TARGET_X8664 integer*8 lda -#else - integer lda -#endif integer :: bufrows, bufcols integer i, j, ndx, ndxsave complex*8 :: a( lda, * ), alpha diff --git a/runtime/flang/transpose_real4.F95 b/runtime/flang/transpose_real4.F95 index 782b9b10f95..b4e48e238b6 100644 --- a/runtime/flang/transpose_real4.F95 +++ b/runtime/flang/transpose_real4.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -20,11 +20,7 @@ subroutine ftn_transpose_real4( a, lda, alpha, buffer, bufrows, bufcols ) implicit none -#ifdef TARGET_X8664 integer*8 lda -#else - integer lda -#endif integer :: bufrows, bufcols integer i, j, ndx, ndxsave real*4 :: a( lda, * ), alpha diff --git a/runtime/flang/transpose_real8.F95 b/runtime/flang/transpose_real8.F95 index 881c4fabc0d..ced57081803 100644 --- a/runtime/flang/transpose_real8.F95 +++ b/runtime/flang/transpose_real8.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2011-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -20,11 +20,7 @@ subroutine ftn_transpose_real8( a, lda, alpha, buffer, bufrows, bufcols ) implicit none -#ifdef TARGET_X8664 integer*8 lda -#else - integer lda -#endif integer :: bufrows, bufcols integer i, j, ndx, ndxsave real*8 :: a( lda, * ), alpha diff --git a/runtime/flang/vmmul_cmplx16.F95 b/runtime/flang/vmmul_cmplx16.F95 index 46e4ca13629..2bdaf76fb95 100644 --- a/runtime/flang/vmmul_cmplx16.F95 +++ b/runtime/flang/vmmul_cmplx16.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -20,22 +20,14 @@ subroutine ftn_vmmul_cmplx16( ta, tb, n, k, alpha, a, b, ldb, beta, c ) implicit none -#ifdef TARGET_X8664 integer*8 :: n, k, ldb -#else - integer :: n, k, ldb -#endif integer :: ta, tb complex*16, dimension (ldb, * ) :: b complex*16, dimension ( * ) :: a, c complex*16 :: alpha, beta ! local variables -#ifdef TARGET_X8664 integer*8 :: i, j, kk -#else - integer :: i, j, kk -#endif complex*16 :: temp diff --git a/runtime/flang/vmmul_cmplx8.F95 b/runtime/flang/vmmul_cmplx8.F95 index bbc808e2f01..310a63f5aac 100644 --- a/runtime/flang/vmmul_cmplx8.F95 +++ b/runtime/flang/vmmul_cmplx8.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -20,22 +20,14 @@ subroutine ftn_vmmul_cmplx8( ta, tb, n, k, alpha, a, b, ldb, beta, c ) implicit none -#ifdef TARGET_X8664 integer*8 :: n, k, ldb -#else - integer :: n, k, ldb -#endif integer :: ta, tb complex*8, dimension (ldb, * ) :: b complex*8, dimension ( * ) :: a, c complex*8 :: alpha, beta ! local variables -#ifdef TARGET_X8664 integer*8 :: i, j, kk -#else - integer :: i, j, kk -#endif complex*8 :: temp diff --git a/runtime/flang/vmmul_real4.F95 b/runtime/flang/vmmul_real4.F95 index 5920d48314d..7efb17ea4de 100644 --- a/runtime/flang/vmmul_real4.F95 +++ b/runtime/flang/vmmul_real4.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -20,22 +20,14 @@ subroutine ftn_vmmul_real4( tb, n, k, alpha, a, b, ldb, beta, c ) implicit none -#ifdef TARGET_X8664 integer*8 :: n, k, ldb -#else - integer :: n, k, ldb -#endif integer :: tb real*4, dimension (ldb, * ) :: b real*4, dimension ( * ) :: a, c real*4 :: alpha, beta ! local variables -#ifdef TARGET_X8664 integer*8 :: i, j, kk -#else - integer :: i, j, kk -#endif real*4 :: temp if( beta .ne. 0.0 )then diff --git a/runtime/flang/vmmul_real8.F95 b/runtime/flang/vmmul_real8.F95 index 207fbe95ec7..e333bb304cb 100644 --- a/runtime/flang/vmmul_real8.F95 +++ b/runtime/flang/vmmul_real8.F95 @@ -1,5 +1,5 @@ ! -! Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. +! Copyright (c) 2012-2018, NVIDIA CORPORATION. All rights reserved. ! ! Licensed under the Apache License, Version 2.0 (the "License"); ! you may not use this file except in compliance with the License. @@ -20,22 +20,14 @@ subroutine ftn_vmmul_real8( tb, n, k, alpha, a, b, ldb, beta, c ) implicit none -#ifdef TARGET_X8664 integer*8 :: n, k, ldb -#else - integer :: n, k, ldb -#endif integer :: tb real*8, dimension (ldb, * ) :: b real*8, dimension ( * ) :: a, c real*8 :: alpha, beta ! local variables -#ifdef TARGET_X8664 integer*8 :: i, j, kk -#else - integer :: i, j, kk -#endif real*8 :: temp if( beta .ne. 0.0 )then diff --git a/runtime/ompstub/ompstubs.c b/runtime/ompstub/ompstubs.c index 5416c8b2c4b..89402108a41 100644 --- a/runtime/ompstub/ompstubs.c +++ b/runtime/ompstub/ompstubs.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2014-2018, NVIDIA CORPORATION. All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -679,3 +679,8 @@ __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_ kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) { } + +void +__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads) +{ +} diff --git a/tools/flang1/flang1exe/dpm_out.c b/tools/flang1/flang1exe/dpm_out.c index 5c5ab8c4522..052d5dbb4f1 100644 --- a/tools/flang1/flang1exe/dpm_out.c +++ b/tools/flang1/flang1exe/dpm_out.c @@ -2625,20 +2625,32 @@ newargs_for_entry(int this_entry) } else { newdsc = NEWDSCG(arg); if (newdsc == 0) { - /* Subtlety: The commented-out ALLOCDESCG(arg) test is what - * seems to break pointer-valued functions in Whizard 2.3.1, - * since their results (which are converted into new first - * arguments) don't have the mystery ALLOCDESC flag set on them. - */ set_preserve_descriptor(CLASSG(arg) || is_procedure_ptr(arg) || (sem.which_pass && IS_PROC_DUMMYG(arg)) || - (ALLOCDESCG(arg) && RESULTG(arg))); + ((ALLOCDESCG(arg) || needs_descriptor(arg)) && + RESULTG(arg))); newdsc = sym_get_arg_sec(arg); set_preserve_descriptor(0); NEWDSCP(arg, newdsc); } } + if (XBIT(54, 0x40) && CONTIGATTRG(arg) + && STYPEG(newdsc) != ST_UNKNOWN + ) { + /* Generate contiguity check on this argument. + * + * NOTE: For LLVM targets, this function gets called by + * newargs_for_llvmiface() to set up placeholder descriptor + * arguments in the interface. We do not want to + * generate contiguity checks in this case since an interface + * block is non-executable code. The sym_get_arg_sec() function + * above returns a newdsc without any STYPE when we're processing + * an interface. Therefore, we check whether STYPEG(newdsc) != ST_UNKNOWN. + */ + int ast = mk_id(arg); + gen_contig_check(ast, ast, newdsc, FUNCLINEG(gbl.currsub), false, Gbegin); + } SCP(newdsc, SC_DUMMY); OPTARGP(newdsc, OPTARGG(arg)); NEWARGP(newdsc, 0); diff --git a/tools/flang1/flang1exe/lowersym.c b/tools/flang1/flang1exe/lowersym.c index e30e60b0d35..7e0c71fc602 100644 --- a/tools/flang1/flang1exe/lowersym.c +++ b/tools/flang1/flang1exe/lowersym.c @@ -2662,8 +2662,6 @@ lower_put_datatype(int dtype, int usage) int restype = DTY(dtype + 1); if (is_array_dtype(restype)) restype = array_element_dtype(restype); - if (restype > 0) - lower_put_datatype(restype, 1); /* result type is a dependency */ } putwhich("proc", "p"); putval("result", DTY(dtype + 1)); diff --git a/tools/flang1/flang1exe/rest.c b/tools/flang1/flang1exe/rest.c index 0acd6d27fe9..6e4bc430cc2 100644 --- a/tools/flang1/flang1exe/rest.c +++ b/tools/flang1/flang1exe/rest.c @@ -1356,6 +1356,11 @@ transform_call(int std, int ast) inface_arg = aux.dpdsc_base[dscptr + i]; needdescr = needs_descriptor(inface_arg); /* actually, only for pointer or assumed-shape arguments dummy */ + if (XBIT(54, 0x80) && inface_arg > NOSYM && ast_is_sym(ele) && + needs_descriptor(memsym_of_ast(ele))) { + /* Generate contiguity check at call-site */ + gen_contig_check(mk_id(inface_arg), ele, 0, gbl.lineno, true, std); + } } if (ele == 0) { ARGT_ARG(newargt, newi) = ele; diff --git a/tools/flang1/flang1exe/rte.c b/tools/flang1/flang1exe/rte.c index 52c7fb38ccc..840743ae8d5 100644 --- a/tools/flang1/flang1exe/rte.c +++ b/tools/flang1/flang1exe/rte.c @@ -827,6 +827,41 @@ get_header_member(int sdsc, int info) return ast; } + +/** \brief Generate an AST for accessing a particular field in a descriptor + * header. + * + * Note: This is similar to get_header_member() above except it also + * operates on descriptors that are embedded in derived type objects. + * + * \param parent is the ast of the expression with the descriptor that + * we want to access. This is needed if the descriptor is embedded + * in a derived type object. + * \param sdsc is the symbol table pointer of the descriptor we want to + * access. + * \param info is the field we want to access in the descriptor. + * + * \return an ast expression of the descriptor access. + */ +int +get_header_member_with_parent(int parent, int sdsc, int info) +{ + int ast; + int subs[1]; + +#if DEBUG + if (!sdsc) + interr("get_header_member, blank static descriptor", 0, 3); + else if (STYPEG(sdsc) != ST_ARRDSC && STYPEG(sdsc) != ST_DESCRIPTOR && + DTY(DTYPEG(sdsc)) != TY_ARRAY) + interr("get_header_member, bad static descriptor", sdsc, 3); +#endif + subs[0] = mk_isz_cval(info, astb.bnd.dtype); + ast = mk_subscr(check_member(parent, mk_id(sdsc)), subs, 1, astb.bnd.dtype); + return ast; +} + + static int get_array_rank(int sdsc) { diff --git a/tools/flang1/flang1exe/semant.c b/tools/flang1/flang1exe/semant.c index d77d84d82e4..148f859d947 100644 --- a/tools/flang1/flang1exe/semant.c +++ b/tools/flang1/flang1exe/semant.c @@ -43,7 +43,6 @@ #include "atomic_common.h" -#define OPT_OMP_ATOMIC !flg.omptarget && !XBIT(69,0x1000) static void gen_dinit(int, SST *); static void pop_subprogram(void); @@ -1185,7 +1184,7 @@ semant1(int rednum, SST *top) } if (sem.mpaccatomic.seen && sem.mpaccatomic.action_type != ATOMIC_CAPTURE) { - if ((!sem.mpaccatomic.is_acc && OPT_OMP_ATOMIC)) { + if ((!sem.mpaccatomic.is_acc && use_opt_atomic(sem.doif_depth))) { ; } else { if (sem.mpaccatomic.is_acc) @@ -1575,7 +1574,7 @@ semant1(int rednum, SST *top) int ecs; sem.mpaccatomic.apply = FALSE; if (!sem.mpaccatomic.is_acc) { - if (OPT_OMP_ATOMIC) { + if (use_opt_atomic(sem.doif_depth)) { ecs = mk_stmt(A_MP_ENDATOMIC, 0); add_stmt(ecs); } else { diff --git a/tools/flang1/flang1exe/semant.h b/tools/flang1/flang1exe/semant.h index dce27f51eb7..5a1cb999fd3 100644 --- a/tools/flang1/flang1exe/semant.h +++ b/tools/flang1/flang1exe/semant.h @@ -1486,6 +1486,7 @@ void CheckDecl(int); void end_contained(void); /* semsmp.c */ +LOGICAL use_opt_atomic(int); int emit_epar(void); int emit_etarget(void); void is_dovar_sptr(int); @@ -1517,6 +1518,7 @@ int do_distbegin(DOINFO *, int, int); /* semutil.c */ void check_derived_type_array_section(int); int add_ptr_assign(int, int, int); +void gen_contig_check(int dest, int src, SPTR sdsc, int lineno, bool cs, int std); int collapse_begin(DOINFO *); int collapse_add(DOINFO *); void link_parents(STSK *, int); diff --git a/tools/flang1/flang1exe/semant3.c b/tools/flang1/flang1exe/semant3.c index c2b7920ff1c..08e4931b15b 100644 --- a/tools/flang1/flang1exe/semant3.c +++ b/tools/flang1/flang1exe/semant3.c @@ -71,7 +71,6 @@ static void end_association(int sptr); static int get_sst_named_whole_variable(SST *rhs); static int get_derived_type(SST *, LOGICAL); -#define OPT_OMP_ATOMIC !flg.omptarget && !XBIT(69,0x1000) #define IN_OPENMP_ATOMIC (sem.mpaccatomic.ast && !(sem.mpaccatomic.is_acc)) /** \brief semantic actions - part 3. @@ -557,7 +556,7 @@ semant3(int rednum, SST *top) } gen_finalization_for_sym(sptr1, std, parent); } - if (OPT_OMP_ATOMIC && sem.mpaccatomic.seen && !sem.mpaccatomic.is_acc) { + if (use_opt_atomic(sem.doif_depth) && sem.mpaccatomic.seen && !sem.mpaccatomic.is_acc) { sem.mpaccatomic.accassignc++; ast = do_openmp_atomics(RHS(2), RHS(5)); if (ast) { diff --git a/tools/flang1/flang1exe/semsmp.c b/tools/flang1/flang1exe/semsmp.c index 3b272e5060e..fb917cb1de2 100644 --- a/tools/flang1/flang1exe/semsmp.c +++ b/tools/flang1/flang1exe/semsmp.c @@ -114,6 +114,7 @@ static LOGICAL is_valid_atomic_update(int, int); static int mk_atomic_update_binop(int, int); static int mk_atomic_update_intr(int, int); static void do_map(); +static LOGICAL use_atomic_for_reduction(int); #ifdef OMP_OFFLOAD_LLVM static void mp_handle_map_clause(SST *, int, char *, int, int, bool); @@ -554,7 +555,7 @@ static LOGICAL any_pflsr_private = FALSE; static void add_pragmasyms(int pragmatype, int pragmascope, ITEM *itemp, int); static void add_pragma(int pragmatype, int pragmascope, int pragmaarg); -#define OPT_OMP_ATOMIC !flg.omptarget && !XBIT(69,0x1000) +#define OPT_OMP_ATOMIC !XBIT(69,0x1000) static int kernel_argnum; @@ -1348,7 +1349,7 @@ semsmp(int rednum, SST *top) case MP_STMT33: if (sem.mpaccatomic.action_type == ATOMIC_CAPTURE) { int ecs; - if (OPT_OMP_ATOMIC) { + if (use_opt_atomic(sem.doif_depth)) { ecs = mk_stmt(A_MP_ENDATOMIC, 0); std = add_stmt(ecs); } else { @@ -3281,7 +3282,7 @@ semsmp(int rednum, SST *top) sem.mpaccatomic.ast = 0; sem.mpaccatomic.seen = TRUE; - if (OPT_OMP_ATOMIC) { + if (use_opt_atomic(sem.doif_depth)) { sem.mpaccatomic.ast = mk_stmt(A_MP_ATOMIC, 0); (void)add_stmt(sem.mpaccatomic.ast); } else { @@ -8002,7 +8003,7 @@ gen_reduction(REDUC *reducp, REDUC_SYM *reduc_symp, LOGICAL rmme, return; } } - if (OPT_OMP_ATOMIC) + if (use_atomic_for_reduction(sem.doif_depth)) add_stmt(mk_stmt(A_MP_ATOMIC, 0)); (void)mk_storage(reduc_symp->shared, &lhs); @@ -8023,7 +8024,8 @@ gen_reduction(REDUC *reducp, REDUC_SYM *reduc_symp, LOGICAL rmme, * shared <-- intrin(shared, private) */ (void)ref_intrin(&intrin, arg1); - if (OPT_OMP_ATOMIC && sem.mpaccatomic.rmw_op != AOP_UNDEF) { + if (use_atomic_for_reduction(sem.doif_depth) && + sem.mpaccatomic.rmw_op != AOP_UNDEF) { MEMORY_ORDER save_mem_order = sem.mpaccatomic.mem_order; sem.mpaccatomic.mem_order = MO_SEQ_CST; mklvalue(&lhs, 1); @@ -8069,7 +8071,7 @@ gen_reduction(REDUC *reducp, REDUC_SYM *reduc_symp, LOGICAL rmme, SST_ASTP(&op1, ast); SST_SHAPEP(&op1, A_SHAPEG(ast)); - if (OPT_OMP_ATOMIC && get_atomic_rmw_op(opc) != AOP_UNDEF) { + if (use_atomic_for_reduction(sem.doif_depth)&& get_atomic_rmw_op(opc) != AOP_UNDEF) { MEMORY_ORDER save_mem_order = sem.mpaccatomic.mem_order; sem.mpaccatomic.rmw_op = get_atomic_rmw_op(opc); @@ -8140,7 +8142,7 @@ end_reduction(REDUC *red, int doif) if (reduc_symp->shared == 0) /* error - illegal reduction variable */ continue; - if (!OPT_OMP_ATOMIC && !done) { + if (!use_atomic_for_reduction(sem.doif_depth) && !done) { ast_crit = emit_bcs_ecs(A_MP_CRITICAL); done = TRUE; } @@ -8151,21 +8153,20 @@ end_reduction(REDUC *red, int doif) for (reducp = red; reducp; reducp = reducp->next) { for (reduc_symp = reducp->list; reduc_symp; reduc_symp = reduc_symp->next) { - if(flg.omptarget && save_target && save_teams) { - error(1201, ERR_Severe, gbl.lineno, "reduction", "teams"); - } if (reduc_symp->shared == 0) /* error - illegal reduction variable or set by loop above */ continue; - if (!OPT_OMP_ATOMIC && !done) { + if (!use_atomic_for_reduction(sem.doif_depth) && !done) { #ifdef OMP_OFFLOAD_LLVM ast_red = mk_stmt(A_MP_BREDUCTION, 0); - (void)add_stmt(ast_red); + (void) add_stmt(ast_red); #endif ast_crit = emit_bcs_ecs(A_MP_CRITICAL); #ifdef OMP_OFFLOAD_LLVM - A_ISOMPREDUCTIONP(ast_crit, 1); - gen_reduction_ompaccel(reducp, reduc_symp, FALSE, in_parallel); + if (!use_atomic_for_reduction(sem.doif_depth)) { + A_ISOMPREDUCTIONP(ast_crit, 1); + gen_reduction_ompaccel(reducp, reduc_symp, FALSE, in_parallel); + } #endif done = TRUE; } @@ -8177,7 +8178,7 @@ end_reduction(REDUC *red, int doif) sem.parallel = save_par; sem.target = save_target; sem.teams = save_teams; - if (!OPT_OMP_ATOMIC) { + if (!use_atomic_for_reduction(sem.doif_depth)) { ast_endcrit = emit_bcs_ecs(A_MP_ENDCRITICAL); A_LOPP(ast_crit, ast_endcrit); A_LOPP(ast_endcrit, ast_crit); @@ -10259,3 +10260,36 @@ check_map_data_sharing(int sptr) return TRUE; } +/** + * \brief Decide to use optimized atomic usage. + */ +LOGICAL use_opt_atomic(int d) +{ +#ifdef OMP_OFFLOAD_LLVM + if(flg.omptarget && (DI_IN_NEST(d, DI_TARGET) || + DI_IN_NEST(d, DI_TARGTEAMSDISTPARDO) || + DI_IN_NEST(d, DI_TARGPARDO) || + DI_IN_NEST(d, DI_TARGETSIMD) || + DI_IN_NEST(d, DI_TARGTEAMSDIST))) + return TRUE; +#endif + return OPT_OMP_ATOMIC; +} + +/** + \brief Decide whether to use llvm atomic for reduction or not. + Atomic is used only for teams reduction. + */ +static LOGICAL use_atomic_for_reduction(int d) +{ +#ifdef OMP_OFFLOAD_LLVM + if(flg.omptarget && DI_IN_NEST(d, DI_TARGET) ) { + if(DI_IN_NEST(d, DI_PARDO) || + DI_IN_NEST(d, DI_TARGTEAMSDISTPARDO)) + return OPT_OMP_ATOMIC; + else + return TRUE; + } +#endif + return OPT_OMP_ATOMIC; +} diff --git a/tools/flang1/flang1exe/semutil.c b/tools/flang1/flang1exe/semutil.c index 68736272f64..b72bb963251 100644 --- a/tools/flang1/flang1exe/semutil.c +++ b/tools/flang1/flang1exe/semutil.c @@ -32,6 +32,7 @@ #include "semstk.h" #include "machar.h" #include "ast.h" +#define RTE_C #include "rte.h" #include "pd.h" #include "direct.h" @@ -66,6 +67,9 @@ static LOGICAL subst_lhs_pointer(int, int, int); static LOGICAL not_in_arrfn(int, int); static int find_pointer_variable_assign(int, int); +static int inline_contig_check(int src, SPTR src_sptr, SPTR sdsc, int std); + + /*---------------------------------------------------------------------*/ /** \brief If \a stkptr is an LVALUE that has a constant value, replace it with @@ -3775,6 +3779,8 @@ add_ptr_assign(int dest, int src, int std) int ast; int dtype, tag; int dtype2, tag2, dtype3; + SPTR dest_sptr, src_sptr, sdsc; + int newargt, astnew; /* Check if the dest is scalar, if so assign len to descriptor * For array, it was done in runtime. @@ -3846,17 +3852,15 @@ add_ptr_assign(int dest, int src, int std) add_stmt(cvlen); } - if (DTY(dtype) == TY_PTR) { - int dest_sptr, src_sptr, sdsc; - int newargt, func, astnew, zero; - - if (ast_is_sym(src)) { - src_sptr = memsym_of_ast(src); - } else { - src_sptr = 0; - } + if (ast_is_sym(src)) { + src_sptr = memsym_of_ast(src); + } else { + src_sptr = 0; + } - dest_sptr = memsym_of_ast(dest); + dest_sptr = memsym_of_ast(dest); + + if (DTY(dtype) == TY_PTR) { if (STYPEG(src_sptr) == ST_PROC) { int iface=0, iface2=0, dpdsc=0, dpdsc2=0; @@ -3899,15 +3903,200 @@ add_ptr_assign(int dest, int src, int std) add_stmt(astnew); } } - func = intast_sym[I_PTR2_ASSIGN]; ast = begin_call(A_ICALL, func, 2); A_OPTYPEP(ast, I_PTR2_ASSIGN); add_arg(dest); add_arg(src); + if (XBIT(54, 0x40) && ast_is_sym(dest) && CONTIGATTRG(memsym_of_ast(dest))) { + /* Add contiguity pointer check. We add the check after the pointer + * assignment so we will get the correct section descriptor for dest. + */ + if (std) { + std = add_stmt_before(ast, std); + } else { + std = add_stmt(ast); + } + ast = mk_stmt(A_CONTINUE, 0); + std = add_stmt_after(ast, std); + gen_contig_check(dest, dest, 0, gbl.lineno, false, std); + ast = mk_stmt(A_CONTINUE, 0); /* return a continue statement */ + } return ast; } +/** \brief Generate contiguity check test inline (experimental) + * + * Called by gen_contig_check() below to generate the contiguity check inline. + * This is an experimental test since it looks at the descriptor flags, + * data type, and src_sptr if src_sptr is an optional dummy argument. The + * endif asts are generated in gen_contig_check(). + * + * \param src is the source/pointer target ast. + * \param src_sptr is the source/pointer target sptr. + * \param sdsc is the source/pointer target's descriptor + * \param std is the optional statement descriptor for adding the check (0 + * if not applicable). + * + * \return the statement descriptor (std) of the generated code. + */ +static int +inline_contig_check(int src, SPTR src_sptr, SPTR sdsc, int std) +{ + int flagsast = get_header_member_with_parent(src, sdsc, DESC_HDR_FLAGS); + int lenast = get_header_member_with_parent(src, sdsc, DESC_HDR_BYTE_LEN); + int sizeast = size_ast(src_sptr, DDTG(DTYPEG(src_sptr))); + int cmp, astnew, seqast, newargt; + + /* Step 1: Add insertion point in AST */ + astnew = mk_stmt(A_CONTINUE, 0); + if (std) + std = add_stmt_before(astnew, std); + else + std = add_stmt(astnew); + + /* Step 2: If src_sptr is an optional argument, then generate an + * argument "present" check. Also generate this check if XBIT(54, 0x200) + * is set which says to ignore null pointer targets. + */ + if (XBIT(54, 0x200) || (SCG(src_sptr) == SC_DUMMY && OPTARGG(src_sptr))) { + int present = ast_intr(I_PRESENT, stb.user.dt_log, 1, src); + astnew = mk_stmt(A_IFTHEN, 0); + A_IFEXPRP(astnew, present); + std = add_stmt_after(astnew, std); + } + + /* Step 3: Check descriptor flag to see if it includes + * __SEQUENTIAL_SECTION. + */ + seqast = mk_isz_cval(__SEQUENTIAL_SECTION, DT_INT); + flagsast = ast_intr(I_AND, astb.bnd.dtype, 2, flagsast, seqast); + cmp = mk_binop(OP_EQ, flagsast, astb.i0, DT_INT); + astnew = mk_stmt(A_IFTHEN, 0); + A_IFEXPRP(astnew, cmp); + std = add_stmt_after(astnew, std); + + /* Step 4: Check element size to see if it matches descriptor + * element size (i.e., check for a noncontiguous array subobject like + * p => dt(:)%m where dt has more than one component). + */ + cmp = mk_binop(OP_EQ, lenast, sizeast, DT_INT); + astnew = mk_stmt(A_IFTHEN, 0); + A_IFEXPRP(astnew, cmp); + std = add_stmt_after(astnew, std); + + return std; +} + +/** \brief Generate a contiguous pointer check on a pointer assignment + * when applicable. + * + * \param dest is the destination pointer. + * \param src is the pointer target. + * \param sdsc is an optional descriptor argument to pass to the check + * function (0 to use src's descriptor). + * \param srcLine is the line number associated with the check. + * \param cs is true when we are generating the check at a call-site. + * \param std is the optional statement descriptor for adding the check (0 + * if not applicable). + */ +void +gen_contig_check(int dest, int src, SPTR sdsc, int srcLine, bool cs, int std) +{ + int newargt, astnew; + SPTR src_sptr, dest_sptr, func; + bool isFuncCall, inlineContigCheck, ignoreNullTargets; + int argFlags; + + if (ast_is_sym(src)) { + src_sptr = memsym_of_ast(src); + } else { + interr("gen_contig_check: invalid src ast", src, 3); + src_sptr = 0; + } + + if (ast_is_sym(dest)) { + dest_sptr = memsym_of_ast(dest); + } else { + interr("gen_contig_check: invalid dest ast", dest, 3); + dest_sptr = 0; + } + isFuncCall = (RESULTG(dest_sptr) && FVALG(gbl.currsub) != dest_sptr); + /* If XBIT(54, 0x200) is set, we ignore null pointer targets. If + * we have an optional argument, then we need to igore it if it's + * null (i.e., not present). + */ + ignoreNullTargets = (XBIT(54, 0x200) || (SCG(dest_sptr) == SC_DUMMY && + OPTARGG(dest_sptr))); + if (CONTIGATTRG(dest_sptr) || (CONTIGATTRG(src_sptr) && isFuncCall)) { + int lineno, ptrnam, srcfil; + if (sdsc <= NOSYM) + sdsc = SDSCG(src_sptr); + if (sdsc <= NOSYM) + get_static_descriptor(src_sptr); + if (STYPEG(src_sptr) == ST_MEMBER) + sdsc = get_member_descriptor(src_sptr); + if (sdsc <= NOSYM) + sdsc = SDSCG(src_sptr); + lineno = mk_cval1(srcLine, DT_INT); + lineno = mk_unop(OP_VAL, lineno, DT_INT); + ptrnam = !isFuncCall ? getstring(SYMNAME(dest_sptr), + strlen(SYMNAME(dest_sptr))+1) : + getstring(SYMNAME(src_sptr), strlen(SYMNAME(src_sptr))+1); + srcfil = getstring(gbl.curr_file, strlen(gbl.curr_file)+1); + /* Check to see if we should inline the contiguity check. We do not + * currently inline it if the user is also generating checks at the + * call-site. Currently the inlining routine uses an argument structure + * that may conflict with the call-site (but not when we're generating + * checks for pointer assignments or arguments inside a callee). + * We could possibly support inlining at the call-site by deferring the + * check after we generate the call-site code. However, this may be + * a lot of work for something that probably will not be used too often. + * Generating checks for pointer assignments and for arguments inside a + * callee are typically sufficient. The only time one needs to check + * the call-site is when the called routine is inside a library that was + * not compiled with contiguity checking. + */ + inlineContigCheck = (XBIT(54, 0x100) && !cs); + if (inlineContigCheck) { + std = inline_contig_check(src, src_sptr, sdsc, std); + } + newargt = mk_argt(6); + ARGT_ARG(newargt, 0) = A_TYPEG(src) == A_SUBSCR ? A_LOPG(src) : src; + ARGT_ARG(newargt, 1) = STYPEG(sdsc) != ST_MEMBER ? mk_id(sdsc) : + check_member(src, mk_id(sdsc)); + ARGT_ARG(newargt, 2) = lineno; + ARGT_ARG(newargt, 3) = mk_id(ptrnam); + ARGT_ARG(newargt, 4) = mk_id(srcfil); + /* We can pass some flags about src here. For now, the flag is 1 if + * dest_sptr is an optional argument or if we do not want to flag null + * pointer targets. That way, we do not indicate a contiguity error + * if the argument is not present or if the pointer target is null. + */ + argFlags = mk_cval1( ignoreNullTargets ? 1 : 0, DT_INT); + argFlags = mk_unop(OP_VAL, argFlags, DT_INT); + ARGT_ARG(newargt, 5) = argFlags; + + func = mk_id(sym_mkfunc_nodesc(inlineContigCheck ? + mkRteRtnNm(RTE_contigerror) : + mkRteRtnNm(RTE_contigchk), DT_NONE)); + astnew = mk_func_node(A_CALL, func, 6, newargt); + if (inlineContigCheck) { + /* generate endifs for inline contiguity checks */ + std = add_stmt_after(astnew, std); + std = add_stmt_after(mk_stmt(A_ENDIF,0), std); + if (ignoreNullTargets) { + std = add_stmt_after(mk_stmt(A_ENDIF,0), std); + } + add_stmt_after(mk_stmt(A_ENDIF,0), std); + } else if (std) { + add_stmt_before(astnew, std); + } else { + add_stmt(astnew); + } + } +} + int mk_component_ast(int leaf, int parent, int src_ast) { diff --git a/tools/flang2/docs/xflag.n b/tools/flang2/docs/xflag.n index 600a6af98fb..15bc52c465c 100644 --- a/tools/flang2/docs/xflag.n +++ b/tools/flang2/docs/xflag.n @@ -1754,6 +1754,24 @@ from STOP commands as the program exit status. .XB 0x20: Assume that dummy arguments declared EXTERNAL are Fortran routines that were compiled with Flang. +.XB 0x40: +Enable contiguity pointer checks on pointer assignments and on actual arguments +inside callees. +.XB 0x80: +Enable contiguity pointer checks at call-sites. +.XB 0x100: +Use an alternate contiguity pointer check inline that checks whether the +pointer target's descriptor flags have __SEQUENTIAL_SECTION set and +whether the object's data type length match the descriptor's data type +length. This check is experimental and intended for pointer +assignments and actual arguments inside callees. This check cannot currently +be generated at a call-sites. The XBIT(54, 0x40) must also be enabled. If +XBIT(54, 0x80) is enabled, then we perform the contiguity check at +call-sites using a library routine. Note: In the case of an optional argument, +the inline check will also check whether the argument is present. +.XB 0x200: +When checking contiguity (using XBIT(54,0x40), XBIT(54,0x80), XBIT(54,0x100)), +do not flag null pointer targets as noncontiguous. .XF "55:" .XB 0x01: diff --git a/tools/flang2/flang2exe/cgmain.cpp b/tools/flang2/flang2exe/cgmain.cpp index 39ba5dadb0e..532f0b55fc8 100644 --- a/tools/flang2/flang2exe/cgmain.cpp +++ b/tools/flang2/flang2exe/cgmain.cpp @@ -3678,7 +3678,7 @@ make_stmt(STMT_Type stmt_type, int ilix, bool deletable, SPTR next_bih_label, /* A builtin function that gets special handling. */ goto end_make_stmt; } - gen_call_expr(ilix, DT_NONE, NULL, SPTR_NULL); + gen_call_expr(ilix, DT_NONE, NULL, sym); break; continue_call: /* Add instruction if it hasn't been added already by gen_call_expr(). */ @@ -12564,13 +12564,32 @@ process_formal_arguments(LL_ABI_Info *abi) llvm_info.curr_func, "%s%s", get_llvm_name(arg->sptr), suffix); /* Emit code in the entry block that saves the argument into the local - * variable. The pointer bitcast takes care of the coercion. - * - * FIXME: What if the coerced type is larger than the local variable? - * We'll be writing outside its alloca. */ - if (store_addr->ll_type->sub_types[0] != arg->type) - store_addr = - make_bitcast(store_addr, ll_get_pointer_type(arg_op->ll_type)); + * variable. */ + if (store_addr->ll_type->sub_types[0] != arg->type) { + LL_Type *var_type = store_addr->ll_type->sub_types[0]; + if (ll_type_bytes(arg->type) > ll_type_bytes(var_type)) { + /* This can happen in C (but not C++) with a new-style declaration and + an old-style definition: + int f(int); + int f(c) char c; { ... } + Cast the argument value to the local variable type. */ + if (ll_type_int_bits(arg->type) && ll_type_int_bits(var_type)) { + arg_op = convert_operand(arg_op, var_type, I_TRUNC); + } else if (ll_type_is_fp(arg->type) && ll_type_is_fp(var_type)) { + arg_op = convert_operand(arg_op, var_type, I_FPTRUNC); + } else { + assert(false, + "process_formal_arguments: Function argument with mismatched " + "size that is neither integer nor floating-point", + 0, ERR_Fatal); + } + } else { + /* Use a pointer bitcast on the address of the local variable to coerce + the argument to the local variable type. */ + store_addr = + make_bitcast(store_addr, ll_get_pointer_type(arg_op->ll_type)); + } + } flags = ldst_instr_flags_from_dtype(formalsGetDtype(arg->sptr)); if (ftn_byval) { diff --git a/tools/flang2/flang2exe/exp_rte.cpp b/tools/flang2/flang2exe/exp_rte.cpp index 61788733370..4cf2e115612 100644 --- a/tools/flang2/flang2exe/exp_rte.cpp +++ b/tools/flang2/flang2exe/exp_rte.cpp @@ -5781,10 +5781,6 @@ charlen(SPTR sym) int nme; int addr; -#if DEBUG - assert(CLENG(sym) != 0, "charlen: sym not adjustable-length char", sym, - ERR_Severe); -#endif lensym = CLENG(sym); if (!INTERNREFG(lensym) && gbl.internal > 1 && INTERNREFG(sym)) { /* Its len is passed by value in aux.curr_entry->display after sym */ diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index 91fb75954af..85cbfe00018 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -2478,17 +2478,10 @@ create_ref(SPTR sym, int *pnmex, int basenm, int baseilix, int *pclen, ADDRCAND(ilix, ILI_OPND(ilix, 2)); } else { if (dtype == DT_DEFERCHAR || dtype == DT_DEFERNCHAR) { - /* nondummy adjustable length character */ - assert(SCG(sym) == SC_BASED, "create_ref:IM_BASE op#2 not based sym", - sym, ERR_Severe); if (SDSCG(sym)) { clen = exp_get_sdsc_len(sym, 0, 0); } else { clen = charlen(sym); -#if DEBUG - assert(SDSCG(sym) != 0, "create_ref:Missing descriptor", sym, - ERR_Severe); -#endif } mxlen = 0; ADDRCAND(clen, ILI_OPND(clen, 2)); diff --git a/tools/flang2/flang2exe/ompaccel.cpp b/tools/flang2/flang2exe/ompaccel.cpp index d6afbe9a683..de81bf66178 100644 --- a/tools/flang2/flang2exe/ompaccel.cpp +++ b/tools/flang2/flang2exe/ompaccel.cpp @@ -950,6 +950,12 @@ tinfo_update_maptype(OMPACCEL_SYM *tsyms, int nargs, SPTR host_symbol, for (i = 0; i < nargs; ++i) { if (tsyms[i].host_sym == host_symbol) { tsyms[i].map_type = map_type; + if (STYPEG(tsyms[i].host_sym) != ST_ARRAY) { + /* if scalar variables are used in map clause, pass them by reference */ + if (map_type & OMP_TGT_MAPTYPE_FROM || map_type & OMP_TGT_MAPTYPE_TO) + PASSBYREFP(tsyms[i].device_sym, 1); + PASSBYVALP(tsyms[i].device_sym, 0); + } return true; } } @@ -962,6 +968,7 @@ ompaccel_tinfo_current_add_reductionitem(SPTR private_sym, SPTR shared_sym, { if (current_tinfo == nullptr) ompaccel_msg_interr("XXX", "Current target info is not found.\n"); + current_tinfo->reduction_symbols[current_tinfo->n_reduction_symbols] .private_sym = private_sym; current_tinfo->reduction_symbols[current_tinfo->n_reduction_symbols] @@ -973,9 +980,20 @@ ompaccel_tinfo_current_add_reductionitem(SPTR private_sym, SPTR shared_sym, // copied back to the host. PASSBYVALP(private_sym, 0); - ompaccel_tinfo_current_addupdate_mapitem( - (SPTR)HASHLKG(shared_sym), - OMP_TGT_MAPTYPE_TARGET_PARAM | OMP_TGT_MAPTYPE_TO | OMP_TGT_MAPTYPE_FROM); + /* Mark reduction variable as tofrom */ + if (ompaccel_tinfo_current_target_mode() == + mode_target_teams_distribute_parallel_for || + ompaccel_tinfo_current_target_mode() == + mode_target_teams_distribute_parallel_for_simd) + ompaccel_tinfo_current_addupdate_mapitem((SPTR)HASHLKG(private_sym), + OMP_TGT_MAPTYPE_TARGET_PARAM | + OMP_TGT_MAPTYPE_TO | + OMP_TGT_MAPTYPE_FROM); + else + ompaccel_tinfo_current_addupdate_mapitem((SPTR)HASHLKG(shared_sym), + OMP_TGT_MAPTYPE_TARGET_PARAM | + OMP_TGT_MAPTYPE_TO | + OMP_TGT_MAPTYPE_FROM); } void @@ -1291,7 +1309,7 @@ ompaccel_nvvm_emit_reduce(OMPACCEL_RED_SYM *ReductionItems, int NumReductions) mk_ompaccel_addsymbol(".rhs", dtypeReduceData, SC_DUMMY, ST_VAR); /* Generate function symbol */ - sprintf(name, "%s%d", "ompaccel.reductionfunc", reductionFunctionCounter++); + sprintf(name, "%s%d", "ompaccel_reduction", reductionFunctionCounter++); sptrFn = mk_ompaccel_function(name, 2, func_params, true); cr_block(); @@ -1360,7 +1378,7 @@ ompaccel_nvvm_emit_shuffle_reduce(OMPACCEL_RED_SYM *ReductionItems, PASSBYVALP(func_params[3], 1); /* Generate function symbol */ - sprintf(name, "%s%d", "ompaccel.shufflereduce", reductionFunctionCounter++); + sprintf(name, "%s%d", "ompaccel_shufflereduce", reductionFunctionCounter++); sptrFn = mk_ompaccel_function(name, 4, func_params, true); cr_block(); @@ -1444,7 +1462,7 @@ ompaccel_nvvm_emit_inter_warp_copy(OMPACCEL_RED_SYM *ReductionItems, DTYPE dtypeReductionItem; char name[30]; - sprintf(name, "%s%d", "ompaccel.InterWarpCopy", reductionFunctionCounter++); + sprintf(name, "%s%d", "ompaccel_InterWarpCopy", reductionFunctionCounter++); sptrReduceData = func_params[0] = mk_ompaccel_addsymbol( ".reduceData", mk_ompaccel_array_dtype(DT_INT8, NumReductions), SC_DUMMY, ST_ARRAY); @@ -1982,8 +2000,8 @@ exp_ompaccel_bteams(ILM *ilmp, int curilm, int outlinedCnt, SPTR uplevel_sptr, } if (flg.omptarget) { - ll_rewrite_ilms(-1, curilm, 0); - return; + ll_rewrite_ilms(-1, curilm, 0); + return; } if (XBIT(232, 0x1)) { @@ -2044,7 +2062,7 @@ exp_ompaccel_emap(ILM *ilmp, int curilm) { int ili; OMPACCEL_TINFO *targetinfo; - if(ompaccel_tinfo_has(gbl.currsub)) + if (ompaccel_tinfo_has(gbl.currsub)) return; ompaccel_symreplacer(true); targetinfo = ompaccel_tinfo_current_get(); diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index 71954a66d4b..57832a4be0d 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -1647,11 +1647,13 @@ loadUplevelArgsForRegion(SPTR uplevel, SPTR taskAllocSptr, int count, if (do_load) { if (based) { - PARREFLOADP(based, 1); + /* PARREFLOAD is set if ADDRTKN of based was false */ + PARREFLOADP(based, !ADDRTKNG(based)); ADDRTKNP(based, 1); } else { - PARREFLOADP(sptr, 1); + /* PARREFLOAD is set if ADDRTKN of sptr was false */ + PARREFLOADP(sptr, !ADDRTKNG(sptr)); /* prevent optimizer to remove store instruction */ ADDRTKNP(sptr, 1); } diff --git a/tools/flang2/utils/ilmtp/aarch64/ilmtp.n b/tools/flang2/utils/ilmtp/aarch64/ilmtp.n index 35427a6e21a..6a1e0d8ffc9 100644 --- a/tools/flang2/utils/ilmtp/aarch64/ilmtp.n +++ b/tools/flang2/utils/ilmtp/aarch64/ilmtp.n @@ -820,10 +820,24 @@ p2 - fsource p3 - mask .OP ICMPZ t1 p3 ne .OP DSELECT r t1 p2 p1 -.IL CMERGE arth sym lnk lnk lnk -.AT spec -.IL CDMERGE arth sym lnk lnk lnk -.AT spec dcmplx +.IL CMERGE arth lnk lnk lnk +Real*4 complex valued f90 merge intrinsic. +.nf +p1 - tsource +p2 - fsource +p3 - mask +.fi +.OP ICMPZ t1 p3 ne +.OP CSSELECT r t1 p2 p1 +.IL CDMERGE arth lnk lnk lnk +Real*8 complex valued f90 merge intrinsic. +.nf +p1 - tsource +p2 - fsource +p3 - mask +.fi +.OP ICMPZ t1 p3 ne +.OP CDSELECT r t1 p2 p1 .IL IADD arth lnk lnk .OP IADD r p1 p2 .IL UIADD arth lnk lnk diff --git a/tools/flang2/utils/ilmtp/ppc64le/ilmtp.n b/tools/flang2/utils/ilmtp/ppc64le/ilmtp.n index 35427a6e21a..6a1e0d8ffc9 100644 --- a/tools/flang2/utils/ilmtp/ppc64le/ilmtp.n +++ b/tools/flang2/utils/ilmtp/ppc64le/ilmtp.n @@ -820,10 +820,24 @@ p2 - fsource p3 - mask .OP ICMPZ t1 p3 ne .OP DSELECT r t1 p2 p1 -.IL CMERGE arth sym lnk lnk lnk -.AT spec -.IL CDMERGE arth sym lnk lnk lnk -.AT spec dcmplx +.IL CMERGE arth lnk lnk lnk +Real*4 complex valued f90 merge intrinsic. +.nf +p1 - tsource +p2 - fsource +p3 - mask +.fi +.OP ICMPZ t1 p3 ne +.OP CSSELECT r t1 p2 p1 +.IL CDMERGE arth lnk lnk lnk +Real*8 complex valued f90 merge intrinsic. +.nf +p1 - tsource +p2 - fsource +p3 - mask +.fi +.OP ICMPZ t1 p3 ne +.OP CDSELECT r t1 p2 p1 .IL IADD arth lnk lnk .OP IADD r p1 p2 .IL UIADD arth lnk lnk diff --git a/tools/shared/rte.h b/tools/shared/rte.h index bca8502829c..e86d05e5db7 100644 --- a/tools/shared/rte.h +++ b/tools/shared/rte.h @@ -210,3 +210,4 @@ extern void set_preserve_descriptor(int); extern void set_descriptor_class(int); extern void set_final_descriptor(int); void set_descriptor_sc(int sc); +int get_header_member_with_parent(int parent, int sdsc, int info); diff --git a/tools/shared/rtlRtns.c b/tools/shared/rtlRtns.c index f696eaa05f4..476f1f6d307 100644 --- a/tools/shared/rtlRtns.c +++ b/tools/shared/rtlRtns.c @@ -91,6 +91,8 @@ FtnRteRtn ftnRtlRtns[] = { {"conformable_dnv", "", true, ""}, {"conformable_ndv", "", true, ""}, {"conformable_nnv", "", true, ""}, + {"contigchk", "", true, ""}, + {"contigerror", "", true, ""}, {"copy_f77_argl", "", true, ""}, {"copy_f77_argsl", "", true, ""}, {"copy_f90_argl", "", true, ""}, diff --git a/tools/shared/rtlRtns.h b/tools/shared/rtlRtns.h index 83e8d92410b..d2c31441318 100644 --- a/tools/shared/rtlRtns.h +++ b/tools/shared/rtlRtns.h @@ -96,6 +96,8 @@ typedef enum { RTE_conformable_dnv, RTE_conformable_ndv, RTE_conformable_nnv, + RTE_contigchk, + RTE_contigerror, RTE_copy_f77_argl, RTE_copy_f77_argsl, RTE_copy_f90_argl,