From 62f21e93446e93d816305a517f54baadd26477e1 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Fri, 28 Jan 2022 13:36:44 +0100 Subject: [PATCH 01/15] Start new control flow for SPMD --- tools/flang2/flang2exe/kmpcutil.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 72374f6c4d..2f7c374eaf 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1734,6 +1734,7 @@ ll_make_kmpc_target_init(OMP_TARGET_MODE mode) args[2] = ad_icon(2); /* SPMD Mode */ args[1] = ad_icon(0); /* UseGenericStateMachine */ args[0] = ad_icon(0); /* RequiresFullRuntime */ +// args[0] = ad_icon(1); /* RequiresFullRuntime */ } else { args[2] = ad_icon(1); /* Generic mode */ args[1] = ad_icon(1); /* UseGenericStateMachine */ @@ -1744,7 +1745,6 @@ ll_make_kmpc_target_init(OMP_TARGET_MODE mode) // AOCC Begin #ifdef OMP_OFFLOAD_AMD - int ll_make_kmpc_target_deinit(OMP_TARGET_MODE mode) { From f5e8e7d960e905c9ed22c6f766f4b30e0e1a79cc Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Fri, 28 Jan 2022 15:19:53 +0100 Subject: [PATCH 02/15] Modify deinit calls --- tools/flang2/flang2exe/exp_rte.cpp | 10 ++++++---- tools/flang2/flang2exe/expand.cpp | 9 ++++++++- tools/flang2/flang2exe/kmpcutil.cpp | 1 + 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/tools/flang2/flang2exe/exp_rte.cpp b/tools/flang2/flang2exe/exp_rte.cpp index 011e8047c4..c8d711abde 100644 --- a/tools/flang2/flang2exe/exp_rte.cpp +++ b/tools/flang2/flang2exe/exp_rte.cpp @@ -2158,10 +2158,12 @@ exp_end(ILM *ilmp, int curilm, bool is_func) int ilix; if (flg.omptarget && !is_func) { if (XBIT(232, 0x40) && gbl.ompaccel_intarget && !OMPACCFUNCDEVG(gbl.currsub) /*is_gpu_output_file() */ ) { - ilix = ll_make_kmpc_target_deinit( - ompaccel_tinfo_get(gbl.currsub)->mode); - iltb.callfg = 1; - chk_block(ilix); + OMP_TARGET_MODE mode = ompaccel_tinfo_get(gbl.currsub)->mode; + if 
(!is_SPMD_mode(mode)) { + ilix = ll_make_kmpc_target_deinit(mode); + iltb.callfg = 1; + chk_block(ilix); + } } } #endif diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index 12dd61e1ac..fd1df689a0 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -451,6 +451,10 @@ eval_ilm_argument1(int opr, ILM *ilmpx, int ilmx) } } /* eval_ilm_argument1 */ +static void add_instruction(int ilix) +{ +} + void eval_ilm(int ilmx) { @@ -714,8 +718,11 @@ eval_ilm(int ilmx) exp_label(target_code_lab); if (is_SPMD_mode(ompaccel_tinfo_get(gbl.currsub)->mode)) { - iltb.callfg = 1; ilix = ll_make_kmpc_global_thread_num(); + iltb.callfg = 1; + chk_block(ilix); + ilix = ll_make_kmpc_target_deinit(ompaccel_tinfo_get(gbl.currsub)->mode); + iltb.callfg = 1; chk_block(ilix); } diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 2f7c374eaf..a68f44da6d 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1755,6 +1755,7 @@ ll_make_kmpc_target_deinit(OMP_TARGET_MODE mode) if (is_SPMD_mode(mode)) { args[1] = ad_icon(2); /* SPMD Mode */ args[0] = ad_icon(0); /* RequiresFullRuntime */ +// args[0] = ad_icon(1); /* RequiresFullRuntime */ } else { args[1] = ad_icon(1); /* Generic mode */ args[0] = ad_icon(1); /* RequiresFullRuntime */ From bf15996268218755cd701c50ff536c83dc487d1e Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Tue, 1 Feb 2022 13:04:35 +0100 Subject: [PATCH 03/15] Added first version of calling kmpc_parallel_51 Done: Added declaration of kmpc_parallel_51 Set constant args Added logic for setting up size of array which will contain target symbols Calling kmpc_parallel_51 Moved deinit function just after kmpc_parallel_51 Not done: Passing symbols to target array Creating separate function which will reflect kernel code Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/expand.cpp | 1 + tools/flang2/flang2exe/kmpcutil.cpp | 44 
+++++++++++++++++++++++++++-- tools/flang2/flang2exe/kmpcutil.h | 7 +++++ tools/flang2/flang2exe/tgtutil.h | 5 ++++ 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index fd1df689a0..29fc63625c 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -719,6 +719,7 @@ eval_ilm(int ilmx) if (is_SPMD_mode(ompaccel_tinfo_get(gbl.currsub)->mode)) { ilix = ll_make_kmpc_global_thread_num(); + ilix = ll_make_kmpc_parallel_51(ilix, ompaccel_tinfo_get(gbl.currsub)); iltb.callfg = 1; chk_block(ilix); ilix = ll_make_kmpc_target_deinit(ompaccel_tinfo_get(gbl.currsub)->mode); diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index a68f44da6d..34ab733565 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -188,10 +188,13 @@ static class ClassKmpcApiCalls break; case KMPC_API_SPMD_KERNEL_INIT: return {"__kmpc_spmd_kernel_init", IL_NONE, DT_VOID_NONE, 0}; + // AOCC Begin case KMPC_API_TARGET_INIT: return {"__kmpc_target_init_v1", IL_NONE, DT_INT, 0}; break; - // AOCC Begin + case KMPC_API_PARALLEL_51: + return {"__kmpc_parallel_51", IL_NONE, DT_INT, 0}; + break; #ifdef OMP_OFFLOAD_AMD case KMPC_API_TARGET_DEINIT: return {"__kmpc_target_deinit_v1", IL_NONE, DT_VOID_NONE, 0}; @@ -313,9 +316,11 @@ static const struct kmpc_api_entry_t kmpc_api_calls[] = { KMPC_FLAG_STR_FMT}, [KMPC_API_SPMD_KERNEL_INIT] = {"__kmpc_spmd_kernel_init", 0, DT_VOID_NONE, 0}, + // AOCC Begin [KMPC_API_TARGET_INIT] = {"__kmpc_target_init_v1", 0, DT_INT, 0}, - // AOCC Begin + [KMPC_API_PARALLEL_51] = {"__kmpc_parallel_51", 0, DT_INT, + 0}, #ifdef OMP_OFFLOAD_AMD [KMPC_API_TARGET_DEINIT] = {"__kmpc_target_deinit_v1", 0, DT_VOID_NONE, 0}, @@ -1743,6 +1748,41 @@ ll_make_kmpc_target_init(OMP_TARGET_MODE mode) return mk_kmpc_api_call(KMPC_API_TARGET_INIT, 4, arg_types, args); } +int +ll_make_kmpc_parallel_51(int 
global_tid_sptr, OMPACCEL_TINFO * symbols) +{ + static int id; + int n_symbols = symbols->n_symbols; + DTYPE arg_types[9]; + DTYPE void_ptr_t = create_dtype_funcprototype(); + DTYPE void_ptr_ptr_t = get_type(2, TY_PTR, void_ptr_t); + DTYPE arr_dtype; + int args[9]; + SPTR array = make_array_sptr("captured_vars_addrs", void_ptr_t, n_symbols); + + arg_types[0] = DT_CPTR; /* ident */ + arg_types[1] = DT_INT; /* global_tid */ + arg_types[2] = DT_INT; /* if_expr */ + arg_types[3] = DT_INT; /* num_threads */ + arg_types[4] = DT_INT; /* proc_bind */ + arg_types[5] = void_ptr_t; /* fn */ + arg_types[6] = void_ptr_t; /* wrapper_fn */ + arg_types[7] = void_ptr_ptr_t; /* args */ + arg_types[8] = DT_INT; /* n_args */ + + args[8] = gen_null_arg(); /* ident */ + args[7] = global_tid_sptr; /* global_tid */ + args[6] = ad_icon(1); /* if_expr */ + args[5] = ad_icon(-1); /* num_threads */ + args[4] = ad_icon(-1); /* proc_bind */ + args[3] = gen_null_arg(); /* fn */ + args[2] = gen_null_arg(); /* wrapper_fn */ + args[1] = ad_acon(array, 0); /* args */ + args[0] = ad_icon(n_symbols); /* n_args */ + + return mk_kmpc_api_call(KMPC_API_PARALLEL_51, 9, arg_types, args); +} + // AOCC Begin #ifdef OMP_OFFLOAD_AMD int diff --git a/tools/flang2/flang2exe/kmpcutil.h b/tools/flang2/flang2exe/kmpcutil.h index 696aaf026b..89610c0fc2 100644 --- a/tools/flang2/flang2exe/kmpcutil.h +++ b/tools/flang2/flang2exe/kmpcutil.h @@ -28,6 +28,7 @@ #include "symtab.h" #include "ili.h" //AOCC Begin +typedef struct _OMPACCEL_TARGET OMPACCEL_TINFO; #include "llmputil.h" //AOCC End /** \file @@ -175,6 +176,7 @@ enum { KMPC_API_TARGET_INIT, KMPC_API_SPMD_KERNEL_INIT, // AOCC Begin + KMPC_API_PARALLEL_51, #ifdef OMP_OFFLOAD_AMD KMPC_API_TARGET_DEINIT, KMPC_API_SPMD_KERNEL_DEINIT_V2, @@ -503,6 +505,11 @@ int ll_make_kmpc_for_static_init_simple_spmd(const loop_args_t *, int); int ll_make_kmpc_target_init(OMP_TARGET_MODE); // AOCC Begin +/** + \brief Generate kmpc_parallel_51 function call +*/ +int 
ll_make_kmpc_parallel_51(int global_tid_sptr, OMPACCEL_TINFO * args); + #ifdef OMP_OFFLOAD_AMD /** \brief kernel deinit diff --git a/tools/flang2/flang2exe/tgtutil.h b/tools/flang2/flang2exe/tgtutil.h index cb034ed79f..d8d36ec76f 100644 --- a/tools/flang2/flang2exe/tgtutil.h +++ b/tools/flang2/flang2exe/tgtutil.h @@ -145,4 +145,9 @@ DTYPE ll_make_tgt_offload_entry(char *); void init_tgtutil(); +/** + \brief Create array sptr + */ +SPTR +make_array_sptr(char *name, DTYPE atype, int arraysize); #endif /* __TGT_RUNTIME_H__ */ From fce469492815bce98cdabcc65b2ee3c37f9f0dd7 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Fri, 4 Feb 2022 23:28:36 +0100 Subject: [PATCH 04/15] Add function which initializes mapped symbols. kmpc_parallel_51 requires that offloaded symbols are passed as addresses inside pointer array Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/expand.cpp | 39 +++++++++++++++++++++++-- tools/flang2/flang2exe/kmpcutil.cpp | 44 ++++++++++++++++++++--------- tools/flang2/flang2exe/kmpcutil.h | 3 +- 3 files changed, 70 insertions(+), 16 deletions(-) diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index 29fc63625c..9e48c12bfc 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -54,6 +54,7 @@ #ifdef OMP_OFFLOAD_LLVM #include "tgtutil.h" #include "kmpcutil.h" +#include #endif extern int in_extract_inline; /* Bottom-up auto-inlining */ @@ -451,8 +452,36 @@ eval_ilm_argument1(int opr, ILM *ilmpx, int ilmx) } } /* eval_ilm_argument1 */ -static void add_instruction(int ilix) +static std::vector get_allocated_symbols(OMPACCEL_TINFO *orig_symbols) { + int num_of_symbols = orig_symbols->n_symbols; + char allocated_symbol_name[128]; + SPTR allocated_symbol; + std::vector init_symbols(orig_symbols->n_symbols); + int store_instr; + int load_instr; + for (unsigned i = 0; i < num_of_symbols; ++i) { + snprintf(allocated_symbol_name, sizeof(allocated_symbol_name), + ".allocated_symbol_%d", 
i); + allocated_symbol = getsymbol(allocated_symbol_name); + STYPEP(allocated_symbol, ST_VAR); + DTYPEP(allocated_symbol, + get_type(2,TY_PTR,DTYPEG(orig_symbols->symbols[i].device_sym))); + SCP(allocated_symbol, SC_AUTO); + store_instr = ad4ili(IL_ST, + ad_acon(orig_symbols->symbols[i].device_sym,0), + ad_acon(allocated_symbol,0), + addnme(NT_VAR, allocated_symbol, 0,0), + MSZ_I8); + chk_block(store_instr); + load_instr = mk_ompaccel_ldsptr(allocated_symbol); + chk_block(load_instr); + + init_symbols[i] = load_instr; + + } + return init_symbols; + } void @@ -692,6 +721,10 @@ eval_ilm(int ilmx) * sharing model. It does extra work and allocates device on-chip memory. * */ if (XBIT(232, 0x40) && gbl.ompaccel_intarget) { + std::vector allocated_symbols; + if (is_SPMD_mode(ompaccel_tinfo_get(gbl.currsub)->mode)) { + allocated_symbols = get_allocated_symbols(ompaccel_tinfo_get(gbl.currsub)); + } ilix = ll_make_kmpc_target_init(ompaccel_tinfo_get(gbl.currsub)->mode); /* Generate new control flow for generic kernel */ @@ -719,7 +752,9 @@ eval_ilm(int ilmx) if (is_SPMD_mode(ompaccel_tinfo_get(gbl.currsub)->mode)) { ilix = ll_make_kmpc_global_thread_num(); - ilix = ll_make_kmpc_parallel_51(ilix, ompaccel_tinfo_get(gbl.currsub)); + iltb.callfg = 1; + chk_block(ilix); + ilix = ll_make_kmpc_parallel_51(ilix, allocated_symbols); iltb.callfg = 1; chk_block(ilix); ilix = ll_make_kmpc_target_deinit(ompaccel_tinfo_get(gbl.currsub)->mode); diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 34ab733565..188a4ceac1 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -27,6 +27,7 @@ #define _GNU_SOURCE // for vasprintf() #include +#include #undef _GNU_SOURCE #include "kmpcutil.h" #include "error.h" @@ -1749,17 +1750,34 @@ ll_make_kmpc_target_init(OMP_TARGET_MODE mode) } int -ll_make_kmpc_parallel_51(int global_tid_sptr, OMPACCEL_TINFO * symbols) +ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector 
&symbols) { static int id; - int n_symbols = symbols->n_symbols; + int n_symbols = symbols.size(); DTYPE arg_types[9]; - DTYPE void_ptr_t = create_dtype_funcprototype(); + DTYPE void_ptr_t = DT_ADDR;//create_dtype_funcprototype(); DTYPE void_ptr_ptr_t = get_type(2, TY_PTR, void_ptr_t); DTYPE arr_dtype; int args[9]; - SPTR array = make_array_sptr("captured_vars_addrs", void_ptr_t, n_symbols); + SPTR captured_vars = make_array_sptr(const_cast("captured_vars_addrs"), + void_ptr_t, + n_symbols); + int ilix; + int nme_args = add_arrnme(NT_ARR, + captured_vars, + addnme(NT_VAR, captured_vars, 0, 0), + 0, + ad_icon(0), + FALSE); + for (unsigned i = 0; i < symbols.size(); ++i) { + ilix = mk_ompaccel_store(symbols[i], + DT_INT8, + nme_args, + ad_acon(captured_vars, i * TARGET_PTRSIZE)); + chk_block(ilix); + } +// chk_block(ilix); arg_types[0] = DT_CPTR; /* ident */ arg_types[1] = DT_INT; /* global_tid */ arg_types[2] = DT_INT; /* if_expr */ @@ -1770,15 +1788,15 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, OMPACCEL_TINFO * symbols) arg_types[7] = void_ptr_ptr_t; /* args */ arg_types[8] = DT_INT; /* n_args */ - args[8] = gen_null_arg(); /* ident */ - args[7] = global_tid_sptr; /* global_tid */ - args[6] = ad_icon(1); /* if_expr */ - args[5] = ad_icon(-1); /* num_threads */ - args[4] = ad_icon(-1); /* proc_bind */ - args[3] = gen_null_arg(); /* fn */ - args[2] = gen_null_arg(); /* wrapper_fn */ - args[1] = ad_acon(array, 0); /* args */ - args[0] = ad_icon(n_symbols); /* n_args */ + args[8] = gen_null_arg(); /* ident */ + args[7] = global_tid_sptr; /* global_tid */ + args[6] = ad_icon(1); /* if_expr */ + args[5] = ad_icon(-1); /* num_threads */ + args[4] = ad_icon(-1); /* proc_bind */ + args[3] = gen_null_arg(); /* fn */ + args[2] = gen_null_arg(); /* wrapper_fn */ + args[1] = ad_acon(captured_vars, 0); /* args */ + args[0] = ad_icon(n_symbols); /* n_args */ return mk_kmpc_api_call(KMPC_API_PARALLEL_51, 9, arg_types, args); } diff --git a/tools/flang2/flang2exe/kmpcutil.h 
b/tools/flang2/flang2exe/kmpcutil.h index 89610c0fc2..9c6dd610c3 100644 --- a/tools/flang2/flang2exe/kmpcutil.h +++ b/tools/flang2/flang2exe/kmpcutil.h @@ -30,6 +30,7 @@ //AOCC Begin typedef struct _OMPACCEL_TARGET OMPACCEL_TINFO; #include "llmputil.h" +#include //AOCC End /** \file * \brief Various definitions for the kmpc runtime @@ -508,7 +509,7 @@ int ll_make_kmpc_target_init(OMP_TARGET_MODE); /** \brief Generate kmpc_parallel_51 function call */ -int ll_make_kmpc_parallel_51(int global_tid_sptr, OMPACCEL_TINFO * args); +int ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &); #ifdef OMP_OFFLOAD_AMD /** From 063001a99afd2cfa0ac1410f83f69cb345ee4ab7 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Fri, 11 Mar 2022 10:23:14 +0100 Subject: [PATCH 05/15] Add support for SPMD kernels with new OpenMP API. Flang generates new SPMD kernels which use kmpc_parallel_51 function. Signed-off-by: Dominik Adamski --- tools/flang1/flang1exe/symtab.c | 3 +- tools/flang2/flang2exe/cgmain.cpp | 3 +- tools/flang2/flang2exe/exp_ftn.cpp | 6 +- tools/flang2/flang2exe/exp_ftn.h | 2 +- tools/flang2/flang2exe/exp_rte.cpp | 4 +- tools/flang2/flang2exe/exp_rte.h | 2 +- tools/flang2/flang2exe/expand.cpp | 97 ++++++++++++++++++++----- tools/flang2/flang2exe/expsmp.cpp | 6 +- tools/flang2/flang2exe/expsmp.h | 2 + tools/flang2/flang2exe/kmpcutil.cpp | 30 +++++--- tools/flang2/flang2exe/kmpcutil.h | 2 +- tools/flang2/flang2exe/llassem.cpp | 3 +- tools/flang2/flang2exe/ompaccel_x86.cpp | 2 +- tools/flang2/flang2exe/outliner.cpp | 70 +++++++++++++++++- tools/flang2/flang2exe/outliner.h | 5 +- 15 files changed, 189 insertions(+), 48 deletions(-) diff --git a/tools/flang1/flang1exe/symtab.c b/tools/flang1/flang1exe/symtab.c index ad7cb62500..a423aef3a4 100644 --- a/tools/flang1/flang1exe/symtab.c +++ b/tools/flang1/flang1exe/symtab.c @@ -479,7 +479,8 @@ get_ieee_arith_intrin(char *nm) int getsymbol(const char *name) { - return getsym(name, strlen(name)); + int sym = getsym(name, 
strlen(name)); + return sym; } /** \brief Enter symbol with indicated name into symbol table, initialize diff --git a/tools/flang2/flang2exe/cgmain.cpp b/tools/flang2/flang2exe/cgmain.cpp index 797024be65..1b33c6ee1c 100644 --- a/tools/flang2/flang2exe/cgmain.cpp +++ b/tools/flang2/flang2exe/cgmain.cpp @@ -4301,7 +4301,7 @@ make_stmt(STMT_Type stmt_type, int ilix, bool deletable, SPTR next_bih_label, int alignment; INSTR_LIST *Curr_Instr; - DBGTRACEIN2(" type: %s ilix: %d", stmt_names[stmt_type], ilix) + DBGTRACEIN2(" type: %s ilix: %d", stmt_names[stmt_type], ilix); curr_stmt_type = stmt_type; if (last_stmt_is_branch && stmt_type != STMT_LABEL) { @@ -14063,7 +14063,6 @@ process_formal_arguments(LL_ABI_Info *abi) /* Other by-value kinds. */ break; } - /* This op represents the real LLVM argument, not the local variable. */ arg_op = make_operand(); arg_op->ot_type = OT_VAR; diff --git a/tools/flang2/flang2exe/exp_ftn.cpp b/tools/flang2/flang2exe/exp_ftn.cpp index a2ad8d618b..9de771c26e 100644 --- a/tools/flang2/flang2exe/exp_ftn.cpp +++ b/tools/flang2/flang2exe/exp_ftn.cpp @@ -4147,7 +4147,7 @@ exp_bran(ILM_OP opc, ILM *ilmp, int curilm) /***************************************************************/ void -exp_misc(ILM_OP opc, ILM *ilmp, int curilm) +exp_misc(ILM_OP opc, ILM *ilmp, int curilm, bool process_expanded) { int tmp; int ilix, listilix; @@ -4289,11 +4289,11 @@ exp_misc(ILM_OP opc, ILM *ilmp, int curilm) break; case IM_ENDF: - exp_end(ilmp, curilm, true); + exp_end(ilmp, curilm, true, process_expanded); break; case IM_END: - exp_end(ilmp, curilm, false); + exp_end(ilmp, curilm, false, process_expanded); break; case IM_BYVAL: diff --git a/tools/flang2/flang2exe/exp_ftn.h b/tools/flang2/flang2exe/exp_ftn.h index 9857b6a9cb..4832002ca6 100644 --- a/tools/flang2/flang2exe/exp_ftn.h +++ b/tools/flang2/flang2exe/exp_ftn.h @@ -53,7 +53,7 @@ void exp_bran(ILM_OP opc, ILM *ilmp, int curilm); /** \brief ... 
*/ -void exp_misc(ILM_OP opc, ILM *ilmp, int curilm); +void exp_misc(ILM_OP opc, ILM *ilmp, int curilm, bool process_expanded = false); /** \brief ... diff --git a/tools/flang2/flang2exe/exp_rte.cpp b/tools/flang2/flang2exe/exp_rte.cpp index c8d711abde..744a8c28b0 100644 --- a/tools/flang2/flang2exe/exp_rte.cpp +++ b/tools/flang2/flang2exe/exp_rte.cpp @@ -2139,7 +2139,7 @@ exp_alloca(ILM *ilmp) static void gen_funcret(finfo_t *); void -exp_end(ILM *ilmp, int curilm, bool is_func) +exp_end(ILM *ilmp, int curilm, bool is_func, bool process_expanded) { int tmp; int op1; @@ -2159,7 +2159,7 @@ exp_end(ILM *ilmp, int curilm, bool is_func) if (flg.omptarget && !is_func) { if (XBIT(232, 0x40) && gbl.ompaccel_intarget && !OMPACCFUNCDEVG(gbl.currsub) /*is_gpu_output_file() */ ) { OMP_TARGET_MODE mode = ompaccel_tinfo_get(gbl.currsub)->mode; - if (!is_SPMD_mode(mode)) { + if (!is_SPMD_mode(mode) && !process_expanded) { ilix = ll_make_kmpc_target_deinit(mode); iltb.callfg = 1; chk_block(ilix); diff --git a/tools/flang2/flang2exe/exp_rte.h b/tools/flang2/flang2exe/exp_rte.h index b8a5c9ce16..7d8394c9da 100644 --- a/tools/flang2/flang2exe/exp_rte.h +++ b/tools/flang2/flang2exe/exp_rte.h @@ -100,7 +100,7 @@ void exp_cgoto(ILM *ilmp, int curilm); /** \brief ... */ -void exp_end(ILM *ilmp, int curilm, bool is_func); +void exp_end(ILM *ilmp, int curilm, bool is_func, bool process_expanded = false); /** \brief ... 
diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index 9e48c12bfc..63d5193f90 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -55,6 +55,7 @@ #include "tgtutil.h" #include "kmpcutil.h" #include +#include #endif extern int in_extract_inline; /* Bottom-up auto-inlining */ @@ -63,6 +64,8 @@ static int create_ref(SPTR sym, int *pnmex, int basenm, int baseilix, int *pclen, int *pmxlen, int *prestype); static int jsr2qjsr(int); +SPTR +eval_ilm_check_if_skip(int ilmx, int *skip_expand = nullptr, int *process_expanded = nullptr); #define DO_PFO ((XBIT(148, 0x1000) && !XBIT(148, 0x4000)) || XBIT(148, 1)) /***************************************************************/ @@ -216,7 +219,6 @@ parse_im_file(const ILM *ilmp, int *lineno_out, int *findex_out, int *ftag_out) } /***************************************************************/ - /** \brief Expand ILMs to ILIs */ int expand(void) @@ -230,12 +232,30 @@ expand(void) int last_ftag = 0; int nextftag = 0, nextfindex = 0; int last_cpp_branch = 0; - + static int skip_expand; + static int skip_expand_sptr; + static std::map process_expanded_map = std::map(); + auto it = process_expanded_map.find(gbl.currsub); + int process_expanded = 0; + if (it != process_expanded_map.end()) + { + process_expanded = it->second; + } + else + { + process_expanded = 0; + } /* * NOTE, for an ILM: ilmx is needed to access the ILM_AUX area, ilmp is * needed to access the ILM area */ exp_init(); + + //set current target info if given target region was already processed + if(ompaccel_tinfo_get(gbl.currsub)) + { + ompaccel_tinfo_current_set(ompaccel_tinfo_get(gbl.currsub)); + } /* During expand, we want to generate unique proc ili each time a * proc ILM is processed. The assumption is that the scheduler will * cse a proc ili if it appears multiple times in a block. 
E.g., @@ -300,7 +320,13 @@ expand(void) ilmp = (ILM *)(ilmb.ilm_base + ilmx); opc = ILM_OPC(ilmp); - + /* Do not expand map statements for helper function for kmpc_parallel_51 */ + if ((opc == IM_MP_MAP || opc == IM_MP_EMAP) && process_expanded) + continue; + if (process_expanded) + { + gbl.ompoutlinedfunc = gbl.currsub; + } if (opc == IM_BR) { last_cpp_branch = ILM_OPND(ilmp, 1); } else if (opc == IM_LABEL) { @@ -319,8 +345,17 @@ expand(void) * variable operands */ if (IM_TRM(opc)) { int cur_label = BIH_LABEL(expb.curbih); - eval_ilm(ilmx); - } + if (!skip_expand){ + SPTR sptr1 = eval_ilm_check_if_skip(ilmx, &skip_expand, &process_expanded); + if (skip_expand) { + skip_expand_sptr = sptr1; + process_expanded_map[skip_expand_sptr] = 1; + ll_write_ilm_header((int)sptr1, ilmx); + restartRewritingILM(ilmx); + } + } else { + ll_rewrite_ilms(-1, ilmx, len); + }} else if (flg.smp && len) { ll_rewrite_ilms(-1, ilmx, len); } @@ -367,7 +402,6 @@ expand(void) new_callee_scope = 0; } while (opc != IM_END && opc != IM_ENDF); - if (DBGBIT(10, 2) && (bihb.stg_avail != 1)) { int bih; for (bih = 1; bih != 0; bih = BIH_NEXT(bih)) { @@ -424,6 +458,13 @@ expand(void) } else { fihb.nextfindex = fihb.currfindex = 1; } + if (skip_expand && !process_expanded) + { + process_expanded = 1; + unsetRewritingILM(); + } + skip_expand = 0; + return expb.nilms; } @@ -457,10 +498,12 @@ static std::vector get_allocated_symbols(OMPACCEL_TINFO *orig_symbols) int num_of_symbols = orig_symbols->n_symbols; char allocated_symbol_name[128]; SPTR allocated_symbol; - std::vector init_symbols(orig_symbols->n_symbols); + std::vector init_symbols{}; int store_instr; int load_instr; for (unsigned i = 0; i < num_of_symbols; ++i) { + if (DTYPEG(orig_symbols->symbols[i].device_sym) != DT_INT8) + continue; snprintf(allocated_symbol_name, sizeof(allocated_symbol_name), ".allocated_symbol_%d", i); allocated_symbol = getsymbol(allocated_symbol_name); @@ -477,17 +520,22 @@ static std::vector 
get_allocated_symbols(OMPACCEL_TINFO *orig_symbols) load_instr = mk_ompaccel_ldsptr(allocated_symbol); chk_block(load_instr); - init_symbols[i] = load_instr; + init_symbols.push_back(load_instr); } return init_symbols; } +void eval_ilm(int ilmx) +{ + eval_ilm_check_if_skip(ilmx, nullptr, nullptr); +} -void -eval_ilm(int ilmx) +SPTR +eval_ilm_check_if_skip(int ilmx, int *skip_expand, int *process_expanded) { + SPTR sptr1 = SPTR_NULL; ILM *ilmpx; int noprs, /* number of operands in the ILM */ ilix, /* ili index */ @@ -511,7 +559,7 @@ eval_ilm(int ilmx) /* Set line no for EPARx */ gbl.lineno = ILM_OPND(ilmpx, 1); } - return; + return sptr1; } } @@ -543,12 +591,12 @@ eval_ilm(int ilmx) } } else if (opcx == IM_MP_EREDUCTION) { ompaccel_notify_reduction(false); - return; + return sptr1; } } if (ompaccel_is_reduction_region()) - return; + return sptr1; } #endif /*- @@ -647,7 +695,7 @@ eval_ilm(int ilmx) if (IM_I8(opcx)) ILM_RESTYPE(ilmx) = ILM_ISI8; - return; + return sptr1; } switch (IM_TYPE(opcx)) { /* special-cased ILM */ @@ -678,7 +726,10 @@ eval_ilm(int ilmx) break; case IMTY_MISC: /* miscellaneous */ - exp_misc(opcx, ilmpx, ilmx); + if (process_expanded && *process_expanded) + exp_misc(opcx, ilmpx, ilmx, true); + else + exp_misc(opcx, ilmpx, ilmx); break; case IMTY_FSTR: /* fortran string */ @@ -720,10 +771,11 @@ eval_ilm(int ilmx) /* We do not initialize spmd kernel library since we do not use spmd data * sharing model. It does extra work and allocates device on-chip memory. 
* */ - if (XBIT(232, 0x40) && gbl.ompaccel_intarget) { + if (XBIT(232, 0x40) && gbl.ompaccel_intarget && !*process_expanded) { + //TODO move initialization to separate function std::vector allocated_symbols; if (is_SPMD_mode(ompaccel_tinfo_get(gbl.currsub)->mode)) { - allocated_symbols = get_allocated_symbols(ompaccel_tinfo_get(gbl.currsub)); + allocated_symbols = get_allocated_symbols(ompaccel_tinfo_get(gbl.currsub)); } ilix = ll_make_kmpc_target_init(ompaccel_tinfo_get(gbl.currsub)->mode); @@ -754,12 +806,20 @@ eval_ilm(int ilmx) ilix = ll_make_kmpc_global_thread_num(); iltb.callfg = 1; chk_block(ilix); - ilix = ll_make_kmpc_parallel_51(ilix, allocated_symbols); + sptr1 = ll_make_helper_function_for_kmpc_parallel_51((SPTR)0, ompaccel_tinfo_get(gbl.currsub)); + ilix = ll_make_kmpc_parallel_51(ilix, allocated_symbols, sptr1); iltb.callfg = 1; chk_block(ilix); ilix = ll_make_kmpc_target_deinit(ompaccel_tinfo_get(gbl.currsub)->mode); iltb.callfg = 1; chk_block(ilix); + expb.curilt = addilt(expb.curilt, ad1ili(IL_EXIT, gbl.currsub)); + BIH_XT(expb.curbih) = 1; + BIH_LAST(expb.curbih) = 1; + wr_block(); + if (skip_expand && process_expanded && (*process_expanded == 0)){ + *skip_expand = 1; + } } iltb.callfg = 1; @@ -770,6 +830,7 @@ eval_ilm(int ilmx) #endif if (IM_I8(opcx)) ILM_RESTYPE(ilmx) = ILM_ISI8; + return sptr1; } /***************************************************************/ diff --git a/tools/flang2/flang2exe/expsmp.cpp b/tools/flang2/flang2exe/expsmp.cpp index 8cc3d1b50f..4f340845ac 100644 --- a/tools/flang2/flang2exe/expsmp.cpp +++ b/tools/flang2/flang2exe/expsmp.cpp @@ -69,8 +69,6 @@ std::list targetVector; int HasRequiresUnifiedSharedMemory = false; // AOCC End -static int incrOutlinedCnt(void); -static int decrOutlinedCnt(void); static int getOutlinedTemp(char *, int); static int isUnnamedCs(int); static int addMpUnp(void); @@ -3270,7 +3268,7 @@ no_pad_func(char *fname) NOPADP(sptr, 1); } -static int +int decrOutlinedCnt(void) { outlinedCnt--; @@ 
-3281,7 +3279,7 @@ decrOutlinedCnt(void) return outlinedCnt; } -static int +int incrOutlinedCnt(void) { parCnt++; diff --git a/tools/flang2/flang2exe/expsmp.h b/tools/flang2/flang2exe/expsmp.h index 3a17885011..923f737af2 100644 --- a/tools/flang2/flang2exe/expsmp.h +++ b/tools/flang2/flang2exe/expsmp.h @@ -114,4 +114,6 @@ void section_create_endblock(SPTR endLabel); /// \brief ... LLTask* llGetTask(int scope); +int incrOutlinedCnt(void); +int decrOutlinedCnt(void); #endif // EXPSMP_H_ diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 188a4ceac1..184850e1cd 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1750,10 +1750,10 @@ ll_make_kmpc_target_init(OMP_TARGET_MODE mode) } int -ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols) +ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR helper_func) { static int id; - int n_symbols = symbols.size(); + int n_symbols = ompaccel_tinfo_get(gbl.currsub)->n_symbols;//2;//symbols.size(); DTYPE arg_types[9]; DTYPE void_ptr_t = DT_ADDR;//create_dtype_funcprototype(); DTYPE void_ptr_ptr_t = get_type(2, TY_PTR, void_ptr_t); @@ -1769,15 +1769,25 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols) 0, ad_icon(0), FALSE); - for (unsigned i = 0; i < symbols.size(); ++i) { - ilix = mk_ompaccel_store(symbols[i], - DT_INT8, - nme_args, - ad_acon(captured_vars, i * TARGET_PTRSIZE)); + int j = 0; + for (unsigned i = 0; i < n_symbols; ++i) { + if (DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym) == DT_INT8) { + ilix = mk_ompaccel_store(symbols[j++], + DT_INT8, + nme_args, + ad_acon(captured_vars, i * TARGET_PTRSIZE)); + } + else { + ilix = mk_ompaccel_ldsptr(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym); + ilix = mk_ompaccel_store(ilix, + DT_INT8, + nme_args, + ad_acon(captured_vars, i * TARGET_PTRSIZE)); + } chk_block(ilix); } + -// chk_block(ilix); arg_types[0] = DT_CPTR; /* 
ident */ arg_types[1] = DT_INT; /* global_tid */ arg_types[2] = DT_INT; /* if_expr */ @@ -1786,14 +1796,14 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols) arg_types[5] = void_ptr_t; /* fn */ arg_types[6] = void_ptr_t; /* wrapper_fn */ arg_types[7] = void_ptr_ptr_t; /* args */ - arg_types[8] = DT_INT; /* n_args */ + arg_types[8] = DT_INT8; /* n_args */ args[8] = gen_null_arg(); /* ident */ args[7] = global_tid_sptr; /* global_tid */ args[6] = ad_icon(1); /* if_expr */ args[5] = ad_icon(-1); /* num_threads */ args[4] = ad_icon(-1); /* proc_bind */ - args[3] = gen_null_arg(); /* fn */ + args[3] = ad_acon(helper_func, 0); args[2] = gen_null_arg(); /* wrapper_fn */ args[1] = ad_acon(captured_vars, 0); /* args */ args[0] = ad_icon(n_symbols); /* n_args */ diff --git a/tools/flang2/flang2exe/kmpcutil.h b/tools/flang2/flang2exe/kmpcutil.h index 9c6dd610c3..e872e6e010 100644 --- a/tools/flang2/flang2exe/kmpcutil.h +++ b/tools/flang2/flang2exe/kmpcutil.h @@ -509,7 +509,7 @@ int ll_make_kmpc_target_init(OMP_TARGET_MODE); /** \brief Generate kmpc_parallel_51 function call */ -int ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &); +int ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &, SPTR); #ifdef OMP_OFFLOAD_AMD /** diff --git a/tools/flang2/flang2exe/llassem.cpp b/tools/flang2/flang2exe/llassem.cpp index fc1b204f71..02b1e2cab9 100644 --- a/tools/flang2/flang2exe/llassem.cpp +++ b/tools/flang2/flang2exe/llassem.cpp @@ -406,8 +406,9 @@ find_ag(const char *ag_name) int hashval = name_to_hash(ag_name, strlen(ag_name)); for (gblsym = agb.hashtb[hashval]; gblsym; gblsym = AG_HASHLK(gblsym)) - if (!strcmp(ag_name, AG_NAME(gblsym))) + if (!strcmp(ag_name, AG_NAME(gblsym))){ return gblsym; + } return SPTR_NULL; } diff --git a/tools/flang2/flang2exe/ompaccel_x86.cpp b/tools/flang2/flang2exe/ompaccel_x86.cpp index 8bafa7e6b3..d0359fac04 100644 --- a/tools/flang2/flang2exe/ompaccel_x86.cpp +++ b/tools/flang2/flang2exe/ompaccel_x86.cpp @@ 
-170,7 +170,7 @@ void ompaccel_x86_fix_arg_types(SPTR func_sptr) { } OMPACCEL_TINFO *tinfo = ompaccel_tinfo_get(func_sptr); - + if (!tinfo) return; // Remember all the reduction symbols of func_sptr so that we can blacklist // them during the type update. std::set reduc_syms; diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index be22268474..713bd402df 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -477,12 +477,13 @@ ll_get_shared_arg(SPTR func_sptr) } void -ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype) +ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL_TINFO *current_tinfo) { int count = 0; int sym, dtype; char name[MXIDLEN + 2]; int dpdscp = aux.dpdsc_avl; + int cnt = 0; PARAMCTP(func_sptr, paramct); DPDSCP(func_sptr, dpdscp); @@ -500,9 +501,23 @@ ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype) DTYPEP(sym, *argtype); PASSBYVALP(sym, 1); } + argtype++; STYPEP(sym, ST_VAR); aux.dpdsc_base[dpdscp++] = sym; + if (current_tinfo) + { + NEED((current_tinfo->n_symbols + 1), current_tinfo->symbols, OMPACCEL_SYM, + current_tinfo->sz_symbols, current_tinfo->sz_symbols * 2); + if (cnt >= 2) + current_tinfo->symbols[current_tinfo->n_symbols].host_sym = + ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym; + current_tinfo->symbols[current_tinfo->n_symbols].device_sym = static_cast(sym); + current_tinfo->symbols[current_tinfo->n_symbols].map_type = 0; + current_tinfo->symbols[current_tinfo->n_symbols].in_map = 0; // AOCC + current_tinfo->n_symbols++; + cnt++; + } } } @@ -1155,6 +1170,7 @@ ll_rewrite_ilms(int lineno, int ilmx, int len) /* replace host sptr with device sptrs, PLD keeps sptr in 2nd index */ op1Pld = ILM_OPND(ilmpx, 1); + //replace host sym to device sym ILM_OPND(ilmpx, 2) = ompaccel_tinfo_current_get_devsptr(ILM_SymOPND(ilmpx, 2)); // AOCC begin @@ -2416,7 +2432,6 @@ 
llMakeFtnOutlinedSignatureTarget(SPTR func_sptr, OMPACCEL_TINFO *current_tinfo, for (i = 0; i < current_tinfo->n_symbols; ++i) { SPTR sptr = current_tinfo->symbols[i].host_sym; - // AOCC begin if (XBIT(232, 0x1)) { if (orig_sptr_map.find(sptr) != orig_sptr_map.end()) { @@ -2428,6 +2443,13 @@ llMakeFtnOutlinedSignatureTarget(SPTR func_sptr, OMPACCEL_TINFO *current_tinfo, sym = ompaccel_create_device_symbol(sptr, count); count++; current_tinfo->symbols[i].device_sym = sym; + if (is_SPMD_mode(current_tinfo->mode) && DTYPEG(sym) != DT_INT8) + { + PASSBYVALP(sym, 1); + DTYPEP(sym, get_type(2, TY_PTR, DTYPEG(sym))); + } else { + PASSBYVALP(sym, 0); + } OMPACCDEVSYMP(sym, TRUE); aux.dpdsc_base[dpdscp++] = sym; } @@ -2647,6 +2669,50 @@ ompaccel_copy_arraydescriptors(SPTR arg_sptr) return device_symbol; } +SPTR +ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *orig_tinfo) +{ + OMPACCEL_TINFO *current_tinfo; + SPTR func_sptr; + + int max_nargs = orig_tinfo->n_symbols + + orig_tinfo->n_quiet_symbols + + orig_tinfo->n_reduction_symbols; + int func_args_cnt = orig_tinfo->n_symbols + 2; // global_tid, bound_tid + target_info args + std::vector func_args(func_args_cnt); + auto *symbols = orig_tinfo->symbols; + func_args[0] = get_type(2, TY_PTR, DT_INT8);//DT_CPTR; // global_tid + func_args[1] = get_type(2, TY_PTR, DT_INT8);//DT_CPTR; // bound_tid + + for (int k = 2; k < func_args_cnt; k++) { + if (DTYPEG(symbols->device_sym) == DT_INT8 ) { + func_args[k] = get_type(2, TY_PTR, DT_INT8); + } + else { + func_args[k] = DTYPEG(symbols->device_sym);} + symbols++; + } + + func_sptr = create_target_outlined_func_sptr(scope_sptr, false); + CCSYMP(func_sptr, + 1); /* currently we make all CCSYM func varargs in Fortran. 
*/ + CFUNCP(func_sptr, 1); + TASKFNP(func_sptr, FALSE); + ISTASKDUPP(func_sptr, FALSE); + OUTLINEDP(func_sptr, gbl.currsub); + FUNCLINEP(func_sptr, gbl.lineno); + STYPEP(func_sptr, ST_ENTRY); + DTYPEP(func_sptr, DT_VOID_NONE); + DEFDP(func_sptr, 1); + SCP(func_sptr, SC_STATIC); + ADDRTKNP(func_sptr, 1); + OMPACCFUNCDEVP(func_sptr, 1); + current_tinfo = ompaccel_tinfo_create(func_sptr, max_nargs); + ll_make_ftn_outlined_params(func_sptr, func_args_cnt, func_args.data(), current_tinfo); + ll_process_routine_parameters(func_sptr); + return func_sptr; +} + SPTR ll_make_outlined_ompaccel_func(SPTR stblk_sptr, SPTR scope_sptr, bool iskernel) { diff --git a/tools/flang2/flang2exe/outliner.h b/tools/flang2/flang2exe/outliner.h index 642e739e26..b93a25b65d 100644 --- a/tools/flang2/flang2exe/outliner.h +++ b/tools/flang2/flang2exe/outliner.h @@ -26,6 +26,7 @@ #include "symtab.h" #include "ili.h" #include +#include "kmpcutil.h" extern FILE *par_file1; extern FILE *par_file2; @@ -246,7 +247,7 @@ void ilm_outlined_pad_ilm(int curilm); /** \brief ... */ -void ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype); +void ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL_TINFO *current_tinfo = nullptr); /** \brief ... @@ -398,4 +399,6 @@ bool outlined_is_eliminated(ILM_OP opc); bool outlined_need_recompile(); void ll_set_ompaccel_currfunc(bool isILMrecompile); +SPTR +ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *orig_tinfo); #endif /* OUTLINER_H_ */ From fe8d5eec4ce28426ae31b0737bfb67d8472e3df4 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Tue, 15 Mar 2022 15:32:52 +0100 Subject: [PATCH 06/15] Fixed handling of last argument. 
Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/kmpcutil.cpp | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 184850e1cd..92318d2f8c 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1783,6 +1783,16 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he DT_INT8, nme_args, ad_acon(captured_vars, i * TARGET_PTRSIZE)); + // Hack for correct generation of *.ll code ( perform load operation instead of raw bitcast) + if (i == n_symbols - 1) { + chk_block(ilix); + ilix = mk_ompaccel_ldsptr(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym); + ilix = mk_ompaccel_store(ilix, + DT_INT8, + nme_args, + ad_acon(captured_vars, i * TARGET_PTRSIZE)); + + } } chk_block(ilix); } From 3ee516690cb3c77d5d896874af964a6c5b719c92 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Wed, 23 Mar 2022 12:54:46 +0100 Subject: [PATCH 07/15] Fixed passing arrays for spmd kernels. Do not assign new dtype value for device symbols via get_type function. Use macros PASSBYVALP PASSBYREFP instead. Remove hack for load/store of the last symbol. 
Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/kmpcutil.cpp | 10 --------- tools/flang2/flang2exe/outliner.cpp | 33 ++++++++++++----------------- 2 files changed, 14 insertions(+), 29 deletions(-) diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 92318d2f8c..184850e1cd 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1783,16 +1783,6 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he DT_INT8, nme_args, ad_acon(captured_vars, i * TARGET_PTRSIZE)); - // Hack for correct generation of *.ll code ( perform load operation instead of raw bitcast) - if (i == n_symbols - 1) { - chk_block(ilix); - ilix = mk_ompaccel_ldsptr(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym); - ilix = mk_ompaccel_store(ilix, - DT_INT8, - nme_args, - ad_acon(captured_vars, i * TARGET_PTRSIZE)); - - } } chk_block(ilix); } diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index 713bd402df..ca3d1d7346 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -505,19 +505,24 @@ ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL argtype++; STYPEP(sym, ST_VAR); aux.dpdsc_base[dpdscp++] = sym; + //AOC begin if (current_tinfo) { NEED((current_tinfo->n_symbols + 1), current_tinfo->symbols, OMPACCEL_SYM, current_tinfo->sz_symbols, current_tinfo->sz_symbols * 2); - if (cnt >= 2) + current_tinfo->symbols[current_tinfo->n_symbols].device_sym = static_cast(sym); + if (cnt >= 2) { + PASSBYVALP(sym, false); + PASSBYREFP(sym, true); current_tinfo->symbols[current_tinfo->n_symbols].host_sym = ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym; - current_tinfo->symbols[current_tinfo->n_symbols].device_sym = static_cast(sym); - current_tinfo->symbols[current_tinfo->n_symbols].map_type = 0; - current_tinfo->symbols[current_tinfo->n_symbols].in_map = 0; // AOCC - 
current_tinfo->n_symbols++; - cnt++; + } + current_tinfo->symbols[current_tinfo->n_symbols].map_type = 0; + current_tinfo->symbols[current_tinfo->n_symbols].in_map = 0; + current_tinfo->n_symbols++; + cnt++; } + //AOCC end } } @@ -2443,13 +2448,6 @@ llMakeFtnOutlinedSignatureTarget(SPTR func_sptr, OMPACCEL_TINFO *current_tinfo, sym = ompaccel_create_device_symbol(sptr, count); count++; current_tinfo->symbols[i].device_sym = sym; - if (is_SPMD_mode(current_tinfo->mode) && DTYPEG(sym) != DT_INT8) - { - PASSBYVALP(sym, 1); - DTYPEP(sym, get_type(2, TY_PTR, DTYPEG(sym))); - } else { - PASSBYVALP(sym, 0); - } OMPACCDEVSYMP(sym, TRUE); aux.dpdsc_base[dpdscp++] = sym; } @@ -2685,11 +2683,8 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or func_args[1] = get_type(2, TY_PTR, DT_INT8);//DT_CPTR; // bound_tid for (int k = 2; k < func_args_cnt; k++) { - if (DTYPEG(symbols->device_sym) == DT_INT8 ) { - func_args[k] = get_type(2, TY_PTR, DT_INT8); - } - else { - func_args[k] = DTYPEG(symbols->device_sym);} + func_args[k] = DTYPEG(symbols->device_sym); + PASSBYVALP(symbols->device_sym, false); symbols++; } @@ -2709,7 +2704,7 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or OMPACCFUNCDEVP(func_sptr, 1); current_tinfo = ompaccel_tinfo_create(func_sptr, max_nargs); ll_make_ftn_outlined_params(func_sptr, func_args_cnt, func_args.data(), current_tinfo); - ll_process_routine_parameters(func_sptr); + ll_process_routine_parameters(func_sptr); return func_sptr; } From 253c35e780073f6da40594fa0fbb256afdbc5d1c Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Thu, 24 Mar 2022 14:46:22 +0100 Subject: [PATCH 08/15] Fix passing args to helper function Fixed passing scalars whose type is different from int64. Fixed passing allocatable arrays.
Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/expand.cpp | 2 +- tools/flang2/flang2exe/kmpcutil.cpp | 2 +- tools/flang2/flang2exe/outliner.cpp | 13 ++++++++++--- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index 63d5193f90..d63b31bd6c 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -502,7 +502,7 @@ static std::vector get_allocated_symbols(OMPACCEL_TINFO *orig_symbols) int store_instr; int load_instr; for (unsigned i = 0; i < num_of_symbols; ++i) { - if (DTYPEG(orig_symbols->symbols[i].device_sym) != DT_INT8) + if (!DT_ISSCALAR(DTYPEG(orig_symbols->symbols[i].device_sym))) continue; snprintf(allocated_symbol_name, sizeof(allocated_symbol_name), ".allocated_symbol_%d", i); diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 184850e1cd..cbee3c2701 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1771,7 +1771,7 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he FALSE); int j = 0; for (unsigned i = 0; i < n_symbols; ++i) { - if (DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym) == DT_INT8) { + if (DT_ISSCALAR(DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym))) { ilix = mk_ompaccel_store(symbols[j++], DT_INT8, nme_args, diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index ca3d1d7346..c24116b5bf 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -492,7 +492,10 @@ ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL aux.dpdsc_size + paramct + 100); while (paramct--) { - sprintf(name, "%sArg%d", SYMNAME(func_sptr), count++); + if (current_tinfo && cnt >= 2) + sprintf(name, "%s", SYMNAME(ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym)); + else + sprintf(name, "%sArg%d", 
SYMNAME(func_sptr), count++); sym = getsymbol(name); SCP(sym, SC_DUMMY); if (*argtype == DT_CPTR) { /* either i8* or actual type( pass by value). */ @@ -510,12 +513,13 @@ ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL { NEED((current_tinfo->n_symbols + 1), current_tinfo->symbols, OMPACCEL_SYM, current_tinfo->sz_symbols, current_tinfo->sz_symbols * 2); - current_tinfo->symbols[current_tinfo->n_symbols].device_sym = static_cast(sym); if (cnt >= 2) { PASSBYVALP(sym, false); PASSBYREFP(sym, true); current_tinfo->symbols[current_tinfo->n_symbols].host_sym = ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym; + current_tinfo->symbols[current_tinfo->n_symbols].device_sym = + ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym; } current_tinfo->symbols[current_tinfo->n_symbols].map_type = 0; current_tinfo->symbols[current_tinfo->n_symbols].in_map = 0; @@ -2683,7 +2687,10 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or func_args[1] = get_type(2, TY_PTR, DT_INT8);//DT_CPTR; // bound_tid for (int k = 2; k < func_args_cnt; k++) { - func_args[k] = DTYPEG(symbols->device_sym); + if(DT_ISSCALAR( DTYPEG(symbols->device_sym))) + func_args[k] = DT_CPTR; + else + func_args[k] = DTYPEG(symbols->device_sym); PASSBYVALP(symbols->device_sym, false); symbols++; } From e84c6e5d4978dcfe497d0481f7f9330bb0ce642c Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Wed, 30 Mar 2022 08:26:24 +0200 Subject: [PATCH 09/15] Pass complex arg as pointer to pair of floats --- tools/flang2/flang2exe/outliner.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index c24116b5bf..e3f73f7e9f 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -2687,7 +2687,8 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or func_args[1] = get_type(2, TY_PTR, 
DT_INT8);//DT_CPTR; // bound_tid for (int k = 2; k < func_args_cnt; k++) { - if(DT_ISSCALAR( DTYPEG(symbols->device_sym))) + if(DT_ISSCALAR( DTYPEG(symbols->device_sym)) + && DTYPEG(symbols->device_sym) != DT_CMPLX) func_args[k] = DT_CPTR; else func_args[k] = DTYPEG(symbols->device_sym); From b713196551fa72155b0f96e834823f9f2234e3fc Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Thu, 31 Mar 2022 11:56:05 +0200 Subject: [PATCH 10/15] Do not modify the LLType of the argument If we generate the initialization function for SPMD kernels, we need to store the addresses of the arguments before we call the kmpc_parallel_51 function. We use the ptrtoint instruction for scalar variables. Before this patch the LLVM IR code was generated incorrectly for complex variables: void kernel_func( *Arg_c) //some code ptrtoint i64* %Arg_c //error Arg_c was declared as pair of floats //some code call kmpc_parallel_51() //some code With this patch the LLVM IR contains the correct ptrtoint instruction: void kernel_func( *Arg_c) //some code ptrtoint * %Arg_c //ok, Arg_c was declared as pair of floats //some code call kmpc_parallel_51() //some code Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/cgmain.cpp | 2 +- tools/flang2/flang2exe/expand.cpp | 8 +++++++- tools/shared/utils/global.h | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tools/flang2/flang2exe/cgmain.cpp b/tools/flang2/flang2exe/cgmain.cpp index 1b33c6ee1c..5c0edd5ee4 100644 --- a/tools/flang2/flang2exe/cgmain.cpp +++ b/tools/flang2/flang2exe/cgmain.cpp @@ -12267,7 +12267,7 @@ process_sptr_offset(SPTR sptr, ISZ_T off) } if ((flg.smp || (XBIT(34, 0x200) || gbl.usekmpc)) && (gbl.outlined || ISTASKDUPG(GBL_CURRFUNC))) { - if (sptr == ll_get_shared_arg(gbl.currsub)) { + if (sptr == ll_get_shared_arg(gbl.currsub) && !gbl.is_init_spmd_kernel) { LLTYPE(sptr) = make_ptr_lltype(make_lltype_from_dtype(DT_INT8)); } } diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index
d63b31bd6c..6c95bde29f 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -237,6 +237,11 @@ expand(void) static std::map process_expanded_map = std::map(); auto it = process_expanded_map.find(gbl.currsub); int process_expanded = 0; + // we reset flag because we do not know if we generate initialization + // function for SPMD kernel (the function with kmpc_parallel_51 call) + // or the proper kernel code (the function which is passed as an argument + // to kmpc_parallel_51 call or generic kernel + gbl.is_init_spmd_kernel = false; if (it != process_expanded_map.end()) { process_expanded = it->second; @@ -806,7 +811,8 @@ eval_ilm_check_if_skip(int ilmx, int *skip_expand, int *process_expanded) ilix = ll_make_kmpc_global_thread_num(); iltb.callfg = 1; chk_block(ilix); - sptr1 = ll_make_helper_function_for_kmpc_parallel_51((SPTR)0, ompaccel_tinfo_get(gbl.currsub)); + gbl.is_init_spmd_kernel = true; + sptr1 = ll_make_helper_function_for_kmpc_parallel_51((SPTR)0, ompaccel_tinfo_get(gbl.currsub)); ilix = ll_make_kmpc_parallel_51(ilix, allocated_symbols, sptr1); iltb.callfg = 1; chk_block(ilix); diff --git a/tools/shared/utils/global.h b/tools/shared/utils/global.h index 14ef411d85..02988d5e0f 100644 --- a/tools/shared/utils/global.h +++ b/tools/shared/utils/global.h @@ -162,6 +162,7 @@ typedef struct { bool ompaccel_intarget; /* set when expander is in the openmp target construct */ bool ompaccel_isdevice; /* set when generating code for openmp target device */ SPTR teamPrivateArgs; /* keeps sptr that holds team private array */ + bool is_init_spmd_kernel; /* if TRUE, we generate initialization proceudre of SPMD kernel */ #endif } GBL; From ce4a1b41ed0156635a5e591a6476d497642db85c Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Mon, 4 Apr 2022 11:45:38 +0200 Subject: [PATCH 11/15] Fix passing Fortran objects Fortran objects should be passed as i64* Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/expand.cpp | 11 
++++++++--- tools/flang2/flang2exe/kmpcutil.cpp | 4 +++- tools/flang2/flang2exe/outliner.cpp | 3 +++ 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/tools/flang2/flang2exe/expand.cpp b/tools/flang2/flang2exe/expand.cpp index 6c95bde29f..34274c51fa 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -507,13 +507,18 @@ static std::vector get_allocated_symbols(OMPACCEL_TINFO *orig_symbols) int store_instr; int load_instr; for (unsigned i = 0; i < num_of_symbols; ++i) { - if (!DT_ISSCALAR(DTYPEG(orig_symbols->symbols[i].device_sym))) - continue; + if (!DT_ISSCALAR(DTYPEG(orig_symbols->symbols[i].device_sym)) + && STYPEG(orig_symbols->symbols[i].host_sym) != ST_STRUCT) { + continue; + } snprintf(allocated_symbol_name, sizeof(allocated_symbol_name), ".allocated_symbol_%d", i); allocated_symbol = getsymbol(allocated_symbol_name); STYPEP(allocated_symbol, ST_VAR); - DTYPEP(allocated_symbol, + if (STYPEG(orig_symbols->symbols[i].host_sym) == ST_STRUCT) + DTYPEP(allocated_symbol,DT_CPTR); + else + DTYPEP(allocated_symbol, get_type(2,TY_PTR,DTYPEG(orig_symbols->symbols[i].device_sym))); SCP(allocated_symbol, SC_AUTO); store_instr = ad4ili(IL_ST, diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index cbee3c2701..57109ac0e4 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1770,8 +1770,10 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he ad_icon(0), FALSE); int j = 0; + for (unsigned i = 0; i < n_symbols; ++i) { - if (DT_ISSCALAR(DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym))) { + if (DT_ISSCALAR(DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) || + STYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].host_sym) == ST_STRUCT) { ilix = mk_ompaccel_store(symbols[j++], DT_INT8, nme_args, diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index 
e3f73f7e9f..e36daae6a4 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -2690,6 +2690,9 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or if(DT_ISSCALAR( DTYPEG(symbols->device_sym)) && DTYPEG(symbols->device_sym) != DT_CMPLX) func_args[k] = DT_CPTR; + else if (STYPEG(symbols->host_sym) == ST_STRUCT) { + func_args[k] = DT_CPTR; + } else func_args[k] = DTYPEG(symbols->device_sym); PASSBYVALP(symbols->device_sym, false); From c9307ca4474592f11000ce9e5f51dc35e3afaa26 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Mon, 4 Apr 2022 23:00:14 +0200 Subject: [PATCH 12/15] Fixed passing complex numbers. Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/outliner.cpp | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index e36daae6a4..c7ad31bc59 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -2671,6 +2671,17 @@ ompaccel_copy_arraydescriptors(SPTR arg_sptr) return device_symbol; } +static bool is_complex_type(DTYPE dt) +{ + if (dt == DT_DCMPLX){ + return true; + } + else if (dt == DT_CMPLX){ + return true; + } + return false; +} + SPTR ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *orig_tinfo) { @@ -2688,13 +2699,15 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or for (int k = 2; k < func_args_cnt; k++) { if(DT_ISSCALAR( DTYPEG(symbols->device_sym)) - && DTYPEG(symbols->device_sym) != DT_CMPLX) + && !is_complex_type(DTYPEG(symbols->device_sym))) { func_args[k] = DT_CPTR; + } else if (STYPEG(symbols->host_sym) == ST_STRUCT) { func_args[k] = DT_CPTR; } - else + else { func_args[k] = DTYPEG(symbols->device_sym); + } PASSBYVALP(symbols->device_sym, false); symbols++; } From 8ed95898fa67f27d2df73662e564e24e90e8881e Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Wed, 6 
Apr 2022 09:44:16 +0200 Subject: [PATCH 13/15] Skip symbols which are not initialized Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/kmpcutil.cpp | 27 ++++++++++++++++++++++++--- tools/flang2/flang2exe/kmpcutil.h | 12 ++++++++++++ tools/flang2/flang2exe/outliner.cpp | 3 ++- 3 files changed, 38 insertions(+), 4 deletions(-) diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 57109ac0e4..3f024f8f89 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1749,11 +1749,31 @@ ll_make_kmpc_target_init(OMP_TARGET_MODE mode) return mk_kmpc_api_call(KMPC_API_TARGET_INIT, 4, arg_types, args); } +int get_n_symbols(OMPACCEL_TINFO *tinfo) +{ + int orig_n_symbols = tinfo->n_symbols; + int n_symbols = orig_n_symbols; + for (int i = 0; i < orig_n_symbols; ++i) { + //skip uninitialized symbols + if (DTYPEG(tinfo->symbols[i].device_sym) == 0) { + n_symbols--; + } + } + return n_symbols; +} + +bool check_if_skip_symbol(SPTR sym) +{ + if (DTYPEG(sym) == 0) + return true; + return false; +} + int ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR helper_func) { static int id; - int n_symbols = ompaccel_tinfo_get(gbl.currsub)->n_symbols;//2;//symbols.size(); + int n_symbols = get_n_symbols(ompaccel_tinfo_get(gbl.currsub)); DTYPE arg_types[9]; DTYPE void_ptr_t = DT_ADDR;//create_dtype_funcprototype(); DTYPE void_ptr_ptr_t = get_type(2, TY_PTR, void_ptr_t); @@ -1770,8 +1790,9 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he ad_icon(0), FALSE); int j = 0; - - for (unsigned i = 0; i < n_symbols; ++i) { + for (int i = 0; i < n_symbols; ++i) { + if (check_if_skip_symbol(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) + continue; if (DT_ISSCALAR(DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) || STYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].host_sym) == ST_STRUCT) { ilix = mk_ompaccel_store(symbols[j++], diff --git 
a/tools/flang2/flang2exe/kmpcutil.h b/tools/flang2/flang2exe/kmpcutil.h index e872e6e010..f159c205cb 100644 --- a/tools/flang2/flang2exe/kmpcutil.h +++ b/tools/flang2/flang2exe/kmpcutil.h @@ -534,6 +534,18 @@ int ll_make_kmpc_nvptx_parallel_reduce_nowait_simple_spmd(int, int, int, SPTR, S */ int ll_make_kmpc_nvptx_end_reduce_nowait(); +/** + \brief Get number of correctly initialized number of symbols. +*/ +int get_n_symbols(OMPACCEL_TINFO *tinfo); + +/** + \brief Check if given symbol should be skipped + If DTYPE of symbol is 0 then the symbol should not be passed + as an argument to kmpc_parallel_51 function +*/ +bool check_if_skip_symbol(SPTR sym); + /* End OpenMP Accelerator RT - non standard */ #endif #endif /* KMPC_RUNTIME_H_ */ diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index c7ad31bc59..195024a11c 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -2691,7 +2691,8 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or int max_nargs = orig_tinfo->n_symbols + orig_tinfo->n_quiet_symbols + orig_tinfo->n_reduction_symbols; - int func_args_cnt = orig_tinfo->n_symbols + 2; // global_tid, bound_tid + target_info args + int func_args_cnt = get_n_symbols(orig_tinfo); + func_args_cnt += 2; // global_tid, bound_tid + target_info args std::vector func_args(func_args_cnt); auto *symbols = orig_tinfo->symbols; func_args[0] = get_type(2, TY_PTR, DT_INT8);//DT_CPTR; // global_tid From 97bd64e9d8dad114cd6396a8eb76caf1b9d5ec94 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Thu, 7 Apr 2022 16:27:26 +0200 Subject: [PATCH 14/15] Add support for passing integers by value Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/expand.cpp | 5 ++++- tools/flang2/flang2exe/kmpcutil.cpp | 10 +++++++++- tools/flang2/flang2exe/outliner.cpp | 9 +++++---- 3 files changed, 18 insertions(+), 6 deletions(-) diff --git a/tools/flang2/flang2exe/expand.cpp 
b/tools/flang2/flang2exe/expand.cpp index 34274c51fa..7262f8653e 100644 --- a/tools/flang2/flang2exe/expand.cpp +++ b/tools/flang2/flang2exe/expand.cpp @@ -237,6 +237,7 @@ expand(void) static std::map process_expanded_map = std::map(); auto it = process_expanded_map.find(gbl.currsub); int process_expanded = 0; + // we reset flag because we do not know if we generate initialization // function for SPMD kernel (the function with kmpc_parallel_51 call) // or the proper kernel code (the function which is passed as an argument @@ -507,6 +508,9 @@ static std::vector get_allocated_symbols(OMPACCEL_TINFO *orig_symbols) int store_instr; int load_instr; for (unsigned i = 0; i < num_of_symbols; ++i) { + if (PASSBYVALG(orig_symbols->symbols[i].device_sym) && + !PASSBYREFG(orig_symbols->symbols[i].device_sym)) + continue; if (!DT_ISSCALAR(DTYPEG(orig_symbols->symbols[i].device_sym)) && STYPEG(orig_symbols->symbols[i].host_sym) != ST_STRUCT) { continue; @@ -531,7 +535,6 @@ static std::vector get_allocated_symbols(OMPACCEL_TINFO *orig_symbols) chk_block(load_instr); init_symbols.push_back(load_instr); - } return init_symbols; diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 3f024f8f89..41fc8304a4 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1793,7 +1793,15 @@ ll_make_kmpc_parallel_51(int global_tid_sptr, std::vector &symbols, SPTR he for (int i = 0; i < n_symbols; ++i) { if (check_if_skip_symbol(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) continue; - if (DT_ISSCALAR(DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) || + else if (PASSBYVALG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym) && + !PASSBYREFG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) { + ilix = mk_ompaccel_ldsptr(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym); + ilix = mk_ompaccel_store(ilix, + DT_INT8, + nme_args, + ad_acon(captured_vars, i * TARGET_PTRSIZE)); + } + 
else if (DT_ISSCALAR(DTYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].device_sym)) || STYPEG(ompaccel_tinfo_get(gbl.currsub)->symbols[i].host_sym) == ST_STRUCT) { ilix = mk_ompaccel_store(symbols[j++], DT_INT8, diff --git a/tools/flang2/flang2exe/outliner.cpp b/tools/flang2/flang2exe/outliner.cpp index 195024a11c..6786f21e51 100644 --- a/tools/flang2/flang2exe/outliner.cpp +++ b/tools/flang2/flang2exe/outliner.cpp @@ -514,12 +514,14 @@ ll_make_ftn_outlined_params(int func_sptr, int paramct, DTYPE *argtype, OMPACCEL NEED((current_tinfo->n_symbols + 1), current_tinfo->symbols, OMPACCEL_SYM, current_tinfo->sz_symbols, current_tinfo->sz_symbols * 2); if (cnt >= 2) { - PASSBYVALP(sym, false); - PASSBYREFP(sym, true); + if (!(PASSBYVALG(sym) && !PASSBYREFG(sym) && DTYPEG(sym) == DT_INT8)) { + PASSBYVALP(sym, false); + PASSBYREFP(sym, true); + } current_tinfo->symbols[current_tinfo->n_symbols].host_sym = ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym; current_tinfo->symbols[current_tinfo->n_symbols].device_sym = - ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym; + ompaccel_tinfo_get(gbl.currsub)->symbols[cnt-2].device_sym; } current_tinfo->symbols[current_tinfo->n_symbols].map_type = 0; current_tinfo->symbols[current_tinfo->n_symbols].in_map = 0; @@ -2709,7 +2711,6 @@ ll_make_helper_function_for_kmpc_parallel_51(SPTR scope_sptr, OMPACCEL_TINFO *or else { func_args[k] = DTYPEG(symbols->device_sym); } - PASSBYVALP(symbols->device_sym, false); symbols++; } From 3fb23232c2cdd3c90ff6441e38b96b9ac4322a7a Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Thu, 21 Apr 2022 10:33:46 +0200 Subject: [PATCH 15/15] Use the same OpenMP API as Clang for target parallel for pragma Clang uses the kmpc_parallel_51 function for handling target parallel for pragma.
Flang should use the same functions as Clang for pragma target parallel Signed-off-by: Dominik Adamski --- tools/flang2/flang2exe/cgmain.cpp | 3 ++- tools/flang2/flang2exe/kmpcutil.cpp | 28 ++++++++++++++++++++++------ tools/flang2/flang2exe/ompaccel.cpp | 3 +-- 3 files changed, 25 insertions(+), 9 deletions(-) diff --git a/tools/flang2/flang2exe/cgmain.cpp b/tools/flang2/flang2exe/cgmain.cpp index 5c0edd5ee4..f89e9d2059 100644 --- a/tools/flang2/flang2exe/cgmain.cpp +++ b/tools/flang2/flang2exe/cgmain.cpp @@ -14406,7 +14406,8 @@ INLINE void static add_property_struct(char *func_name, print_token("@"); print_token(func_name); - if (is_SPMD_mode(mode)) { + if (mode >= mode_target_teams_distribute_parallel_for + && mode <= mode_target_parallel_for_simd) { print_token("__exec_mode = weak constant i8 2\n"); } else { diff --git a/tools/flang2/flang2exe/kmpcutil.cpp b/tools/flang2/flang2exe/kmpcutil.cpp index 41fc8304a4..789276cfae 100644 --- a/tools/flang2/flang2exe/kmpcutil.cpp +++ b/tools/flang2/flang2exe/kmpcutil.cpp @@ -1736,11 +1736,19 @@ ll_make_kmpc_target_init(OMP_TARGET_MODE mode) int args[4]; args[3] = gen_null_arg(); /* ident */ - if (is_SPMD_mode(mode)) { + if (mode >= mode_target_teams_distribute_parallel_for && + mode <= mode_target_parallel_for_simd) { args[2] = ad_icon(2); /* SPMD Mode */ args[1] = ad_icon(0); /* UseGenericStateMachine */ - args[0] = ad_icon(0); /* RequiresFullRuntime */ -// args[0] = ad_icon(1); /* RequiresFullRuntime */ + if (mode == mode_target_parallel) { + /* RequiresFullRuntime - kmpc_parallel_51 requires full runtime */ + args[0] = ad_icon(1); + } + else { + /* RequiresFullRuntime - Old Fortran OpenMP API does not require + * full runtime */ + args[0] = ad_icon(0); + } } else { args[2] = ad_icon(1); /* Generic mode */ args[1] = ad_icon(1); /* UseGenericStateMachine */ @@ -1851,10 +1859,18 @@ ll_make_kmpc_target_deinit(OMP_TARGET_MODE mode) int args[3]; args[2] = gen_null_arg(); /* ident */ - if (is_SPMD_mode(mode)) { + if (mode >= 
mode_target_teams_distribute_parallel_for && + mode <= mode_target_parallel_for_simd) { args[1] = ad_icon(2); /* SPMD Mode */ - args[0] = ad_icon(0); /* RequiresFullRuntime */ -// args[0] = ad_icon(1); /* RequiresFullRuntime */ + if (mode == mode_target_parallel) { + /* RequiresFullRuntime - kmpc_parallel_51 requires full runtime */ + args[0] = ad_icon(1); + } + else { + /* RequiresFullRuntime - Old Fortran OpenMP API does not require + * full runtime */ + args[0] = ad_icon(0); + } } else { args[1] = ad_icon(1); /* Generic mode */ args[0] = ad_icon(1); /* RequiresFullRuntime */ diff --git a/tools/flang2/flang2exe/ompaccel.cpp b/tools/flang2/flang2exe/ompaccel.cpp index 9d266499c3..b5e78dccee 100644 --- a/tools/flang2/flang2exe/ompaccel.cpp +++ b/tools/flang2/flang2exe/ompaccel.cpp @@ -3726,8 +3726,7 @@ ompaccel_set_target_declare() { } bool is_SPMD_mode(OMP_TARGET_MODE mode) { - if (mode >= mode_target_teams_distribute_parallel_for - && mode <= mode_target_parallel_for_simd) { + if (mode == mode_target_parallel) { return true; } return false;