From eb6fc7519544ab369445fb87d3864b0eb1565b5c Mon Sep 17 00:00:00 2001 From: Serina Tan Date: Thu, 8 Aug 2019 15:14:23 -0400 Subject: [PATCH 1/2] Bug fix: over-counting completed instructions for vector loads --- src/gpgpu-sim/shader.cc | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/gpgpu-sim/shader.cc b/src/gpgpu-sim/shader.cc index 007ad4234..011091093 100644 --- a/src/gpgpu-sim/shader.cc +++ b/src/gpgpu-sim/shader.cc @@ -1696,6 +1696,7 @@ void ldst_unit::L1_latency_queue_cycle() assert( !read_sent ); l1_latency_queue[0] = NULL; if ( mf_next->get_inst().is_load() ) { + bool insn_completed = false; for ( unsigned r=0; r < MAX_OUTPUT_VALUES; r++) if (mf_next->get_inst().out[r] > 0) { @@ -1705,9 +1706,12 @@ void ldst_unit::L1_latency_queue_cycle() { m_pending_writes[mf_next->get_inst().warp_id()].erase(mf_next->get_inst().out[r]); m_scoreboard->releaseRegister(mf_next->get_inst().warp_id(),mf_next->get_inst().out[r]); - m_core->warp_inst_complete(mf_next->get_inst()); + insn_completed = true; } } + + if (insn_completed) + m_core->warp_inst_complete(mf_next->get_inst()); } //For write hit in WB policy From 79dd57a59865ee44eab621dbc0246ccbd6b84447 Mon Sep 17 00:00:00 2001 From: Serina Tan Date: Thu, 22 Aug 2019 19:36:52 -0400 Subject: [PATCH 2/2] Bug fix: CTA id should be incremented in functional simulation whether or not checkpointing is enabled --- src/cuda-sim/cuda-sim.cc | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/src/cuda-sim/cuda-sim.cc b/src/cuda-sim/cuda-sim.cc index f7bb9ccf8..4e8bb4433 100644 --- a/src/cuda-sim/cuda-sim.cc +++ b/src/cuda-sim/cuda-sim.cc @@ -2180,9 +2180,8 @@ void gpgpu_cuda_ptx_sim_main_func( kernel_info_t &kernel, bool openCL ) //we excute the kernel one CTA (Block) at the time, as synchronization functions work block wise while(!kernel.no_more_ctas_to_run()){ - unsigned temp=kernel.get_next_cta_id_single(); + unsigned cta_id=kernel.get_next_cta_id_single(); - if(cp_op==0 || (cp_op==1 && 
cta_launchedgetShaderCoreConfig()->warp_size ); - cta.execute(cp_count,temp); + cta.execute(cp_count,cta_id); #if (CUDART_VERSION >= 5000) launch_all_device_kernels(); #endif - } - else - { - kernel.increment_cta_id(); - } - cta_launched++; + } + + kernel.increment_cta_id(); + cta_launched++; }