Skip to content

Commit

Permalink
SWDEV-465322: Adding support for Perfcounter SIMD Mask in ATT (#910)
Browse files Browse the repository at this point in the history
* SWDEV-465322: Adding support for Perfcounter SIMD Mask in ATT

* Apply suggestions from code review

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Benjamin Welton <[email protected]>

* Adding unit tests

* Adding counters check for gfx9 and SQ block only

* Addressing review comments

* changing the struct size

* fixing header includes

---------

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Benjamin Welton <[email protected]>
  • Loading branch information
3 people authored Jun 12, 2024
1 parent cfe3af9 commit c497196
Show file tree
Hide file tree
Showing 12 changed files with 227 additions and 43 deletions.
10 changes: 5 additions & 5 deletions samples/advanced_thread_trace/client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -402,11 +402,11 @@ tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
tool_data),
"code object tracing service configure");

std::vector<rocprofiler_att_parameter_t> parameters;
parameters.push_back({ROCPROFILER_ATT_PARAMETER_TARGET_CU, TARGET_CU});
parameters.push_back({ROCPROFILER_ATT_PARAMETER_SIMD_SELECT, SIMD_SELECT});
parameters.push_back({ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE, BUFFER_SIZE});
parameters.push_back({ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK, SE_MASK});
std::vector<rocprofiler_att_parameter_t> parameters = {
{ROCPROFILER_ATT_PARAMETER_TARGET_CU, {TARGET_CU}},
{ROCPROFILER_ATT_PARAMETER_SIMD_SELECT, {SIMD_SELECT}},
{ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE, {BUFFER_SIZE}},
{ROCPROFILER_ATT_PARAMETER_SHADER_ENGINE_MASK, {SE_MASK}}};

ROCPROFILER_CALL(rocprofiler_configure_thread_trace_service(client_ctx,
parameters.data(),
Expand Down
34 changes: 20 additions & 14 deletions samples/common/defines.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,41 +21,47 @@
// SOFTWARE.

#pragma once
#define ROCPROFILER_VAR_NAME_COMBINE(X, Y) X##Y
#define ROCPROFILER_VARIABLE(X, Y) ROCPROFILER_VAR_NAME_COMBINE(X, Y)

// Evaluates `result` once into a local status variable. If the status is not
// ROCPROFILER_STATUS_SUCCESS, the source location (__FILE__/__LINE__), the stringified
// expression, the error code and the text from rocprofiler_get_status_string() are
// written to std::cerr. Nothing is thrown: execution continues after the warning.
#define ROCPROFILER_WARN(result) \
{ \
rocprofiler_status_t CHECKSTATUS = result; \
if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \
rocprofiler_status_t ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) = result; \
if(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) != ROCPROFILER_STATUS_SUCCESS) \
{ \
std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \
std::string status_msg = \
rocprofiler_get_status_string(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__)); \
std::cerr << "[" << __FILE__ << ":" << __LINE__ << "] " << #result \
<< " returned error code " << CHECKSTATUS << ": " << status_msg \
<< ". This is just a warning!" << std::endl; \
<< " returned error code " << ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) \
<< ": " << status_msg << ". This is just a warning!" << std::endl; \
} \
}

// Evaluates `result` once into a local status variable. If the status is not
// ROCPROFILER_STATUS_SUCCESS, a message containing the source location
// (__FILE__/__LINE__), the stringified expression, the error code and the text from
// rocprofiler_get_status_string() is built and thrown as std::runtime_error.
#define ROCPROFILER_CHECK(result) \
{ \
rocprofiler_status_t CHECKSTATUS = result; \
if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \
rocprofiler_status_t ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) = result; \
if(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) != ROCPROFILER_STATUS_SUCCESS) \
{ \
std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \
std::string status_msg = \
rocprofiler_get_status_string(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__)); \
std::stringstream errmsg{}; \
errmsg << "[" << __FILE__ << ":" << __LINE__ << "] " << #result \
<< " failed with error code " << CHECKSTATUS << " :: " << status_msg; \
<< " failed with error code " << ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) \
<< " :: " << status_msg; \
throw std::runtime_error(errmsg.str()); \
} \
}

#define ROCPROFILER_CALL(result, msg) \
{ \
rocprofiler_status_t CHECKSTATUS = result; \
if(CHECKSTATUS != ROCPROFILER_STATUS_SUCCESS) \
rocprofiler_status_t ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) = result; \
if(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) != ROCPROFILER_STATUS_SUCCESS) \
{ \
std::string status_msg = rocprofiler_get_status_string(CHECKSTATUS); \
std::string status_msg = \
rocprofiler_get_status_string(ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__)); \
std::cerr << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg \
<< " failed with error code " << CHECKSTATUS << ": " << status_msg \
<< std::endl; \
<< " failed with error code " << ROCPROFILER_VARIABLE(CHECKSTATUS, __LINE__) \
<< ": " << status_msg << std::endl; \
std::stringstream errmsg{}; \
errmsg << "[" #result "][" << __FILE__ << ":" << __LINE__ << "] " << msg " failure (" \
<< status_msg << ")"; \
Expand Down
13 changes: 12 additions & 1 deletion source/include/rocprofiler-sdk/amd_detail/thread_trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <rocprofiler-sdk/defines.h>
#include <rocprofiler-sdk/fwd.h>
#include <rocprofiler-sdk/hsa.h>
#include <cstdint>

ROCPROFILER_EXTERN_C_INIT

Expand All @@ -43,13 +44,23 @@ typedef enum
ROCPROFILER_ATT_PARAMETER_BUFFER_SIZE, ///< Size of combined GPU buffer for ATT
ROCPROFILER_ATT_PARAMETER_SIMD_SELECT, ///< Bitmask (GFX9) or ID (Navi) of SIMDs
ROCPROFILER_ATT_PARAMETER_CODE_OBJECT_TRACE_ENABLE, ///< Enables Codeobj Markers IDs into ATT
ROCPROFILER_ATT_PARAMETER_PERFCOUNTER, ///< Enables Perfcounter with simd mask (GFX9 only)
ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL, ///< Defines the update period (GFX9 only)
ROCPROFILER_ATT_PARAMETER_LAST
} rocprofiler_att_parameter_type_t;

/// One thread-trace (ATT) configuration entry. `type` selects which parameter is being
/// set; the payload is either a plain 64-bit `value` or, for perfcounter entries, a
/// counter id together with a 4-bit SIMD mask.
typedef struct
{
rocprofiler_att_parameter_type_t type;
uint64_t value;
union
{
uint64_t value;
struct
{
rocprofiler_counter_id_t counter_id;  ///< Counter to collect; read for ROCPROFILER_ATT_PARAMETER_PERFCOUNTER
uint64_t simd_mask : 4;  ///< 4-bit SIMD selection paired with counter_id
};
};
} rocprofiler_att_parameter_t;

typedef enum
Expand Down
20 changes: 17 additions & 3 deletions source/lib/rocprofiler-sdk/aql/packet_construct.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -216,20 +216,34 @@ ThreadTraceAQLPacketFactory::ThreadTraceAQLPacketFactory(const hsa::AgentCache&
uint32_t shader_engine_mask = static_cast<uint32_t>(params.shader_engine_mask);
uint32_t simd = static_cast<uint32_t>(params.simd_select);
uint32_t buffer_size = static_cast<uint32_t>(params.buffer_size);
uint32_t perf_ctrl = static_cast<uint32_t>(params.perfcounter_ctrl);

aql_params.clear();
aql_params.push_back({HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_COMPUTE_UNIT_TARGET, cu});
aql_params.push_back({HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SE_MASK, shader_engine_mask});
aql_params.push_back({HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_SIMD_SELECTION, simd});
aql_params.push_back({HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_ATT_BUFFER_SIZE, buffer_size});
aql_params.push_back({HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_CTRL, perf_ctrl});
for(uint32_t perf_counter : params.perfcounters)
{
aql_params.push_back(
{HSA_VEN_AMD_AQLPROFILE_PARAMETER_NAME_PERFCOUNTER_NAME, perf_counter});
}
}

const std::vector<hsa_ven_amd_aqlprofile_parameter_t>&
ThreadTraceAQLPacketFactory::get_aql_params()
{
return aql_params;
}

// Builds a trace-control packet for thread trace: an aqlprofile_att_profile_t is
// assembled from the trace pool's GPU agent plus the stored AQL parameters and their
// count, a TraceControlAQLPacket is created from the pool and that profile, clear()
// is called on it, and ownership of the packet is returned to the caller.
std::unique_ptr<hsa::TraceControlAQLPacket>
ThreadTraceAQLPacketFactory::construct_packet()
{
uint32_t num_params = static_cast<uint32_t>(aql_params.size());
auto profile = aqlprofile_att_profile_t{tracepool.gpu_agent, aql_params.data(), num_params};
auto packet = std::make_unique<hsa::TraceControlAQLPacket>(this->tracepool, profile);
uint32_t num_params = static_cast<uint32_t>(get_aql_params().size());
auto profile =
aqlprofile_att_profile_t{tracepool.gpu_agent, get_aql_params().data(), num_params};
auto packet = std::make_unique<hsa::TraceControlAQLPacket>(this->tracepool, profile);
packet->clear();
return packet;
}
Expand Down
9 changes: 5 additions & 4 deletions source/lib/rocprofiler-sdk/aql/packet_construct.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,10 +91,11 @@ class ThreadTraceAQLPacketFactory
const thread_trace_parameter_pack& params,
const CoreApiTable& coreapi,
const AmdExtTable& ext);
std::unique_ptr<hsa::TraceControlAQLPacket> construct_packet();
std::unique_ptr<hsa::CodeobjMarkerAQLPacket> construct_load_marker_packet(uint64_t id,
uint64_t addr,
uint64_t size);
const std::vector<hsa_ven_amd_aqlprofile_parameter_t>& get_aql_params();
std::unique_ptr<hsa::TraceControlAQLPacket> construct_packet();
std::unique_ptr<hsa::CodeobjMarkerAQLPacket> construct_load_marker_packet(uint64_t id,
uint64_t addr,
uint64_t size);
std::unique_ptr<hsa::CodeobjMarkerAQLPacket> construct_unload_marker_packet(uint64_t id);

private:
Expand Down
20 changes: 20 additions & 0 deletions source/lib/rocprofiler-sdk/counters/metrics.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,26 @@ getMetricIdMap()
return id_map;
}

/**
 * Returns the id -> metric map of counters usable as thread-trace perfcounters.
 *
 * The map is built once, on first call, by scanning getMetricMap(): only agents whose
 * name contains "gfx9" are considered, and of their metrics only those whose block is
 * "SQ" are kept. If the same metric id shows up more than once, the first one seen wins
 * (emplace does not overwrite).
 */
const MetricIdMap*
getPerfCountersIdMap()
{
    static MetricIdMap*& sq_counter_ids = common::static_object<MetricIdMap>::construct([]() {
        const std::string gfx9_tag{"gfx9"};
        MetricIdMap       selected;

        for(const auto& [agent_name, metrics] : *CHECK_NOTNULL(getMetricMap()))
        {
            // Skip every agent that is not a GFX9 part.
            if(agent_name.find(gfx9_tag) == std::string::npos) continue;

            for(const auto& metric : metrics)
            {
                // Only SQ block counters are supported.
                if(metric.block() == "SQ")
                {
                    selected.emplace(metric.id(), metric);
                }
            }
        }
        return selected;
    }());

    return sq_counter_ids;
}

const MetricMap*
getMetricMap()
{
Expand Down
7 changes: 7 additions & 0 deletions source/lib/rocprofiler-sdk/counters/metrics.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ getMetricsForAgent(const std::string&);
const MetricIdMap*
getMetricIdMap();

/**
* Get the metrics for perfcounters options in thread trace
* applicable only for GFX9 agents and SQ block counters
*/
const MetricIdMap*
getPerfCountersIdMap();

/**
* Checks if a metric is valid for a given agent
**/
Expand Down
10 changes: 6 additions & 4 deletions source/lib/rocprofiler-sdk/thread_trace/att_core.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,13 @@ struct thread_trace_parameter_pack
uint64_t buffer_size = DEFAULT_BUFFER_SIZE;

// GFX9 Only
std::vector<std::string> perfcounters;
std::vector<uint32_t> perfcounters;

static constexpr size_t DEFAULT_SIMD = 0x7;
static constexpr size_t DEFAULT_SE_MASK = 0x21;
static constexpr size_t DEFAULT_BUFFER_SIZE = 0x8000000;
static constexpr size_t DEFAULT_SIMD = 0x7;
static constexpr size_t DEFAULT_PERFCOUNTER_SIMD_MASK = 0xF;
static constexpr size_t DEFAULT_SE_MASK = 0x21;
static constexpr size_t DEFAULT_BUFFER_SIZE = 0x8000000;
static constexpr size_t PERFCOUNTER_SIMD_MASK_SHIFT = 28;
};

namespace hsa
Expand Down
27 changes: 25 additions & 2 deletions source/lib/rocprofiler-sdk/thread_trace/att_service.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,28 @@
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.

#include <glog/logging.h>
#include <rocprofiler-sdk/rocprofiler.h>
#include <cstdint>
#include <cstdlib>

#include "lib/rocprofiler-sdk/aql/helpers.hpp"
#include "lib/rocprofiler-sdk/context/context.hpp"
#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp"
#include "lib/rocprofiler-sdk/registration.hpp"
#include "rocprofiler-sdk/amd_detail/thread_trace.h"

namespace
{
/**
 * Builds the 32-bit perfcounter parameter passed to the ATT packet factory.
 *
 * @param metric          Counter whose event() string is parsed as a decimal number and
 *                        placed in the low bits of the result. Must not be null.
 * @param simds_selected  SIMD selection bits. A value of 0 means "not specified" and is
 *                        replaced by DEFAULT_PERFCOUNTER_SIMD_MASK.
 * @return                event number OR-ed with the SIMD selection shifted left by
 *                        PERFCOUNTER_SIMD_MASK_SHIFT.
 */
uint32_t
get_mask(const rocprofiler::counters::Metric* metric, uint64_t simds_selected)
{
    using param_pack = rocprofiler::thread_trace_parameter_pack;

    // std::strtoul instead of std::atoi: atoi's result is undefined when the text does
    // not fit in an int, whereas strtoul is well defined for any input. For every string
    // atoi handles, the value converted to uint32_t is the same.
    const auto event_bits =
        static_cast<uint32_t>(std::strtoul(metric->event().c_str(), nullptr, 10));

    if(simds_selected == 0) simds_selected = param_pack::DEFAULT_PERFCOUNTER_SIMD_MASK;

    // The shift is done in 64 bits and then narrowed explicitly; only the selection bits
    // that land inside the 32-bit word survive, exactly as with the implicit conversion.
    const auto simd_bits =
        static_cast<uint32_t>(simds_selected << param_pack::PERFCOUNTER_SIMD_MASK_SHIFT);

    return event_bits | simd_bits;
}
}  // namespace

extern "C" {
rocprofiler_status_t ROCPROFILER_API
Expand All @@ -51,6 +67,7 @@ rocprofiler_configure_thread_trace_service(rocprofiler_context_id_t
param_pack.callback_userdata = callback_userdata;
bool bEnableCodeobj = false;

const auto& id_map = *CHECK_NOTNULL(rocprofiler::counters::getPerfCountersIdMap());
for(size_t p = 0; p < num_parameters; p++)
{
const rocprofiler_att_parameter_t& param = parameters[p];
Expand All @@ -68,10 +85,16 @@ rocprofiler_configure_thread_trace_service(rocprofiler_context_id_t
case ROCPROFILER_ATT_PARAMETER_CODE_OBJECT_TRACE_ENABLE:
bEnableCodeobj = param.value != 0;
break;
case ROCPROFILER_ATT_PARAMETER_PERFCOUNTER:
if(const auto* metric_ptr =
rocprofiler::common::get_val(id_map, param.counter_id.handle))
param_pack.perfcounters.push_back(get_mask(metric_ptr, param.simd_mask));
break;
case ROCPROFILER_ATT_PARAMETER_PERFCOUNTERS_CTRL:
param_pack.perfcounter_ctrl = param.value;
break;
case ROCPROFILER_ATT_PARAMETER_LAST: return ROCPROFILER_STATUS_ERROR_INVALID_ARGUMENT;
}
// for(int i = 0; i < parameters.perfcounter_num; i++)
// thread_tracer->perfcounters.emplace_back(parameters.perfcounter[i]);
}

ctx->thread_trace = std::make_shared<rocprofiler::GlobalThreadTracer>(param_pack);
Expand Down
Loading

0 comments on commit c497196

Please sign in to comment.