From 81d140756509924263e3e7215ebdc8d3b0294838 Mon Sep 17 00:00:00 2001 From: Benjamin Welton Date: Wed, 19 Jun 2024 00:11:03 -0700 Subject: [PATCH] Incremental Counter Profile Creation (#933) * Incremental Counter Profile Creation Adds support for incremental counter creation. How this functions is the behavior of rocprofiler_create_profile_config has been changed. rocprofiler_create_profile_config(rocprofiler_agent_id_t agent_id, rocprofiler_counter_id_t* counters_list, size_t counters_count, rocprofiler_profile_config_id_t* config_id) The behavior of this function now allows an existing config_id to be supplied via config_id. The counters contained in this config will be copied over and used as a base for a new config along with any counters supplied in counters_list. The new config id is returned via config_id and can be used in future dispatch/agent counting sessions. A new config is created over modifying an existing config since there is no gaurentee that the existing config isn't already in use. While we could add locks (or other mutual exclusion properties) to check if its in use and reject an update, the benefit from doing so is minor in comparison to just creating a new config. This also side steps a common pattern a tool may use to add additional counters at some point later on during execution. Now they can do that without destroying the existing config. --------- Co-authored-by: Benjamin Welton Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- .../print_functional_counters.cpp | 14 ++--- source/include/rocprofiler-sdk/fwd.h | 1 + .../include/rocprofiler-sdk/profile_config.h | 10 ++- .../rocprofiler-sdk/aql/packet_construct.cpp | 13 ++-- .../rocprofiler-sdk/aql/packet_construct.hpp | 5 +- .../rocprofiler-sdk/aql/tests/aql_test.cpp | 15 +---- .../rocprofiler-sdk/counters/controller.cpp | 20 +++++- .../rocprofiler-sdk/counters/controller.hpp | 4 +- .../rocprofiler-sdk/counters/tests/core.cpp | 62 +++++++++++++++++++ source/lib/rocprofiler-sdk/profile_config.cpp | 39 +++++++++--- source/lib/rocprofiler-sdk/rocprofiler.cpp | 2 + tests/thread-trace/single_dispatch.cpp | 1 - 12 files changed, 140 insertions(+), 46 deletions(-) diff --git a/samples/counter_collection/print_functional_counters.cpp b/samples/counter_collection/print_functional_counters.cpp index 4c3da4ef..af2b5e6f 100644 --- a/samples/counter_collection/print_functional_counters.cpp +++ b/samples/counter_collection/print_functional_counters.cpp @@ -4,12 +4,11 @@ #include #include #include -#include #include #include -#include #include +#include #include #include @@ -308,13 +307,14 @@ dispatch_callback(rocprofiler_profile_counting_dispatch_data_t dispatch_data, rocprofiler_profile_config_id_t profile; // Select the next counter to collect. - ROCPROFILER_CALL( - rocprofiler_create_profile_config( - dispatch_data.dispatch_info.agent_id, &(cap.remaining.back()), 1, &profile), - "Could not construct profile cfg"); + if(rocprofiler_create_profile_config( + dispatch_data.dispatch_info.agent_id, &(cap.remaining.back()), 1, &profile) == + ROCPROFILER_STATUS_SUCCESS) + { + *config = profile; + } cap.remaining.pop_back(); - *config = profile; } int diff --git a/source/include/rocprofiler-sdk/fwd.h b/source/include/rocprofiler-sdk/fwd.h index 6fa3f661..d1e67203 100644 --- a/source/include/rocprofiler-sdk/fwd.h +++ b/source/include/rocprofiler-sdk/fwd.h @@ -105,6 +105,7 @@ typedef enum // NOLINT(performance-enum-size) ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE, ///< The service is not available. ///< Please refer to API functions that return this ///< status code for more information. + ROCPROFILER_STATUS_ERROR_EXCEEDS_HW_LIMIT, ///< Exceeds hardware limits for collection ROCPROFILER_STATUS_LAST, } rocprofiler_status_t; diff --git a/source/include/rocprofiler-sdk/profile_config.h b/source/include/rocprofiler-sdk/profile_config.h index e7c97094..0ab4e911 100644 --- a/source/include/rocprofiler-sdk/profile_config.h +++ b/source/include/rocprofiler-sdk/profile_config.h @@ -40,12 +40,18 @@ ROCPROFILER_EXTERN_C_INIT * be used across many contexts. The profile has a fixed set of counters * that are collected (and specified by counter_list). The available * counters for an agent can be queried using - * @ref rocprofiler_iterate_agent_supported_counters. + * @ref rocprofiler_iterate_agent_supported_counters. An existing profile + * may be supplied via config_id to use as a base for the new profile. + * All counters in the existing profile will be copied over to the new + * profile. The existing profile will remain unmodified and usable with + * the new profile id being returned in config_id. * * @param [in] agent_id Agent identifier * @param [in] counters_list List of GPU counters * @param [in] counters_count Size of counters list - * @param [out] config_id Identifier for GPU counters group + * @param [in/out] config_id Identifier for GPU counters group. If an existing + profile is supplied, that profiles counters will be copied + over to a new profile (returned via this id) * @return ::rocprofiler_status_t * @retval ROCPROFILER_STATUS_SUCCESS if profile created * @retval ROCPROFILER_STATUS_ERROR if profile could not be created diff --git a/source/lib/rocprofiler-sdk/aql/packet_construct.cpp b/source/lib/rocprofiler-sdk/aql/packet_construct.cpp index 67ba50ea..5319af1f 100644 --- a/source/lib/rocprofiler-sdk/aql/packet_construct.cpp +++ b/source/lib/rocprofiler-sdk/aql/packet_construct.cpp @@ -27,6 +27,7 @@ #include #include #include "glog/logging.h" +#include "rocprofiler-sdk/fwd.h" #define CHECK_HSA(fn, message) \ { \ @@ -91,9 +92,6 @@ CounterPacketConstruct::CounterPacketConstruct(rocprofiler_agent_id_t event_id)] = x; } } - // Check that we can collect all of the metrics in a single execution - // with a single AQL packet - can_collect(); _events = get_all_events(); } @@ -293,7 +291,7 @@ CounterPacketConstruct::get_counter_events(const counters::Metric& metric) const throw std::runtime_error(fmt::format("Cannot Find Events for {}", metric)); } -void +rocprofiler_status_t CounterPacketConstruct::can_collect() { // Verify that the counters fit within harrdware limits @@ -318,13 +316,10 @@ CounterPacketConstruct::can_collect() { if(auto* max = CHECK_NOTNULL(common::get_val(max_allowed, block_name)); count > *max) { - throw std::runtime_error( - fmt::format("Block {} exceeds max number of hardware counters ({} > {})", - static_cast(block_name.first), - count, - *max)); + return ROCPROFILER_STATUS_ERROR_EXCEEDS_HW_LIMIT; } } + return ROCPROFILER_STATUS_SUCCESS; } } // namespace aql } // namespace rocprofiler diff --git a/source/lib/rocprofiler-sdk/aql/packet_construct.hpp b/source/lib/rocprofiler-sdk/aql/packet_construct.hpp index ac31e989..78b0f2ad 100644 --- a/source/lib/rocprofiler-sdk/aql/packet_construct.hpp +++ b/source/lib/rocprofiler-sdk/aql/packet_construct.hpp @@ -36,6 +36,7 @@ #include "lib/rocprofiler-sdk/hsa/agent_cache.hpp" #include "lib/rocprofiler-sdk/hsa/queue.hpp" #include "lib/rocprofiler-sdk/thread_trace/att_core.hpp" +#include "rocprofiler-sdk/fwd.h" namespace rocprofiler { @@ -62,6 +63,8 @@ class CounterPacketConstruct rocprofiler_agent_id_t agent() const { return _agent; } + rocprofiler_status_t can_collect(); + private: static constexpr size_t MEM_PAGE_ALIGN = 0x1000; static constexpr size_t MEM_PAGE_MASK = MEM_PAGE_ALIGN - 1; @@ -75,8 +78,6 @@ class CounterPacketConstruct std::vector events; }; - void can_collect(); - rocprofiler_agent_id_t _agent; std::vector _metrics; std::vector _events; diff --git a/source/lib/rocprofiler-sdk/aql/tests/aql_test.cpp b/source/lib/rocprofiler-sdk/aql/tests/aql_test.cpp index 3f702ab4..ae51423a 100644 --- a/source/lib/rocprofiler-sdk/aql/tests/aql_test.cpp +++ b/source/lib/rocprofiler-sdk/aql/tests/aql_test.cpp @@ -26,6 +26,7 @@ #include "lib/rocprofiler-sdk/counters/tests/hsa_tables.hpp" #include "lib/rocprofiler-sdk/hsa/agent_cache.hpp" #include "lib/rocprofiler-sdk/hsa/queue_controller.hpp" +#include "rocprofiler-sdk/fwd.h" #include @@ -104,18 +105,8 @@ TEST(aql_profile, too_many_counters) ROCP_INFO << fmt::format("Found Agent: {}", agent.get_hsa_agent().handle); auto metrics = rocprofiler::findDeviceMetrics(agent, {}); - EXPECT_THROW( - { - try - { - CounterPacketConstruct(agent.get_rocp_agent()->id, metrics); - } catch(const std::exception& e) - { - EXPECT_NE(e.what(), nullptr) << e.what(); - throw; - } - }, - std::runtime_error); + EXPECT_NE(CounterPacketConstruct(agent.get_rocp_agent()->id, metrics).can_collect(), + ROCPROFILER_STATUS_SUCCESS); } hsa_shut_down(); } diff --git a/source/lib/rocprofiler-sdk/counters/controller.cpp b/source/lib/rocprofiler-sdk/counters/controller.cpp index b13b6b19..4ea09c33 100644 --- a/source/lib/rocprofiler-sdk/counters/controller.cpp +++ b/source/lib/rocprofiler-sdk/counters/controller.cpp @@ -164,10 +164,24 @@ get_controller() return controller; } -uint64_t -create_counter_profile(std::shared_ptr&& config) +rocprofiler_status_t +create_counter_profile(std::shared_ptr config) { - return get_controller().add_profile(std::move(config)); + auto status = ROCPROFILER_STATUS_SUCCESS; + if(status = counters::counter_callback_info::setup_profile_config(config); + status != ROCPROFILER_STATUS_SUCCESS) + { + return status; + } + + if(status = config->pkt_generator->can_collect(); status != ROCPROFILER_STATUS_SUCCESS) + { + return status; + } + + get_controller().add_profile(std::move(config)); + + return status; } void diff --git a/source/lib/rocprofiler-sdk/counters/controller.hpp b/source/lib/rocprofiler-sdk/counters/controller.hpp index 5083810a..488abaab 100644 --- a/source/lib/rocprofiler-sdk/counters/controller.hpp +++ b/source/lib/rocprofiler-sdk/counters/controller.hpp @@ -101,8 +101,8 @@ class CounterController CounterController& get_controller(); -uint64_t -create_counter_profile(std::shared_ptr&& config); +rocprofiler_status_t +create_counter_profile(std::shared_ptr config); void destroy_counter_profile(uint64_t id); diff --git a/source/lib/rocprofiler-sdk/counters/tests/core.cpp b/source/lib/rocprofiler-sdk/counters/tests/core.cpp index b223f2e3..035dfe7b 100644 --- a/source/lib/rocprofiler-sdk/counters/tests/core.cpp +++ b/source/lib/rocprofiler-sdk/counters/tests/core.cpp @@ -672,6 +672,68 @@ TEST(core, start_stop_callback_ctx) context::pop_client(1); } +TEST(core, test_profile_incremental) +{ + ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS); + test_init(); + ASSERT_TRUE(hsa::get_queue_controller() != nullptr); + auto agents = hsa::get_queue_controller()->get_supported_agents(); + ASSERT_GT(agents.size(), 0); + for(const auto& [_, agent] : agents) + { + auto metrics = findDeviceMetrics(agent, {}); + ASSERT_FALSE(metrics.empty()); + ASSERT_TRUE(agent.get_rocp_agent()); + + std::map> metric_blocks; + for(const auto& metric : metrics) + { + if(!metric.block().empty()) + { + metric_blocks[metric.block()].push_back(metric); + } + } + + rocprofiler_profile_config_id_t cfg_id = {}; + + // Add one counter from each block to incrementally to make sure we can + // add them incrementally + for(const auto& [block_name, block_metrics] : metric_blocks) + { + rocprofiler_profile_config_id_t old_id = cfg_id; + rocprofiler_counter_id_t id = {.handle = block_metrics.front().id()}; + ROCPROFILER_CALL( + rocprofiler_create_profile_config(agent.get_rocp_agent()->id, &id, 1, &cfg_id), + "Unable to create profile incrementally when we should be able to"); + EXPECT_NE(old_id.handle, cfg_id.handle) + << "We expect that the handle changes this is due to the existing profile being " + "unmodifiable after creation: " + << block_name; + } + + // Check that we encounter an error of exceeds hardware limits eventually + auto status = ROCPROFILER_STATUS_SUCCESS; + for(const auto& metric : metrics) + { + /** + * Check profile construction + */ + rocprofiler_counter_id_t id = {.handle = metric.id()}; + if(status = + rocprofiler_create_profile_config(agent.get_rocp_agent()->id, &id, 1, &cfg_id); + status != ROCPROFILER_STATUS_SUCCESS) + { + break; + } + } + EXPECT_EQ(status, ROCPROFILER_STATUS_ERROR_EXCEEDS_HW_LIMIT); + } + + registration::set_init_status(1); + + registration::finalize(); +} + TEST(core, public_api_iterate_agents) { ASSERT_EQ(hsa_init(), HSA_STATUS_SUCCESS); diff --git a/source/lib/rocprofiler-sdk/profile_config.cpp b/source/lib/rocprofiler-sdk/profile_config.cpp index 86aa4524..b68fda2c 100644 --- a/source/lib/rocprofiler-sdk/profile_config.cpp +++ b/source/lib/rocprofiler-sdk/profile_config.cpp @@ -22,15 +22,14 @@ #include #include +#include +#include -#include "lib/common/synchronized.hpp" #include "lib/common/utility.hpp" #include "lib/rocprofiler-sdk/agent.hpp" -#include "lib/rocprofiler-sdk/aql/helpers.hpp" +#include "lib/rocprofiler-sdk/counters/controller.hpp" #include "lib/rocprofiler-sdk/counters/core.hpp" -#include "lib/rocprofiler-sdk/counters/evaluate_ast.hpp" #include "lib/rocprofiler-sdk/counters/metrics.hpp" -#include "lib/rocprofiler-sdk/hsa/agent_cache.hpp" extern "C" { /** @@ -39,7 +38,9 @@ extern "C" { * @param [in] agent Agent identifier * @param [in] counters_list List of GPU counters * @param [in] counters_count Size of counters list - * @param [out] config_id Identifier for GPU counters group + * @param [in/out] config_id Identifier for GPU counters group. If an existing + profile is supplied, that profiles counters will be copied + over to a new profile (returned via this id). * @return ::rocprofiler_status_t */ rocprofiler_status_t @@ -48,7 +49,8 @@ rocprofiler_create_profile_config(rocprofiler_agent_id_t agent_id, size_t counters_count, rocprofiler_profile_config_id_t* config_id) { - const auto* agent = ::rocprofiler::agent::get_agent(agent_id); + std::unordered_set already_added; + const auto* agent = ::rocprofiler::agent::get_agent(agent_id); if(!agent) return ROCPROFILER_STATUS_ERROR_AGENT_NOT_FOUND; std::shared_ptr config = @@ -61,6 +63,9 @@ rocprofiler_create_profile_config(rocprofiler_agent_id_t agent_id, const auto* metric_ptr = rocprofiler::common::get_val(id_map, counter_id.handle); if(!metric_ptr) return ROCPROFILER_STATUS_ERROR_COUNTER_NOT_FOUND; + // Don't add duplicates + if(!already_added.emplace(metric_ptr->id()).second) continue; + if(!rocprofiler::counters::checkValidMetric(std::string(agent->name), *metric_ptr)) { return ROCPROFILER_STATUS_ERROR_METRIC_NOT_VALID_FOR_AGENT; @@ -68,8 +73,26 @@ rocprofiler_create_profile_config(rocprofiler_agent_id_t agent_id, config->metrics.push_back(*metric_ptr); } - config->agent = agent; - config_id->handle = rocprofiler::counters::create_counter_profile(std::move(config)); + if(config_id->handle != 0) + { + // Copy existing counters from previous config + if(auto existing = rocprofiler::counters::get_profile_config(*config_id)) + { + for(const auto& metric : existing->metrics) + { + if(!already_added.emplace(metric.id()).second) continue; + config->metrics.push_back(metric); + } + } + } + + config->agent = agent; + if(auto status = rocprofiler::counters::create_counter_profile(config); + status != ROCPROFILER_STATUS_SUCCESS) + { + return status; + } + *config_id = config->id; return ROCPROFILER_STATUS_SUCCESS; } diff --git a/source/lib/rocprofiler-sdk/rocprofiler.cpp b/source/lib/rocprofiler-sdk/rocprofiler.cpp index d5a1ed1e..37e85fba 100644 --- a/source/lib/rocprofiler-sdk/rocprofiler.cpp +++ b/source/lib/rocprofiler-sdk/rocprofiler.cpp @@ -111,6 +111,8 @@ ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_NOT_AVAILABLE, "The service is not available." "Please refer to API functions that return this status code" "for more information.") +ROCPROFILER_STATUS_STRING(ROCPROFILER_STATUS_ERROR_EXCEEDS_HW_LIMIT, + "Request exceeds the capabilities of the hardware to collect") template const char* diff --git a/tests/thread-trace/single_dispatch.cpp b/tests/thread-trace/single_dispatch.cpp index a911e2d3..b4dd1842 100644 --- a/tests/thread-trace/single_dispatch.cpp +++ b/tests/thread-trace/single_dispatch.cpp @@ -65,7 +65,6 @@ dispatch_callback(rocprofiler_queue_id_t /* queue_id */, ToolData& tool = *reinterpret_cast(userdata); dispatch_userdata->ptr = userdata; - static std::atomic call_id{0}; static std::string_view desired_func_name = "branching_kernel"; try