Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rocr/aie: AIE Queue Processing #251

Draft
wants to merge 6 commits into
base: amd-staging
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 14 additions & 3 deletions runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,8 @@ hsa_status_t XdnaDriver::GetAgentProperties(core::Agent &agent) const {
return HSA_STATUS_ERROR;
}

aie_agent.SetNumCols(aie_metadata.cols);
// Right now can only target N-1 columns
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We may need some additional context here on why we can only target N-1 columns (e.g., current hardware limitations, something else).

aie_agent.SetNumCols(aie_metadata.cols - 1);
aie_agent.SetNumCoreRows(aie_metadata.core.row_count);

return HSA_STATUS_SUCCESS;
Expand Down Expand Up @@ -147,7 +148,7 @@ XdnaDriver::AllocateMemory(const core::MemoryRegion &mem_region,
}

if (m_region.kernarg()) {
create_bo_args.type = AMDXDNA_BO_CMD;
create_bo_args.type = AMDXDNA_BO_SHMEM;
} else {
create_bo_args.type = AMDXDNA_BO_DEV;
}
Expand Down Expand Up @@ -216,7 +217,7 @@ hsa_status_t XdnaDriver::CreateQueue(core::Queue &queue) const {
// TODO: Make this configurable.
.max_opc = 0x800,
// This field is for the number of core tiles.
.num_tiles = aie_agent.GetNumCores(),
.num_tiles = static_cast<uint32_t>(aie_agent.GetNumCores()),
.mem_size = 0,
.umq_doorbell = 0};

Expand Down Expand Up @@ -316,6 +317,16 @@ hsa_status_t XdnaDriver::InitDeviceHeap() {
return HSA_STATUS_SUCCESS;
}

hsa_status_t XdnaDriver::GetHandleMappings(std::unordered_map<uint32_t, void*> &vmem_handle_mappings) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could return a const std::unordered_map<uint32_t, void*>& and avoid the copy here. Do we ever expect the function to fail?

vmem_handle_mappings = this->vmem_handle_mappings;
return HSA_STATUS_SUCCESS;
}

hsa_status_t XdnaDriver::GetFd(int &fd) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, we can just return the fd.

fd = fd_;
return HSA_STATUS_SUCCESS;
}

hsa_status_t XdnaDriver::FreeDeviceHeap() {
if (dev_heap_parent) {
munmap(dev_heap_parent, dev_heap_align * 2 - 1);
Expand Down
6 changes: 6 additions & 0 deletions runtime/hsa-runtime/core/inc/amd_aie_agent.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ class AieAgent : public core::Agent {
return system_allocator_;
}

/// @brief Getter for the AIE system deallocator.
/// @return Const reference to the callable held in system_deallocator_; the
/// callable takes a single void* argument and returns nothing.
const std::function<void(void*)>& system_deallocator() const { return system_deallocator_; }

// AIE agent methods.
/// @brief Get the number of columns on this AIE agent.
int GetNumCols() const { return num_cols_; }
Expand All @@ -117,6 +120,9 @@ class AieAgent : public core::Agent {
core::MemoryRegion::AllocateFlags flags)>
system_allocator_;


std::function<void(void*)> system_deallocator_;

const hsa_profile_t profile_ = HSA_PROFILE_BASE;
const uint32_t min_aql_size_ = 0x40;
const uint32_t max_aql_size_ = 0x40;
Expand Down
76 changes: 71 additions & 5 deletions runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,35 @@
#include "core/inc/queue.h"
#include "core/inc/runtime.h"
#include "core/inc/signal.h"
#include "core/util/locks.h"

/*
 * Interpretation of the beginning of data payload for ERT_CMD_CHAIN in
 * amdxdna_cmd. The rest of the payload in amdxdna_cmd is cmd BO handles.
 *
 * The fixed-size fields come first; data[] is a flexible array member whose
 * element count is given by command_count.
 */
struct amdxdna_cmd_chain {
/* Number of elements in data[] (see the __counted_by annotation below). */
__u32 command_count;
/* NOTE(review): presumably the index of the command being submitted --
 * confirm against the kernel driver. */
__u32 submit_index;
/* NOTE(review): presumably the index of the command that failed, if any --
 * confirm against the kernel driver. */
__u32 error_index;
/* Reserved words. */
__u32 reserved[3];
/* Chain entries; per the comment above these are cmd BO handles. */
__u64 data[] __counted_by(command_count);
};


/*
 * Exec buffer command header format.
 *
 * The first word can be accessed either through the named bit-fields or as
 * the single raw value `header`; the bit-field widths (4 + 6 + 2 + 11 + 5 + 4)
 * add up to 32 bits. The payload follows in data[], whose element count is
 * given by the `count` bit-field.
 */
struct amdxdna_cmd {
union {
/* Bit-field view of the header word. */
struct {
__u32 state : 4;
__u32 unused : 6;
__u32 extra_cu_masks : 2;
/* Number of elements in data[] (see the __counted_by annotation below). */
__u32 count : 11;
__u32 opcode : 5;
__u32 reserved : 4;
};
/* The same word viewed as one raw value. */
__u32 header;
};
/* Command payload words. */
__u32 data[] __counted_by(count);
};

namespace rocr {
namespace AMD {
Expand All @@ -71,7 +99,7 @@ class AieAqlQueue : public core::Queue,

AieAqlQueue() = delete;
AieAqlQueue(AieAgent *agent, size_t req_size_pkts, uint32_t node_id);
~AieAqlQueue();
~AieAqlQueue() override;

hsa_status_t Inactivate() override;
hsa_status_t SetPriority(HSA_QUEUE_PRIORITY priority) override;
Expand Down Expand Up @@ -100,7 +128,7 @@ class AieAqlQueue : public core::Queue,
void *value) override;

// AIE-specific API
AieAgent &GetAgent() { return agent_; }
AieAgent &GetAgent() const { return agent_; }
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If you're giving non-const access to agent_ then this function should be non-const.

/// @brief Records the hardware context handle for this queue.
///
/// @param hw_ctx_handle Value stored into hw_ctx_handle_.
void SetHwCtxHandle(uint32_t hw_ctx_handle) {
hw_ctx_handle_ = hw_ctx_handle;
}
Expand All @@ -116,7 +144,7 @@ class AieAqlQueue : public core::Queue,
hsa_fence_scope_t releaseFence = HSA_FENCE_SCOPE_NONE,
hsa_signal_t *signal = NULL) override;

uint32_t queue_id_ = INVALID_QUEUEID;
uint64_t queue_id_ = INVALID_QUEUEID;
/// @brief ID of AIE device on which this queue has been mapped.
uint32_t node_id_ = std::numeric_limits<uint32_t>::max();
/// @brief Queue size in bytes.
Expand All @@ -131,6 +159,44 @@ class AieAqlQueue : public core::Queue,
/// @brief Base of the queue's ring buffer storage.
void *ring_buf_ = nullptr;

/// @brief Submits work from the queue to the device.
///
/// NOTE(review): the definition is not visible from this header; the
/// parameter descriptions below are inferred from the names and should be
/// confirmed against the implementation.
///
/// @param hw_ctx_handle handle of the hardware context to submit to
/// @param fd file descriptor used for driver calls -- presumably the XDNA
/// device fd; confirm
/// @param queue_base presumably the base address of the queue's ring buffer;
/// confirm
/// @param read_dispatch_id presumably the first packet index to process;
/// confirm
/// @param write_dispatch_id presumably the end of the packet range to
/// process; confirm whether it is inclusive
/// @param vmem_handle_mappings map keyed by uint32_t with void* values --
/// presumably BO handle to virtual address; confirm
/// @return an hsa_status_t status code
static hsa_status_t SubmitCmd(
uint32_t hw_ctx_handle, int fd, void *queue_base,
uint64_t read_dispatch_id, uint64_t write_dispatch_id,
std::unordered_map<uint32_t, void *> &vmem_handle_mappings);

/// @brief Creates a command BO and returns a pointer to the memory and
/// the corresponding handle
///
/// @param size size of memory to allocate
/// @param handle A pointer to the BO handle
/// @param cmd A pointer to the buffer
/// @param fd file descriptor used for driver calls -- presumably the XDNA
/// device fd; confirm against the implementation
/// @return an hsa_status_t status code
static hsa_status_t CreateCmd(uint32_t size, uint32_t *handle,
amdxdna_cmd **cmd, int fd);

/// @brief Adds all BOs in a command packet payload to a vector
/// and replaces the handles with a virtual address
///
/// @param count Number of entries in the command
/// @param bo_args A reference to a vector that contains all bo handles
/// @param cmd_pkt_payload A pointer to the payload of the command
/// @param vmem_handle_mappings map keyed by uint32_t with void* values --
/// presumably the BO handle to virtual address table used for the
/// replacement; confirm against the implementation
static void RegisterCmdBOs(
uint32_t count, std::vector<uint32_t> &bo_args,
hsa_amd_aie_ert_start_kernel_data_t *cmd_pkt_payload,
std::unordered_map<uint32_t, void *> &vmem_handle_mappings);

/// @brief Syncs all BOs referenced in bo_args
///
/// @param bo_args vector containing handles of BOs to sync
/// @param fd file descriptor used for driver calls -- presumably the XDNA
/// device fd; confirm against the implementation
/// @return an hsa_status_t status code
static hsa_status_t SyncBos(std::vector<uint32_t> &bo_args, int fd);

/// @brief Executes a command and waits for its completion
///
/// @param exec_cmd Structure containing the details of the command to execute
/// @param hw_ctx_handle the handle of the hardware context to run this
/// command
/// @param fd file descriptor used for driver calls -- presumably the XDNA
/// device fd; confirm against the implementation
/// @return an hsa_status_t status code
static hsa_status_t ExecCmdAndWait(amdxdna_drm_exec_cmd *exec_cmd,
uint32_t hw_ctx_handle, int fd);

/// @brief Handle for an application context on the AIE device.
///
/// Each user queue will have an associated context. This handle is assigned
Expand All @@ -154,4 +220,4 @@ class AieAqlQueue : public core::Queue,
} // namespace AMD
} // namespace rocr

#endif // header guard
#endif // HSA_RUNTIME_CORE_INC_AMD_HW_AQL_AIE_COMMAND_PROCESSOR_H_
8 changes: 4 additions & 4 deletions runtime/hsa-runtime/core/inc/amd_xdna_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@

#include "core/inc/driver.h"
#include "core/inc/memory_region.h"
#include "core/driver/xdna/uapi/amdxdna_accel.h"

namespace rocr {
namespace core {
Expand All @@ -69,6 +70,9 @@ class XdnaDriver : public core::Driver {
hsa_status_t Init() override;
hsa_status_t QueryKernelModeDriver(core::DriverQuery query) override;

hsa_status_t GetHandleMappings(std::unordered_map<uint32_t, void*> &vmem_handle_mappings);
hsa_status_t GetFd(int &fd);

hsa_status_t GetAgentProperties(core::Agent &agent) const override;
hsa_status_t
GetMemoryProperties(uint32_t node_id,
Expand Down Expand Up @@ -110,10 +114,6 @@ class XdnaDriver : public core::Driver {
void *dev_heap_aligned = nullptr;
static constexpr size_t dev_heap_size = 48 * 1024 * 1024;
static constexpr size_t dev_heap_align = 64 * 1024 * 1024;

/// @brief DRM buffer object handle for the device heap. Assigned by the
/// kernel-mode driver.
uint32_t dev_heap_handle = 0;
};

} // namespace AMD
Expand Down
2 changes: 2 additions & 0 deletions runtime/hsa-runtime/core/runtime/amd_aie_agent.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,8 @@ void AieAgent::InitAllocators() {
? mem
: nullptr;
};

system_deallocator_ = [](void* ptr) { core::Runtime::runtime_singleton_->FreeMemory(ptr); };
break;
}
}
Expand Down
Loading