Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Ported mem-free changes to mono repo #6

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions include/pando-lib-galois/containers/array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
#include <pando-rt/memory/allocate_memory.hpp>
#include <pando-rt/memory/global_ptr.hpp>
#include <pando-rt/pando-rt.hpp>
#include <pando-rt/sync/wait_group.hpp>
#include <pando-rt/utility/math.hpp>

namespace galois {
Expand Down Expand Up @@ -105,6 +106,18 @@ class Array {
m_size = 0;
}

/**
 * @brief Releases the array's storage, forwarding @p wgh to the deallocation call.
 *
 * After the call the array holds a null data pointer and a size of zero.
 *
 * @param[in] wgh wait group handle passed through to `pando::deallocateMemory`
 */
void deinitialize(pando::WaitGroup::HandleType wgh) {
  // No element destructors are invoked here, hence the restriction on T.
  static_assert(std::is_trivially_destructible_v<T>,
                "Array only supports trivially destructible types");

  pando::deallocateMemory(m_data, m_size, wgh);

  // Reset the bookkeeping so the object no longer refers to the released storage.
  m_size = 0;
  m_data = nullptr;
}

/**
 * @brief Returns a global pointer to the element at index @p pos.
 *
 * @param[in] pos index of the element; no bounds check is performed in this function
 */
constexpr pando::GlobalPtr<T> get(std::uint64_t pos) noexcept {
return &m_data[pos];
}
Expand Down
16 changes: 16 additions & 0 deletions include/pando-lib-galois/containers/dist_array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,22 @@ class DistArray {
m_data.deinitialize();
}

/**
 * @brief Deinitializes every array stored in `m_data` and then `m_data` itself.
 *
 * Does nothing when `m_data` has no backing storage.
 *
 * @param[in] wgh wait group handle forwarded to each `deinitialize` call
 */
void deinitialize(pando::WaitGroup::HandleType wgh) {
  static_assert(std::is_trivially_destructible_v<T>,
                "Array only supports trivially destructible types");

  if (m_data.data() != nullptr) {
    // Release the inner arrays first; the outer array that holds them goes last.
    for (pando::Array<T> inner : m_data) {
      inner.deinitialize(wgh);
    }
    m_data.deinitialize(wgh);
  }
}

/**
 * @brief Returns a global reference to the element at index @p pos by dereferencing `get(pos)`.
 *
 * @param[in] pos index of the element
 */
constexpr pando::GlobalRef<T> operator[](std::uint64_t pos) noexcept {
return *get(pos);
}
Expand Down
6 changes: 6 additions & 0 deletions include/pando-lib-galois/containers/hashtable.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,12 @@ class HashTable {
m_size = 0;
}

// @brief Resets the element count to zero and deinitializes the backing buffer, forwarding `wgh`
// to the buffer's deinitialize.
void deinitialize(pando::WaitGroup::HandleType wgh) {
  m_size = 0;
  m_buffer.deinitialize(wgh);
}

// @brief Resizes the backing array to `capacity`.
pando::Status resize(std::size_t capacity) {
if (capacity <= m_buffer.size()) {
Expand Down
4 changes: 4 additions & 0 deletions include/pando-lib-galois/containers/host_indexed_map.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ class HostIndexedMap {
deallocateMemory(m_items, getNumHosts());
}

/**
 * @brief Deallocates the `m_items` storage (`getNumHosts()` entries), passing @p wgh along to
 *        `deallocateMemory`. `m_items` itself is not reset by this function.
 *
 * @param[in] wgh wait group handle forwarded to `deallocateMemory`
 */
void deinitialize(pando::WaitGroup::HandleType wgh) {
  const auto hostCount = getNumHosts();
  deallocateMemory(m_items, hostCount, wgh);
}

/**
 * @brief Returns a reference to the item stored at the index given by `getCurrentNode()`.
 */
pando::GlobalRef<T> getLocal() noexcept {
  const auto thisNode = getCurrentNode();
  return m_items[thisNode];
}
Expand Down
16 changes: 16 additions & 0 deletions include/pando-lib-galois/containers/per_thread.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,22 @@ class PerThreadVector {
m_data.deinitialize();
}

/**
 * @brief Deinitializes the index array and every vector held by the PerThreadVector.
 *
 * `m_indices` is deinitialized only if it has backing storage. Likewise, the vectors in `m_data`
 * and `m_data` itself are deinitialized only if `m_data` has backing storage.
 *
 * @param[in] wgh wait group handle forwarded to each `deinitialize` call
 */
void deinitialize(pando::WaitGroup::HandleType wgh) {
  const bool hasIndices = m_indices.m_data.data() != nullptr;
  if (hasIndices) {
    m_indices.deinitialize(wgh);
  }

  const bool hasVectors = m_data.m_data.data() != nullptr;
  if (hasVectors) {
    // Release each stored vector before the array that holds them.
    for (pando::Vector<T> stored : m_data) {
      stored.deinitialize(wgh);
    }
    m_data.deinitialize(wgh);
  }
}

/**
* @brief Returns the current hardware thread's vector.
*/
Expand Down
8 changes: 8 additions & 0 deletions include/pando-lib-galois/containers/stack.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,14 @@ class Stack {
m_size = 0;
}

/**
 * @brief Resets the element count to zero and deinitializes the underlying buffer.
 *
 * @param[in] wgh wait group handle forwarded to the buffer's `deinitialize`
 */
void deinitialize(pando::WaitGroup::HandleType wgh) {
  m_size = 0;
  m_buf.deinitialize(wgh);
}

/**
 * @brief Reports whether `size()` is zero.
 */
bool empty() const {
  const auto count = size();
  return count == 0;
}
Expand Down
16 changes: 16 additions & 0 deletions pando-rt/include/pando-rt/containers/array.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@

#include "../memory/allocate_memory.hpp"
#include "../memory/global_ptr.hpp"
#include "../memory/deallocate_memory_wait.hpp"
#include "../utility/math.hpp"
#include "../sync/wait_group.hpp"

namespace pando {

Expand Down Expand Up @@ -111,6 +113,20 @@ class Array {
m_memoryType = MemoryType::Unknown;
}

/**
 * @brief Releases the array's storage, forwarding @p wgh to `deallocateMemory`.
 *
 * After the call the data pointer is null, the size is zero and the memory type is
 * `MemoryType::Unknown`.
 *
 * @param[in] wgh wait group handle passed through to `deallocateMemory`
 */
void deinitialize(pando::WaitGroup::HandleType wgh) {
  // TODO(ypapadop-amd) Only trivially destructible objects are supported, since deinitialize()
  // does not call their destructor.
  static_assert(std::is_trivially_destructible_v<T>,
                "Array only supports trivially destructible types");

  deallocateMemory(m_data, m_size, wgh);

  // Return the object to its empty state.
  m_memoryType = MemoryType::Unknown;
  m_size = 0;
  m_data = nullptr;
}

/**
 * @brief Returns the memory type currently recorded for this array (`m_memoryType`).
 */
MemoryType getMemoryType() const noexcept {
return m_memoryType;
}
Expand Down
8 changes: 8 additions & 0 deletions pando-rt/include/pando-rt/containers/vector.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,14 @@ class Vector {
m_size = 0;
}

/**
 * @brief Resets the element count to zero and deinitializes the underlying buffer.
 *
 * @param[in] wgh wait group handle forwarded to the buffer's `deinitialize`
 */
void deinitialize(pando::WaitGroup::HandleType wgh) {
  m_size = 0;
  m_buf.deinitialize(wgh);
}

/**
* @brief Returns the memory this vector is allocated in.
*/
Expand Down
95 changes: 95 additions & 0 deletions pando-rt/include/pando-rt/memory/deallocate_memory_wait.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
// SPDX-License-Identifier: MIT
/* Copyright (c) 2023. University of Texas at Austin. All rights reserved. */
/* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved. */

#ifndef PANDO_RT_MEMORY_DEALLOCATE_MEMORY_WAIT_HPP_
#define PANDO_RT_MEMORY_DEALLOCATE_MEMORY_WAIT_HPP_

#include <cstdint>

#include "../execution/execute_on.hpp"
#include "../execution/execute_on_wait.hpp"
#include "../locality.hpp"
#include "../memory/global_ptr.hpp"
#include "../status.hpp"
#include "../stdlib.hpp"
#include "../sync/future.hpp"
#include "../sync/wait_group.hpp"
#include "../utility/expected.hpp"
#include "memory_type.hpp"

namespace pando {

namespace detail {

/**
* @brief Deallocates the memory @p p points to.
*
* Declaration only; the definition is not part of this header. `deallocateMemory` dispatches this
* function through `executeOn` when the calling thread cannot free the memory directly.
*
* @param[in] p pointer to memory to deallocate
* @param[in] size number of bytes to deallocate
* @param[in] wgh wait group that waits on deallocate
*
* @ingroup ROOT
*/
void deallocateMemoryWaitImpl(GlobalPtr<void> p, std::uint64_t size, pando::WaitGroup::HandleType wgh);

} // namespace detail

/**
 * @brief Deallocates memory previously allocated with `allocateMemory`, tracking remote
 *        deallocations through a wait group.
 *
 * Memory the calling thread can access directly is freed in place with
 * `detail::deallocateMemoryImpl` and does not touch @p wgh. Otherwise @p wgh is incremented and
 * `detail::deallocateMemoryWaitImpl` is dispatched with `executeOn` to the place owning @p p.
 * NOTE(review): the remote function presumably signals `wgh.done()` once the memory is freed;
 * confirm against its definition.
 *
 * Aborts if the remote dispatch fails or if @p p is neither main memory nor L2SP.
 *
 * @tparam T object type to deallocate memory for
 *
 * @param[in] p   pointer to memory to deallocate; a null pointer is ignored
 * @param[in] n   number of objects @p p points to
 * @param[in] wgh wait group that waits on deallocate
 *
 * @ingroup ROOT
 */
template <typename T>
void deallocateMemory(GlobalPtr<T> p, std::uint64_t n, pando::WaitGroup::HandleType wgh) {
  if (p == nullptr) {
    return;
  }

  const auto numBytes = n * sizeof(T);
  const auto place = pando::localityOf(p);
  const auto memoryType = memoryTypeOf(p);

  switch (memoryType) {
    case MemoryType::Main: {
      // CP / harts have direct access to main memory on their node
      if (place.node == getCurrentNode()) {
        detail::deallocateMemoryImpl(p, numBytes);
      } else {
        // register the pending remote deallocation before dispatching it
        wgh.addOne();
        if (executeOn(place, &detail::deallocateMemoryWaitImpl, static_cast<GlobalPtr<void>>(p),
                      numBytes, wgh) != Status::Success) {
          PANDO_ABORT("Deallocation failed");
        }
      }
    } break;

    case MemoryType::L2SP: {
      // only harts on the same pod as the requested L2SP have direct access
      const auto thisPlace = getCurrentPlace();
      if (!isOnCP() && (thisPlace.node == place.node) && (thisPlace.pod == place.pod)) {
        detail::deallocateMemoryImpl(p, numBytes);
      } else {
        // register the pending remote deallocation before dispatching it, exactly as the remote
        // main-memory path does; otherwise the wait group counter is left unbalanced
        wgh.addOne();
        if (executeOn(place, &detail::deallocateMemoryWaitImpl, static_cast<GlobalPtr<void>>(p),
                      numBytes, wgh) != Status::Success) {
          PANDO_ABORT("Deallocation failed");
        }
      }
    } break;

    case MemoryType::L1SP:
    default:
      PANDO_ABORT("Invalid pointer to deallocate");
      break;
  }
}

} // namespace pando

#endif // PANDO_RT_MEMORY_DEALLOCATE_MEMORY_WAIT_HPP_
132 changes: 132 additions & 0 deletions pando-rt/include/pando-rt/sync/wait_group.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2023. University of Texas at Austin. All rights reserved.

#ifndef PANDO_RT_SYNC_WAIT_GROUP_HPP_
#define PANDO_RT_SYNC_WAIT_GROUP_HPP_

#include "../memory/allocate_memory.hpp"
#include "../memory/global_ptr.hpp"
#include "../pando-rt.hpp"
#include "../sync/atomic.hpp"
#include "../sync/notification.hpp"
#include <pando-rt/tracing.hpp>
#include "export.h"

namespace pando {
/**
 * @brief Termination detection mechanism for nested parallelism, built on a shared counter.
 *
 * Pending work is registered through a HandleType (`add` / `addOne`), completion is reported
 * with `done`, and `wait` returns once the counter is no longer positive.
 */
class WaitGroup {
  ///@brief Pointer to the counter shared by the group and all handles obtained from it
  pando::GlobalPtr<std::int64_t> m_count = nullptr;

public:
  /**
   * @brief Copyable handle onto the counter of the WaitGroup it was obtained from.
   */
  class HandleType {
    pando::GlobalPtr<std::int64_t> m_count = nullptr;

    // Private: only WaitGroup (a friend) binds a handle to a counter.
    explicit HandleType(pando::GlobalPtr<std::int64_t> countPtr) : m_count(countPtr) {}
    friend WaitGroup;

  public:
    HandleType() : m_count(nullptr) {}

    HandleType(const HandleType&) = default;
    HandleType(HandleType&&) = default;

    HandleType& operator=(const HandleType&) = default;
    HandleType& operator=(HandleType&&) = default;

    /**
     * @brief Registers @p delta additional items that must complete before waiters are released.
     *
     * @param[in] delta the number of items to wait on
     */
    void add(std::uint32_t delta) {
      const auto increment = static_cast<std::int64_t>(delta);
      pando::atomicFetchAdd(m_count, increment, std::memory_order_release);
    }

    /**
     * @brief Registers exactly one additional item to wait on.
     */
    void addOne() {
      add(std::uint32_t{1});
    }

    /**
     * @brief Signals that one of the items in the WaitGroup has completed.
     */
    void done() {
      const auto one = static_cast<std::int64_t>(1);
      pando::atomicDecrement(m_count, one, std::memory_order_release);
    }
  };

  WaitGroup() : m_count(nullptr) {}

  // Neither copyable nor movable.
  WaitGroup(const WaitGroup&) = delete;
  WaitGroup(WaitGroup&&) = delete;
  WaitGroup& operator=(const WaitGroup&) = delete;
  WaitGroup& operator=(WaitGroup&&) = delete;

  /**
   * @brief Allocates the counter and sets it to @p initialCount.
   *
   * @param[in] initialCount the count that the WaitGroup should start with
   * @param[in] place        the location the counter should be allocated at
   * @param[in] memoryType   the type of memory the counter should be allocated in
   *
   * @return the allocation error if the counter could not be allocated, otherwise
   *         `pando::Status::Success`
   *
   * @warning one of the initialize methods must be called before use
   */
  [[nodiscard]] pando::Status initialize(std::uint32_t initialCount, pando::Place place,
                                         pando::MemoryType memoryType) {
    const auto allocation = pando::allocateMemory<std::int64_t>(1, place, memoryType);
    if (!allocation.hasValue()) {
      return allocation.error();
    }

    m_count = allocation.value();
    *m_count = static_cast<std::int64_t>(initialCount);
    pando::atomicThreadFence(std::memory_order_release);
    return pando::Status::Success;
  }

  /**
   * @brief Allocates the counter in main memory at the current place and sets it to
   *        @p initialCount.
   *
   * @param[in] initialCount the count that the WaitGroup should start with
   *
   * @warning one of the initialize methods must be called before use
   */
  [[nodiscard]] pando::Status initialize(std::uint32_t initialCount) {
    const auto here = pando::getCurrentPlace();
    return initialize(initialCount, here, pando::MemoryType::Main);
  }

  /**
   * @brief Frees the counter, if one is allocated, and clears the pointer to it.
   *
   * @warning not threadsafe but designed to be idempotent.
   */
  void deinitialize() {
    if (m_count == nullptr) {
      return;
    }
    pando::deallocateMemory(m_count, 1);
    m_count = nullptr;
  }

  /**
   * @brief Returns a handle bound to this WaitGroup's counter.
   */
  HandleType getHandle() {
    return HandleType(m_count);
  }

  /**
   * @brief Waits until the number of items to wait on is zero.
   *
   * @return `pando::Status::Error` if the counter is below zero after the wait, otherwise
   *         `pando::Status::Success`
   */
  [[nodiscard]] pando::Status wait() {
    pando::waitUntil([this] {
      return *m_count <= static_cast<std::int64_t>(0);
    });
    pando::atomicThreadFence(std::memory_order_acquire);
    PANDO_MEM_STAT_NEW_PHASE();

    // A negative counter means more completions were signalled than items were registered.
    const bool overSignalled = *m_count < static_cast<std::int64_t>(0);
    return overSignalled ? pando::Status::Error : pando::Status::Success;
  }
};
} // namespace pando
#endif // PANDO_RT_SYNC_WAIT_GROUP_HPP_
Loading