[CPU] Refactor memory control and allocation
EgorDuplensky committed Nov 11, 2024
1 parent 3c5744e commit b002dd0
Showing 87 changed files with 1,000 additions and 351 deletions.
2 changes: 2 additions & 0 deletions src/inference/dev_api/openvino/runtime/memory_solver.hpp
@@ -52,13 +52,15 @@ class MemorySolver {
struct Box {
/** Execution order index of first use. The data will be produced here. */
int start;
// intel_cpu::GlobalExecutionIndex start;

/**
* The execution order index of last use. After that data will be released.
* -1 is a reserved value for "till to end". The data will be alive to very
* end of execution.
*/
int finish;
// intel_cpu::GlobalExecutionIndex finish;

/** Size of data. In abstract unit of measure (byte, simd, cache line, ...) */
int64_t size;
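For context, a Box describes the lifetime interval and size of one tensor; the solver packs all boxes into a single arena and reports a per-box offset. Below is a minimal sketch of how boxes might be built from global execution indices. The Box id field and the solve()/get_offset() names are taken from the dev API and are assumptions here, not part of this diff.

#include <cstdint>
#include <utility>
#include <vector>
#include "openvino/runtime/memory_solver.hpp"

// Illustrative only: pack tensor lifetimes into one shared arena.
int64_t plan_arena(const std::vector<std::pair<int, int>>& lifetimes,
                   const std::vector<int64_t>& sizes) {
    std::vector<ov::MemorySolver::Box> boxes;
    for (size_t i = 0; i < lifetimes.size(); ++i) {
        // start / finish are global execution order indices;
        // finish == -1 would mean "alive until the very end of execution"
        boxes.push_back({lifetimes[i].first, lifetimes[i].second,
                         sizes[i], static_cast<int64_t>(i)});
    }
    ov::MemorySolver solver(boxes);
    return solver.solve();  // total arena size; per-tensor offsets via solver.get_offset(id)
}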
26 changes: 26 additions & 0 deletions src/plugins/intel_cpu/src/allocation_context.hpp
@@ -0,0 +1,26 @@
// Copyright (C) 2024 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
//

#pragma once

#include <memory>
#include <unordered_map>
#include <vector>

namespace ov {
namespace intel_cpu {

class Node;
class Edge;

using GlobalExecutionIndex = std::unordered_map<std::shared_ptr<Node>, std::pair<int, int>>;

struct AllocationContext {
std::vector<std::shared_ptr<Edge>> edges;
GlobalExecutionIndex execIndex;
std::vector<size_t> syncPoints;
};

} // namespace intel_cpu
} // namespace ov
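The new AllocationContext bundles everything a cross-graph memory plan needs: the full list of edges, each node's [start, finish] position in the global execution order, and the indices of synchronization points. A hypothetical sketch of filling one follows; the helper and its traversal are not part of the commit.

#include <memory>
#include <vector>
#include "allocation_context.hpp"

namespace ov {
namespace intel_cpu {

// Illustrative only: build a context from an already ordered list of nodes.
AllocationContext collectAllocationContext(const std::vector<std::shared_ptr<Node>>& execOrder,
                                           const std::vector<std::shared_ptr<Edge>>& edges) {
    AllocationContext ctx;
    ctx.edges = edges;
    for (size_t i = 0; i < execOrder.size(); ++i) {
        // A plain node occupies a single slot; a node with an inner graph would
        // instead map to the [start, finish] range covered by its inner nodes.
        ctx.execIndex[execOrder[i]] = {static_cast<int>(i), static_cast<int>(i)};
    }
    // e.g. a single sync point after the last node
    ctx.syncPoints.push_back(execOrder.size());
    return ctx;
}

}  // namespace intel_cpu
}  // namespace ov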
20 changes: 15 additions & 5 deletions src/plugins/intel_cpu/src/compiled_model.cpp
@@ -4,6 +4,7 @@

#include "compiled_model.h"
#include "async_infer_request.h"
#include "graph.h"
#include "infer_request.h"
#include "itt.h"
#include "low_precision/low_precision.hpp"
@@ -19,6 +20,7 @@
#include "openvino/runtime/threading/cpu_streams_info.hpp"
#include "openvino/runtime/threading/cpu_message.hpp"
#include "utils/serialize.hpp"
#include "memory_control.hpp"

#include "cpu/x64/cpu_isa_traits.hpp"
#include <cstring>
@@ -52,7 +54,8 @@ CompiledModel::CompiledModel(const std::shared_ptr<ov::Model>& model,
m_cfg{cfg},
m_name{model->get_name()},
m_loaded_from_cache(loaded_from_cache),
m_sub_memory_manager(sub_memory_manager) {
m_sub_memory_manager(sub_memory_manager),
m_networkMemoryControl(std::make_shared<NetworkMemoryControl>()) {
m_mutex = std::make_shared<std::mutex>();
const auto& core = m_plugin->get_core();
if (!core)
@@ -155,17 +158,24 @@ CompiledModel::GraphGuard::Lock CompiledModel::get_graph() const {
GraphContext::Ptr ctx;
{
std::lock_guard<std::mutex> lock{*m_mutex.get()};
MemoryControl* memoryControl = m_networkMemoryControl->createMemoryControlUnit();
auto isQuantizedFlag = (m_cfg.lpTransformsMode == Config::On) &&
ov::pass::low_precision::LowPrecision::isFunctionQuantized(m_model);

ctx = std::make_shared<GraphContext>(m_cfg,
m_socketWeights[socketId],
isQuantizedFlag,
memoryControl,
m_networkMemoryControl,
streamsExecutor,
m_sub_memory_manager);
m_sub_memory_manager,
true);
}

const std::shared_ptr<const ov::Model> model = m_model;
graphLock._graph.CreateGraph(model, ctx);
// @todo propagate input / output memory descriptors
graphLock._graph.Init(model, ctx);
// @todo pass input / output memory
graphLock._graph.Activate();
} catch (...) {
exception = std::current_exception();
}
@@ -346,7 +356,7 @@ void CompiledModel::release_memory() {
for (auto&& graph : m_graphs) {
GraphGuard::Lock graph_lock{graph};
auto ctx = graph_lock._graph.getGraphContext();
ctx->getNetworkMemoryControl()->releaseMemory();
m_networkMemoryControl->releaseMemory();
}
}

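Note the change inside get_graph(): the single CreateGraph() call becomes an Init()/Activate() pair, so a graph is first built and only later given memory, once a shared plan exists. Roughly, and simplified rather than verbatim from the commit:

// Simplified sketch of the two-phase flow this file switches to.
Graph graph;
graph.Init(model, ctx);   // build topology and primitives; no tensor memory yet
// ... memory for this graph and any inner graphs can be planned jointly
//     through NetworkMemoryControl at this point ...
graph.Activate();         // allocate edge memory according to the shared plan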
8 changes: 8 additions & 0 deletions src/plugins/intel_cpu/src/compiled_model.h
@@ -4,6 +4,7 @@

#pragma once

#include <memory>
#include <string>
#include <vector>

@@ -19,6 +20,8 @@
namespace ov {
namespace intel_cpu {

class NetworkMemoryControl;

class CompiledModel : public ov::ICompiledModel {
public:
typedef std::shared_ptr<CompiledModel> Ptr;
@@ -51,6 +54,10 @@ class CompiledModel : public ov::ICompiledModel {

void release_memory() override;

std::shared_ptr<NetworkMemoryControl> get_network_memory_control() const {
return m_networkMemoryControl;
}

private:
std::shared_ptr<ov::ISyncInferRequest> create_sync_infer_request() const override;
friend class SyncInferRequest;
@@ -91,6 +98,7 @@

std::vector<std::shared_ptr<CompiledModel>> m_sub_compiled_models;
std::shared_ptr<SubMemoryManager> m_sub_memory_manager = nullptr;
std::shared_ptr<NetworkMemoryControl> m_networkMemoryControl = nullptr;
bool m_has_sub_compiled_models = false;
};

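The new accessor mirrors what release_memory() now does internally: memory is released through the compiled model's own NetworkMemoryControl rather than through the graph context. A hypothetical caller-side use (the surrounding cast and variable are not from the commit):

// Illustrative only: reach the network-level memory control from outside.
// compiled: std::shared_ptr<const ov::ICompiledModel> (assumed to wrap a CPU CompiledModel)
auto cpu_model = std::dynamic_pointer_cast<const CompiledModel>(compiled);
if (cpu_model) {
    cpu_model->get_network_memory_control()->releaseMemory();
}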
12 changes: 10 additions & 2 deletions src/plugins/intel_cpu/src/edge.cpp
@@ -235,7 +235,7 @@ Edge::ReorderStatus Edge::needReorder() {
}

void Edge::reuse(MemoryPtr ptr) {
OPENVINO_ASSERT(ptr != nullptr, "Attempt to reuse initialized memory in " + name());
OPENVINO_ASSERT(ptr != nullptr, "Attempt to reuse uninitialized memory in " + name());
memoryPtr = ptr;
changeStatus(Status::Allocated);

@@ -293,6 +293,11 @@ std::string Edge::name() const {
std::stringstream result;

result << parentPtr->getName() << " port " << parent_port << " <-> " << childPtr->getName() << " port " << child_port;
// result << parentPtr->getName()<< " port " << parent_port
// << " <-> "
// << childPtr->getName() << " port " << child_port
// << " status: "
// << static_cast<int>(getStatus());

return result.str();
}
@@ -411,6 +416,9 @@ const MemoryDesc& Edge::getOutputDesc() const {
}

const MemoryDesc& Edge::getDesc() const {
OPENVINO_ASSERT(!one_of(status, Status::Validated, Status::Allocated),
"Desc of an Allocated edge ", name(), " must be accessed through the memory object");

if (!getInputDesc().isCompatible(getOutputDesc()))
OPENVINO_THROW("Cannot get descriptor for edge: ", getParent()->getName(), "->", getChild()->getName());

@@ -441,7 +449,7 @@ void Edge::validate() {
getChild();

if (status != Status::Allocated || !memoryPtr) {
OPENVINO_THROW("Error memory is not allocated!");
OPENVINO_THROW("Error memory is not allocated for edge: ", name());
}
status = Status::Validated;
}
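The new assert in Edge::getDesc() encodes an access rule: once an edge has real memory, its descriptor should be read from that memory object, not from the planned input/output descs. A sketch of the intended pattern; the helper itself is illustrative, while getStatus(), getMemory() and getDesc() are existing Edge accessors.

// Illustrative only: pick the descriptor source based on edge status.
const MemoryDesc& currentDesc(const EdgePtr& edge) {
    if (edge->getStatus() == Edge::Status::Allocated ||
        edge->getStatus() == Edge::Status::Validated) {
        return edge->getMemory().getDesc();  // desc owned by the allocated memory object
    }
    return edge->getDesc();                  // still only a planned descriptor
}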
12 changes: 6 additions & 6 deletions src/plugins/intel_cpu/src/edge.h
@@ -28,11 +28,11 @@ class Edge {
int pr_port = 0, int ch_port = 0);

enum class Status {
Uninitialized,
NeedAllocation,
NotAllocated,
Allocated,
Validated
Uninitialized, // base edge is unknown yet
NeedAllocation, // edge is the base edge
NotAllocated, // edge is a referencing edge
Allocated, // edge memory is allocated
Validated // edge is validated
};

enum class ReorderStatus {
@@ -82,6 +82,7 @@ class Edge {
}

std::string name() const;
const MemoryDesc& getDesc() const;

private:
std::weak_ptr<Node> parent;
@@ -99,7 +100,6 @@
PortDescBaseCPtr getInputPortDesc() const;
PortDescBaseCPtr getOutputPortDesc() const;

const MemoryDesc& getDesc() const;
bool enforceReorder();

void collectConsumers(std::vector<std::shared_ptr<Node>>& result) const;
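The comments added to Edge::Status spell out the sharing model: a NeedAllocation edge is the base edge that will own the memory, while NotAllocated edges merely reference it. Walking from a referencing edge to its base could look like the sketch below; the helper is illustrative, getSharedEdge() is the existing accessor.

// Illustrative only: follow the reuse chain to the edge that owns the memory.
EdgePtr findBaseEdge(EdgePtr edge) {
    while (edge->getStatus() == Edge::Status::NotAllocated) {
        edge = edge->getSharedEdge();  // the edge this one borrows memory from
    }
    return edge;  // NeedAllocation (before allocation) or Allocated (after)
}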
