[GPU] Reserve CPU resource for GPU inference (openvinotoolkit#19214)
* Update.

* Update.

* Update.

* add test case.

* Update.

* Update test cases.

* Update.

* Update.

* Updated.

* Updated.

* Updated.

---------

Co-authored-by: Chen Peter <[email protected]>
yangwang201911 and peterchen-intel authored Oct 17, 2023
1 parent 494f3d4 commit 86000bb
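
This commit lets the GPU plugin reserve CPU cores for its host-side work when CPU pinning is requested at compile time. A minimal usage sketch, assuming a build that contains this change and a visible GPU device; the model path and stream count are illustrative:

    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        auto model = core.read_model("model.xml");  // hypothetical model path

        // ov::hint::enable_cpu_pinning(true) triggers the new branch in compiled_model.cpp below:
        // the plugin reserves one MAIN_CORE_PROC thread per stream for its host-side work.
        auto compiled = core.compile_model(model,
                                           "GPU",
                                           ov::num_streams(2),
                                           ov::hint::enable_cpu_pinning(true));
        auto request = compiled.create_infer_request();
        return 0;
    }
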
Showing 6 changed files with 75 additions and 18 deletions.
34 changes: 18 additions & 16 deletions src/inference/src/dev/threading/cpu_streams_executor_internal.cpp
@@ -131,23 +131,25 @@ void reserve_cpu_by_streams_info(const std::vector<std::vector<int>> _streams_in
}

for (size_t i = 0; i < _cpu_mapping_table.size(); i++) {
std::string cpu_string = std::to_string(_cpu_mapping_table[i][CPU_MAP_CORE_TYPE]) +
std::to_string(_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID]) +
std::to_string(_cpu_mapping_table[i][CPU_MAP_SOCKET_ID]);
for (size_t j = 0; j < stream_conditions.size(); j++) {
if (std::find(stream_conditions[j].begin(), stream_conditions[j].end(), cpu_string) !=
stream_conditions[j].end()) {
_stream_processors[stream_pos[j]].push_back(_cpu_mapping_table[i][CPU_MAP_PROCESSOR_ID]);
_cpu_mapping_table[i][CPU_MAP_USED_FLAG] = _cpu_status;
if (static_cast<int>(_stream_processors[stream_pos[j]].size()) ==
streams_table[j][THREADS_PER_STREAM]) {
stream_pos[j]++;
stream_num[j]++;
}
if (stream_num[j] >= streams_table[j][NUMBER_OF_STREAMS]) {
stream_conditions[j].clear();
if (_cpu_mapping_table[i][CPU_MAP_USED_FLAG] == NOT_USED) {
std::string cpu_string = std::to_string(_cpu_mapping_table[i][CPU_MAP_CORE_TYPE]) +
std::to_string(_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID]) +
std::to_string(_cpu_mapping_table[i][CPU_MAP_SOCKET_ID]);
for (size_t j = 0; j < stream_conditions.size(); j++) {
if (std::find(stream_conditions[j].begin(), stream_conditions[j].end(), cpu_string) !=
stream_conditions[j].end()) {
_stream_processors[stream_pos[j]].push_back(_cpu_mapping_table[i][CPU_MAP_PROCESSOR_ID]);
_cpu_mapping_table[i][CPU_MAP_USED_FLAG] = _cpu_status;
if (static_cast<int>(_stream_processors[stream_pos[j]].size()) ==
streams_table[j][THREADS_PER_STREAM]) {
stream_pos[j]++;
stream_num[j]++;
}
if (stream_num[j] >= streams_table[j][NUMBER_OF_STREAMS]) {
stream_conditions[j].clear();
}
break;
}
break;
}
}
}
41 changes: 39 additions & 2 deletions src/inference/tests/unit/cpu_reserve_test.cpp
@@ -44,6 +44,12 @@ class LinuxCpuReserveTests : public ov::test::TestsCommon,
test_data._cpu_status);

ASSERT_EQ(test_data._stream_processors, test_processors);
for (size_t i = 0; i < test_processors.size(); i++) {
for (size_t j = 0; j < test_processors[i].size(); j++) {
ASSERT_EQ(test_data._cpu_mapping_table[test_processors[i][j]][CPU_MAP_USED_FLAG],
test_data._cpu_status);
}
}
}
};

@@ -869,8 +875,6 @@ LinuxCpuReserveTestCase _1socket_16cores_hyper_4streams_ecoreonly = {
},
NOT_USED,
};
// streams_info_table={1, MAIN_CORE_PROC, 36}, but the number of physical cores is 18,
// in this case, threads are assigned on physical and logical cores.
LinuxCpuReserveTestCase _1socket_18cores_hyper_1streams = {
36,
1,
@@ -933,6 +937,38 @@ LinuxCpuReserveTestCase _1socket_18cores_hyper_2streams = {
},
NOT_USED,
};
// other plugin reserved 2 threads
LinuxCpuReserveTestCase _1socket_18cores_hyper_plugin_reserve_2threads = {
36,
1,
{{36, 18, 0, 18, 0, 0}},
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, {3, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
{4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {5, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
{6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
{8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
{10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
{12, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {13, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
{14, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, {15, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
{16, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {17, 0, 0, 9, HYPER_THREADING_PROC, 8, -1},
{18, 0, 0, 0, MAIN_CORE_PROC, 0, CPU_USED}, {19, 0, 0, 1, MAIN_CORE_PROC, 1, CPU_USED},
{20, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {21, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{22, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {23, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{24, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {25, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
{26, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {27, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
{28, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {29, 0, 0, 11, MAIN_CORE_PROC, 11, -1},
{30, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, {31, 0, 0, 13, MAIN_CORE_PROC, 13, -1},
{32, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, {33, 0, 0, 15, MAIN_CORE_PROC, 15, -1},
{34, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, {35, 0, 0, 17, MAIN_CORE_PROC, 17, -1},
},
{{16, MAIN_CORE_PROC, 1, 0, 0}, {16, HYPER_THREADING_PROC, 1, 0, 0}},
{
{20}, {21}, {22}, {23}, {24}, {25}, {26}, {27}, {28}, {29}, {30}, {31}, {32}, {33}, {34}, {35},
{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, {13}, {14}, {15},
},
NOT_USED,
};
LinuxCpuReserveTestCase _1socket_32cores_hyper_1streams = {
32,
1,
@@ -986,6 +1022,7 @@ INSTANTIATE_TEST_SUITE_P(CPUReserve,
_1socket_16cores_hyper_4streams_ecoreonly,
_1socket_18cores_hyper_1streams,
_1socket_18cores_hyper_2streams,
_1socket_18cores_hyper_plugin_reserve_2threads,
_1socket_32cores_hyper_1streams));
#endif
} // namespace
15 changes: 15 additions & 0 deletions src/plugins/intel_gpu/src/plugin/compiled_model.cpp
@@ -35,6 +35,20 @@ std::shared_ptr<ov::threading::ITaskExecutor> create_task_executor(const std::sh
if (config.get_property(ov::internal::exclusive_async_requests)) {
//exclusive_async_requests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior
return plugin->get_executor_manager()->get_executor("GPU");
} else if (config.get_property(ov::hint::enable_cpu_pinning)) {
auto executor_config =
ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor",
0,
0,
ov::threading::IStreamsExecutor::ThreadBindingType::CORES,
1,
0,
0,
ov::threading::IStreamsExecutor::Config::PreferredCoreType::BIG,
{{config.get_property(ov::num_streams), MAIN_CORE_PROC, 1, 0, 0}},
true};
auto post_config = ov::threading::IStreamsExecutor::Config::reserve_cpu_threads(executor_config);
return std::make_shared<ov::threading::CPUStreamsExecutor>(post_config);
} else {
return std::make_shared<ov::threading::CPUStreamsExecutor>(
ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_property(ov::num_streams)});
@@ -250,6 +264,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {

// Configs
ov::PropertyName{ov::enable_profiling.name(), PropertyMutability::RO},
ov::PropertyName{ov::hint::enable_cpu_pinning.name(), PropertyMutability::RO},
ov::PropertyName{ov::hint::model_priority.name(), PropertyMutability::RO},
ov::PropertyName{ov::intel_gpu::hint::host_task_priority.name(), PropertyMutability::RO},
ov::PropertyName{ov::intel_gpu::hint::queue_priority.name(), PropertyMutability::RO},
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -593,6 +593,7 @@ std::vector<ov::PropertyName> Plugin::get_supported_properties() const {
ov::PropertyName{ov::num_streams.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::num_requests.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::enable_cpu_pinning.name(), PropertyMutability::RW},
ov::PropertyName{ov::device::id.name(), PropertyMutability::RW},
};

1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/runtime/execution_config.cpp
@@ -48,6 +48,7 @@ void ExecutionConfig::set_default() {
std::make_tuple(ov::hint::performance_mode, ov::hint::PerformanceMode::LATENCY, PerformanceModeValidator()),
std::make_tuple(ov::hint::execution_mode, ov::hint::ExecutionMode::PERFORMANCE),
std::make_tuple(ov::hint::num_requests, 0),
std::make_tuple(ov::hint::enable_cpu_pinning, false),

std::make_tuple(ov::intel_gpu::hint::host_task_priority, ov::hint::Priority::MEDIUM),
std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM),
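
With the default registered above, CPU pinning stays off unless it is requested explicitly. A short sketch of querying the property, again assuming a build with this change and a GPU device present; the model path is illustrative:

    #include <openvino/openvino.hpp>
    #include <iostream>

    int main() {
        ov::Core core;
        // The plugin-level property is read/write and defaults to false (execution_config.cpp above).
        std::cout << std::boolalpha
                  << core.get_property("GPU", ov::hint::enable_cpu_pinning) << "\n";

        auto model = core.read_model("model.xml");  // hypothetical model path
        auto compiled = core.compile_model(model, "GPU", ov::hint::enable_cpu_pinning(true));
        // On the compiled model the same property is exposed read-only (compiled_model.cpp above).
        std::cout << compiled.get_property(ov::hint::enable_cpu_pinning) << "\n";
        return 0;
    }
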
@@ -42,6 +42,7 @@ const std::vector<ov::AnyMap> gpu_setcore_properties = {
const std::vector<ov::AnyMap> gpu_compileModel_properties = {
{ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY),
ov::hint::num_requests(10),
ov::hint::enable_cpu_pinning(true),
ov::enable_profiling(true)}};

INSTANTIATE_TEST_SUITE_P(smoke_gpuCompileModelBehaviorTests,
