[GPU] Reserve CPU resource for GPU inference (openvinotoolkit#19214)
* Update.

* Update.

* Update.

* add test case.

* Update.

* Update test cases.

* Update.

* Update.

* Updated.

* Updated.

* Updated.

---------

Co-authored-by: Chen Peter <[email protected]>
yangwang201911 and peterchen-intel authored Oct 17, 2023
1 parent 494f3d4 commit 86000bb
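
This commit lets the GPU plugin reserve CPU cores for its host-side work when CPU pinning is requested at compile time. A minimal usage sketch, assuming a build that contains this change and a visible GPU device; the model path and stream count are illustrative:

    #include <openvino/openvino.hpp>

    int main() {
        ov::Core core;
        auto model = core.read_model("model.xml");  // hypothetical model path

        // ov::hint::enable_cpu_pinning(true) triggers the new branch in compiled_model.cpp below:
        // the plugin reserves one MAIN_CORE_PROC thread per stream for its host-side work.
        auto compiled = core.compile_model(model,
                                           "GPU",
                                           ov::num_streams(2),
                                           ov::hint::enable_cpu_pinning(true));
        auto request = compiled.create_infer_request();
        return 0;
    }
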
Showing 6 changed files with 75 additions and 18 deletions.
34 changes: 18 additions & 16 deletions src/inference/src/dev/threading/cpu_streams_executor_internal.cpp
@@ -131,23 +131,25 @@ void reserve_cpu_by_streams_info(const std::vector<std::vector<int>> _streams_in
}

for (size_t i = 0; i < _cpu_mapping_table.size(); i++) {
std::string cpu_string = std::to_string(_cpu_mapping_table[i][CPU_MAP_CORE_TYPE]) +
std::to_string(_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID]) +
std::to_string(_cpu_mapping_table[i][CPU_MAP_SOCKET_ID]);
for (size_t j = 0; j < stream_conditions.size(); j++) {
if (std::find(stream_conditions[j].begin(), stream_conditions[j].end(), cpu_string) !=
stream_conditions[j].end()) {
_stream_processors[stream_pos[j]].push_back(_cpu_mapping_table[i][CPU_MAP_PROCESSOR_ID]);
_cpu_mapping_table[i][CPU_MAP_USED_FLAG] = _cpu_status;
if (static_cast<int>(_stream_processors[stream_pos[j]].size()) ==
streams_table[j][THREADS_PER_STREAM]) {
stream_pos[j]++;
stream_num[j]++;
}
if (stream_num[j] >= streams_table[j][NUMBER_OF_STREAMS]) {
stream_conditions[j].clear();
if (_cpu_mapping_table[i][CPU_MAP_USED_FLAG] == NOT_USED) {
std::string cpu_string = std::to_string(_cpu_mapping_table[i][CPU_MAP_CORE_TYPE]) +
std::to_string(_cpu_mapping_table[i][CPU_MAP_NUMA_NODE_ID]) +
std::to_string(_cpu_mapping_table[i][CPU_MAP_SOCKET_ID]);
for (size_t j = 0; j < stream_conditions.size(); j++) {
if (std::find(stream_conditions[j].begin(), stream_conditions[j].end(), cpu_string) !=
stream_conditions[j].end()) {
_stream_processors[stream_pos[j]].push_back(_cpu_mapping_table[i][CPU_MAP_PROCESSOR_ID]);
_cpu_mapping_table[i][CPU_MAP_USED_FLAG] = _cpu_status;
if (static_cast<int>(_stream_processors[stream_pos[j]].size()) ==
streams_table[j][THREADS_PER_STREAM]) {
stream_pos[j]++;
stream_num[j]++;
}
if (stream_num[j] >= streams_table[j][NUMBER_OF_STREAMS]) {
stream_conditions[j].clear();
}
break;
}
break;
}
}
}
41 changes: 39 additions & 2 deletions src/inference/tests/unit/cpu_reserve_test.cpp
@@ -44,6 +44,12 @@ class LinuxCpuReserveTests : public ov::test::TestsCommon,
test_data._cpu_status);

ASSERT_EQ(test_data._stream_processors, test_processors);
for (size_t i = 0; i < test_processors.size(); i++) {
for (size_t j = 0; j < test_processors[i].size(); j++) {
ASSERT_EQ(test_data._cpu_mapping_table[test_processors[i][j]][CPU_MAP_USED_FLAG],
test_data._cpu_status);
}
}
}
};

@@ -869,8 +875,6 @@ LinuxCpuReserveTestCase _1socket_16cores_hyper_4streams_ecoreonly = {
},
NOT_USED,
};
// streams_info_table={1, MAIN_CORE_PROC, 36}, but the number of physical cores is 18,
// in this case, threads are assigned on physical and logical cores.
LinuxCpuReserveTestCase _1socket_18cores_hyper_1streams = {
36,
1,
@@ -933,6 +937,38 @@ LinuxCpuReserveTestCase _1socket_18cores_hyper_2streams = {
},
NOT_USED,
};
// other plugin reserved 2 threads
LinuxCpuReserveTestCase _1socket_18cores_hyper_plugin_reserve_2threads = {
36,
1,
{{36, 18, 0, 18, 0, 0}},
{
{0, 0, 0, 0, HYPER_THREADING_PROC, 0, -1}, {1, 0, 0, 0, HYPER_THREADING_PROC, 0, -1},
{2, 0, 0, 1, HYPER_THREADING_PROC, 1, -1}, {3, 0, 0, 1, HYPER_THREADING_PROC, 1, -1},
{4, 0, 0, 2, HYPER_THREADING_PROC, 2, -1}, {5, 0, 0, 2, HYPER_THREADING_PROC, 2, -1},
{6, 0, 0, 3, HYPER_THREADING_PROC, 3, -1}, {7, 0, 0, 3, HYPER_THREADING_PROC, 3, -1},
{8, 0, 0, 4, HYPER_THREADING_PROC, 4, -1}, {9, 0, 0, 4, HYPER_THREADING_PROC, 4, -1},
{10, 0, 0, 5, HYPER_THREADING_PROC, 5, -1}, {11, 0, 0, 5, HYPER_THREADING_PROC, 5, -1},
{12, 0, 0, 6, HYPER_THREADING_PROC, 6, -1}, {13, 0, 0, 6, HYPER_THREADING_PROC, 6, -1},
{14, 0, 0, 7, HYPER_THREADING_PROC, 7, -1}, {15, 0, 0, 7, HYPER_THREADING_PROC, 7, -1},
{16, 0, 0, 8, HYPER_THREADING_PROC, 8, -1}, {17, 0, 0, 9, HYPER_THREADING_PROC, 8, -1},
{18, 0, 0, 0, MAIN_CORE_PROC, 0, CPU_USED}, {19, 0, 0, 1, MAIN_CORE_PROC, 1, CPU_USED},
{20, 0, 0, 2, MAIN_CORE_PROC, 2, -1}, {21, 0, 0, 3, MAIN_CORE_PROC, 3, -1},
{22, 0, 0, 4, MAIN_CORE_PROC, 4, -1}, {23, 0, 0, 5, MAIN_CORE_PROC, 5, -1},
{24, 0, 0, 6, MAIN_CORE_PROC, 6, -1}, {25, 0, 0, 7, MAIN_CORE_PROC, 7, -1},
{26, 0, 0, 8, MAIN_CORE_PROC, 8, -1}, {27, 0, 0, 9, MAIN_CORE_PROC, 9, -1},
{28, 0, 0, 10, MAIN_CORE_PROC, 10, -1}, {29, 0, 0, 11, MAIN_CORE_PROC, 11, -1},
{30, 0, 0, 12, MAIN_CORE_PROC, 12, -1}, {31, 0, 0, 13, MAIN_CORE_PROC, 13, -1},
{32, 0, 0, 14, MAIN_CORE_PROC, 14, -1}, {33, 0, 0, 15, MAIN_CORE_PROC, 15, -1},
{34, 0, 0, 16, MAIN_CORE_PROC, 16, -1}, {35, 0, 0, 17, MAIN_CORE_PROC, 17, -1},
},
{{16, MAIN_CORE_PROC, 1, 0, 0}, {16, HYPER_THREADING_PROC, 1, 0, 0}},
{
{20}, {21}, {22}, {23}, {24}, {25}, {26}, {27}, {28}, {29}, {30}, {31}, {32}, {33}, {34}, {35},
{0}, {1}, {2}, {3}, {4}, {5}, {6}, {7}, {8}, {9}, {10}, {11}, {12}, {13}, {14}, {15},
},
NOT_USED,
};
LinuxCpuReserveTestCase _1socket_32cores_hyper_1streams = {
32,
1,
@@ -986,6 +1022,7 @@ INSTANTIATE_TEST_SUITE_P(CPUReserve,
_1socket_16cores_hyper_4streams_ecoreonly,
_1socket_18cores_hyper_1streams,
_1socket_18cores_hyper_2streams,
_1socket_18cores_hyper_plugin_reserve_2threads,
_1socket_32cores_hyper_1streams));
#endif
} // namespace
15 changes: 15 additions & 0 deletions src/plugins/intel_gpu/src/plugin/compiled_model.cpp
@@ -35,6 +35,20 @@ std::shared_ptr<ov::threading::ITaskExecutor> create_task_executor(const std::sh
if (config.get_property(ov::internal::exclusive_async_requests)) {
//exclusive_async_requests essentially disables the streams (and hence should be checked first) => aligned with the CPU behavior
return plugin->get_executor_manager()->get_executor("GPU");
} else if (config.get_property(ov::hint::enable_cpu_pinning)) {
auto executor_config =
ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor",
0,
0,
ov::threading::IStreamsExecutor::ThreadBindingType::CORES,
1,
0,
0,
ov::threading::IStreamsExecutor::Config::PreferredCoreType::BIG,
{{config.get_property(ov::num_streams), MAIN_CORE_PROC, 1, 0, 0}},
true};
auto post_config = ov::threading::IStreamsExecutor::Config::reserve_cpu_threads(executor_config);
return std::make_shared<ov::threading::CPUStreamsExecutor>(post_config);
} else {
return std::make_shared<ov::threading::CPUStreamsExecutor>(
ov::threading::IStreamsExecutor::Config{"Intel GPU plugin executor", config.get_property(ov::num_streams)});
@@ -250,6 +264,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {

// Configs
ov::PropertyName{ov::enable_profiling.name(), PropertyMutability::RO},
ov::PropertyName{ov::hint::enable_cpu_pinning.name(), PropertyMutability::RO},
ov::PropertyName{ov::hint::model_priority.name(), PropertyMutability::RO},
ov::PropertyName{ov::intel_gpu::hint::host_task_priority.name(), PropertyMutability::RO},
ov::PropertyName{ov::intel_gpu::hint::queue_priority.name(), PropertyMutability::RO},
1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/plugin/plugin.cpp
@@ -593,6 +593,7 @@ std::vector<ov::PropertyName> Plugin::get_supported_properties() const {
ov::PropertyName{ov::num_streams.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::num_requests.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::inference_precision.name(), PropertyMutability::RW},
ov::PropertyName{ov::hint::enable_cpu_pinning.name(), PropertyMutability::RW},
ov::PropertyName{ov::device::id.name(), PropertyMutability::RW},
};

1 change: 1 addition & 0 deletions src/plugins/intel_gpu/src/runtime/execution_config.cpp
@@ -48,6 +48,7 @@ void ExecutionConfig::set_default() {
std::make_tuple(ov::hint::performance_mode, ov::hint::PerformanceMode::LATENCY, PerformanceModeValidator()),
std::make_tuple(ov::hint::execution_mode, ov::hint::ExecutionMode::PERFORMANCE),
std::make_tuple(ov::hint::num_requests, 0),
std::make_tuple(ov::hint::enable_cpu_pinning, false),

std::make_tuple(ov::intel_gpu::hint::host_task_priority, ov::hint::Priority::MEDIUM),
std::make_tuple(ov::intel_gpu::hint::queue_throttle, ov::intel_gpu::hint::ThrottleLevel::MEDIUM),
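
With the default registered above, CPU pinning stays off unless it is requested explicitly. A short sketch of querying the property, again assuming a build with this change and a GPU device present; the model path is illustrative:

    #include <openvino/openvino.hpp>
    #include <iostream>

    int main() {
        ov::Core core;
        // The plugin-level property is read/write and defaults to false (execution_config.cpp above).
        std::cout << std::boolalpha
                  << core.get_property("GPU", ov::hint::enable_cpu_pinning) << "\n";

        auto model = core.read_model("model.xml");  // hypothetical model path
        auto compiled = core.compile_model(model, "GPU", ov::hint::enable_cpu_pinning(true));
        // On the compiled model the same property is exposed read-only (compiled_model.cpp above).
        std::cout << compiled.get_property(ov::hint::enable_cpu_pinning) << "\n";
        return 0;
    }
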
@@ -42,6 +42,7 @@ const std::vector<ov::AnyMap> gpu_setcore_properties = {
const std::vector<ov::AnyMap> gpu_compileModel_properties = {
{ov::hint::performance_mode(ov::hint::PerformanceMode::LATENCY),
ov::hint::num_requests(10),
ov::hint::enable_cpu_pinning(true),
ov::enable_profiling(true)}};

INSTANTIATE_TEST_SUITE_P(smoke_gpuCompileModelBehaviorTests,
