Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable new property model_distribution_policy for CPU inference #23077

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
Show all changes
37 commits
Select commit Hold shift + click to select a range
13319df
initial implementation
wangleis Feb 26, 2024
68bb894
update for test case
wangleis Feb 26, 2024
14c3f27
update for comments
wangleis Feb 26, 2024
1f6a0ca
Merge branch 'master' into property_max_threads_per_stream
wangleis Feb 26, 2024
5724b77
update for python
wangleis Feb 26, 2024
65c312b
update for python
wangleis Feb 26, 2024
d5f43a0
update for python
wangleis Feb 26, 2024
0b09543
update for python
wangleis Feb 26, 2024
d4ef1e0
Merge branch 'master' into property_max_threads_per_stream
wangleis Mar 1, 2024
fe5173d
change default value to PER_SOCKET
wangleis Mar 13, 2024
9a1cc28
Merge branch 'master' into property_max_threads_per_stream
wangleis Mar 13, 2024
19c1ed2
update property name and value
wangleis Mar 17, 2024
ba6d37f
Merge branch 'master' into property_max_threads_per_stream
wangleis Mar 17, 2024
562b01a
update code style
wangleis Mar 17, 2024
9c4a951
update property name and value
wangleis Mar 18, 2024
ad744b8
support combined properties
wangleis Mar 18, 2024
d456451
update code style
wangleis Mar 19, 2024
911a79e
update test case for combined properties
wangleis Mar 19, 2024
9f1189f
update test case for combined properties
wangleis Mar 19, 2024
6858272
update test case for combined properties
wangleis Mar 19, 2024
7d0af10
update for combined properties
wangleis Mar 19, 2024
f6d3bdd
Merge branch 'master' into property_max_threads_per_stream
wangleis Mar 19, 2024
03d09e8
remove CAPI interface
wangleis Mar 20, 2024
eef60ac
draft implementation for std::set value
wangleis Mar 20, 2024
77f30a9
update c++ implementation for std::set value
wangleis Mar 20, 2024
5269cac
update c++ implementation for std::set value
wangleis Mar 20, 2024
018eabb
update c++ implementation for std::set value
wangleis Mar 20, 2024
922554c
remove unused function
wangleis Mar 20, 2024
828e583
update python
wangleis Mar 20, 2024
73ce757
update python test case
wangleis Mar 20, 2024
d18568e
update python code style
wangleis Mar 20, 2024
988cb56
update python code style
wangleis Mar 20, 2024
e9d2590
update code style
wangleis Mar 20, 2024
9db4500
update for comments
wangleis Mar 21, 2024
c779701
update for typo
wangleis Mar 21, 2024
0ae8b3e
remove value NONE for ModelDistributionPolicy
wangleis Mar 21, 2024
88d9929
fix typo
wangleis Mar 21, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/bindings/c/docs/api_overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -309,6 +309,8 @@ OPENVINO_C_VAR(const char*) ov_property_key_affinity;

OPENVINO_C_VAR(const char*) ov_property_key_inference_num_threads;

OPENVINO_C_VAR(const char*) ov_property_key_hint_max_threads_per_stream;

OPENVINO_C_VAR(const char*) ov_property_key_hint_enable_cpu_pinning;

OPENVINO_C_VAR(const char*) ov_property_key_hint_enable_hyper_threading;
Expand Down
7 changes: 7 additions & 0 deletions src/bindings/c/include/openvino/c/ov_property.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,13 @@ ov_property_key_affinity;
OPENVINO_C_VAR(const char*)
ov_property_key_inference_num_threads;

/**
* @brief Read-write property<string> to set/get the max-threads-per-stream mode of CPU inference.
* The value is passed as an enum name string: "AUTO", "PER_PLATFORM" or "PER_SOCKET".
* @ingroup ov_property_c_api
*/
OPENVINO_C_VAR(const char*)
ov_property_key_hint_max_threads_per_stream;

/**
* @brief Read-write property, it is high-level OpenVINO hint for using CPU pinning to bind CPU threads to processors
* during inference
Expand Down
1 change: 1 addition & 0 deletions src/bindings/c/src/ov_property.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ const char* ov_property_key_cache_mode = "CACHE_MODE";
const char* ov_property_key_num_streams = "NUM_STREAMS";
const char* ov_property_key_affinity = "AFFINITY";
const char* ov_property_key_inference_num_threads = "INFERENCE_NUM_THREADS";
const char* ov_property_key_hint_max_threads_per_stream = "MAX_THREADS_PER_STREAM";
const char* ov_property_key_hint_performance_mode = "PERFORMANCE_HINT";
const char* ov_property_key_hint_enable_cpu_pinning = "ENABLE_CPU_PINNING";
const char* ov_property_key_hint_scheduling_core_type = "SCHEDULING_CORE_TYPE";
Expand Down
8 changes: 8 additions & 0 deletions src/bindings/c/tests/ov_core_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,14 @@ TEST_P(ov_core_test, ov_core_set_property_enum_invalid) {
EXPECT_STREQ(val_type, ret);
ov_free(ret);

// Round-trip check for the max-threads-per-stream key. Dedicated locals are used on purpose:
// val_type is already referenced by the assertion just above, and key_type is consumed by the
// invalid-value check that follows, so re-declaring either name here would clash with them.
const char* key_stream = ov_property_key_hint_max_threads_per_stream;
const char* val_stream = "PER_PLATFORM";
OV_EXPECT_OK(ov_core_set_property(core, device_name.c_str(), key_stream, val_stream));
ret = nullptr;
OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_stream, &ret));
EXPECT_STREQ(val_stream, ret);
ov_free(ret);

OV_EXPECT_NOT_OK(ov_core_set_property(core, device_name.c_str(), key_type, invalid_val));
ret = nullptr;
OV_EXPECT_OK(ov_core_get_property(core, device_name.c_str(), key_type, &ret));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from openvino._pyopenvino.properties.hint import performance_mode
from openvino._pyopenvino.properties.hint import enable_cpu_pinning
from openvino._pyopenvino.properties.hint import scheduling_core_type
from openvino._pyopenvino.properties.hint import max_threads_per_stream
from openvino._pyopenvino.properties.hint import enable_hyper_threading
from openvino._pyopenvino.properties.hint import execution_mode
from openvino._pyopenvino.properties.hint import num_requests
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ void regmodule_properties(py::module m) {
wrap_property_RW(m_hint, ov::hint::performance_mode, "performance_mode");
wrap_property_RW(m_hint, ov::hint::enable_cpu_pinning, "enable_cpu_pinning");
wrap_property_RW(m_hint, ov::hint::scheduling_core_type, "scheduling_core_type");
wrap_property_RW(m_hint, ov::hint::max_threads_per_stream, "max_threads_per_stream");
wrap_property_RW(m_hint, ov::hint::enable_hyper_threading, "enable_hyper_threading");
wrap_property_RW(m_hint, ov::hint::execution_mode, "execution_mode");
wrap_property_RW(m_hint, ov::hint::num_requests, "num_requests");
Expand Down
6 changes: 5 additions & 1 deletion src/bindings/python/tests/test_runtime/test_properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,11 @@ def test_properties_ro(ov_property_ro, expected_value):
"SCHEDULING_CORE_TYPE",
((hints.SchedulingCoreType.PCORE_ONLY, hints.SchedulingCoreType.PCORE_ONLY),),
),
(
hints.max_threads_per_stream,
"MAX_THREADS_PER_STREAM",
((hints.MaxThreadsPerStream.PER_PLATFORM, hints.MaxThreadsPerStream.PER_PLATFORM),),
),
(
hints.enable_hyper_threading,
"ENABLE_HYPER_THREADING",
Expand Down Expand Up @@ -541,7 +546,6 @@ def test_single_property_setting(device):
props.affinity: "NONE",
"INFERENCE_PRECISION_HINT": Type.f32,
hints.performance_mode: hints.PerformanceMode.LATENCY,
hints.scheduling_core_type: hints.SchedulingCoreType.PCORE_ONLY,
hints.num_requests: 12,
"NUM_STREAMS": streams.Num(5),
"ENABLE_MMAP": "NO",
Expand Down
51 changes: 51 additions & 0 deletions src/inference/include/openvino/runtime/properties.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,42 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type)
}
/** @endcond */

/**
 * @brief Enum listing the values accepted by the MAX_THREADS_PER_STREAM hint.
 *
 * The enumerators are serialized as the strings "AUTO", "PER_PLATFORM" and "PER_SOCKET".
 */
enum class MaxThreadsPerStream {
AUTO, //!< Using all threads per platform for one stream. Will create sub stream on dual socket platform.
PER_PLATFORM, //!< Using all threads per platform for one stream even on dual socket platform.
PER_SOCKET, //!< Using all threads per socket for one stream on dual socket platform.
};

/** @cond INTERNAL */
/**
 * Streams the textual name of @p stream_mode ("AUTO", "PER_PLATFORM" or "PER_SOCKET") into @p os
 * and returns @p os. Any other enumerator value is reported through OPENVINO_THROW.
 */
inline std::ostream& operator<<(std::ostream& os, const MaxThreadsPerStream& stream_mode) {
    if (stream_mode == MaxThreadsPerStream::AUTO) {
        return os << "AUTO";
    }
    if (stream_mode == MaxThreadsPerStream::PER_PLATFORM) {
        return os << "PER_PLATFORM";
    }
    if (stream_mode == MaxThreadsPerStream::PER_SOCKET) {
        return os << "PER_SOCKET";
    }
    // No known enumerator matched.
    OPENVINO_THROW("Unsupported mode!");
}

/**
 * Extracts one token from @p is and stores the matching enumerator in @p stream_mode.
 * Recognized tokens are "AUTO", "PER_PLATFORM" and "PER_SOCKET"; anything else is reported
 * through OPENVINO_THROW together with the offending token. Returns @p is.
 */
inline std::istream& operator>>(std::istream& is, MaxThreadsPerStream& stream_mode) {
    std::string token;
    is >> token;

    if (token == "AUTO") {
        stream_mode = MaxThreadsPerStream::AUTO;
        return is;
    }
    if (token == "PER_PLATFORM") {
        stream_mode = MaxThreadsPerStream::PER_PLATFORM;
        return is;
    }
    if (token == "PER_SOCKET") {
        stream_mode = MaxThreadsPerStream::PER_SOCKET;
        return is;
    }

    // The token does not name any enumerator.
    OPENVINO_THROW("Unsupported mode: ", token);
}
/** @endcond */

/**
* @brief This property defines CPU core type which can be used during inference.
* @ingroup ov_runtime_cpp_prop_api
Expand All @@ -399,6 +435,21 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type)
*/
static constexpr Property<SchedulingCoreType> scheduling_core_type{"SCHEDULING_CORE_TYPE"};

/**
 * @brief This property defines max threads per stream used for CPU inference.
 * @ingroup ov_runtime_cpp_prop_api
 *
 * Developer can use this property to select max threads per stream for CPU inference. Please refer to
 * MaxThreadsPerStream for the definition of all available values.
 *
 * The following code is an example of using all threads per socket for one stream on a dual-socket platform.
 *
 * @code
 * ie.set_property(ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET));
 * @endcode
 */
static constexpr Property<MaxThreadsPerStream> max_threads_per_stream{"MAX_THREADS_PER_STREAM"};

/**
* @brief This property allows CPU pinning during inference.
* @ingroup ov_runtime_cpp_prop_api
Expand Down
6 changes: 5 additions & 1 deletion src/plugins/intel_cpu/src/compiled_model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,7 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
RO_property(ov::hint::num_requests.name()),
RO_property(ov::hint::enable_cpu_pinning.name()),
RO_property(ov::hint::scheduling_core_type.name()),
RO_property(ov::hint::max_threads_per_stream.name()),
RO_property(ov::hint::enable_hyper_threading.name()),
RO_property(ov::execution_devices.name()),
RO_property(ov::intel_cpu::denormals_optimization.name()),
Expand Down Expand Up @@ -246,7 +247,10 @@ ov::Any CompiledModel::get_property(const std::string& name) const {
const bool use_pin = config.enableCpuPinning;
return decltype(ov::hint::enable_cpu_pinning)::value_type(use_pin);
} else if (name == ov::hint::scheduling_core_type) {
const auto core_type = config.schedulingCoreType;
const auto stream_mode = config.schedulingCoreType;
return stream_mode;
} else if (name == ov::hint::max_threads_per_stream) {
const auto core_type = config.maxThreadsPerStream;
return core_type;
} else if (name == ov::hint::enable_hyper_threading.name()) {
const bool use_ht = config.enableHyperThreading;
Expand Down
15 changes: 15 additions & 0 deletions src/plugins/intel_cpu/src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,21 @@ void Config::readProperties(const ov::AnyMap& prop, const ModelType modelType) {
'/',
ov::hint::SchedulingCoreType::ECORE_ONLY);
}
} else if (key == ov::hint::max_threads_per_stream.name()) {
try {
maxThreadsPerStream = val.as<ov::hint::MaxThreadsPerStream>();
} catch (ov::Exception&) {
OPENVINO_THROW("Wrong value ",
val.as<std::string>(),
"for property key ",
ov::hint::max_threads_per_stream.name(),
". Expected only ",
ov::hint::MaxThreadsPerStream::AUTO,
'/',
ov::hint::MaxThreadsPerStream::PER_PLATFORM,
'/',
ov::hint::MaxThreadsPerStream::PER_SOCKET);
}
} else if (key == ov::hint::enable_hyper_threading.name()) {
try {
enableHyperThreading = val.as<bool>();
Expand Down
1 change: 1 addition & 0 deletions src/plugins/intel_cpu/src/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ struct Config {
bool enableCpuPinning = true;
bool changedCpuPinning = false;
ov::hint::SchedulingCoreType schedulingCoreType = ov::hint::SchedulingCoreType::ANY_CORE;
ov::hint::MaxThreadsPerStream maxThreadsPerStream = ov::hint::MaxThreadsPerStream::AUTO;
bool enableHyperThreading = true;
bool changedHyperThreading = false;
Config::LatencyThreadingMode latencyThreadingMode = Config::LatencyThreadingMode::PER_SOCKET;
Expand Down
4 changes: 4 additions & 0 deletions src/plugins/intel_cpu/src/plugin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -410,6 +410,9 @@ ov::Any Engine::get_property(const std::string& name, const ov::AnyMap& options)
} else if (name == ov::hint::scheduling_core_type) {
const auto core_type = engConfig.schedulingCoreType;
return core_type;
} else if (name == ov::hint::max_threads_per_stream) {
const auto stream_mode = engConfig.maxThreadsPerStream;
return stream_mode;
} else if (name == ov::hint::enable_hyper_threading) {
const bool ht_value = engConfig.enableHyperThreading;
return decltype(ov::hint::enable_hyper_threading)::value_type(ht_value);
Expand Down Expand Up @@ -479,6 +482,7 @@ ov::Any Engine::get_ro_property(const std::string& name, const ov::AnyMap& optio
RW_property(ov::hint::num_requests.name()),
RW_property(ov::hint::enable_cpu_pinning.name()),
RW_property(ov::hint::scheduling_core_type.name()),
RW_property(ov::hint::max_threads_per_stream.name()),
RW_property(ov::hint::enable_hyper_threading.name()),
RW_property(ov::device::id.name()),
RW_property(ov::intel_cpu::denormals_optimization.name()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,18 @@ const std::vector<ov::AnyMap> testing_property_for_scheduling_core_type_3 = {
{ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ANY_CORE)},
{ov::hint::scheduling_core_type(ov::hint::SchedulingCoreType::ECORE_ONLY)}};

const std::vector<ov::AnyMap> testing_property_for_max_threads_per_stream_1 = {
{ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::AUTO)},
{ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_PLATFORM)}};

const std::vector<ov::AnyMap> testing_property_for_max_threads_per_stream_2 = {
{ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_PLATFORM)},
{ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)}};

const std::vector<ov::AnyMap> testing_property_for_max_threads_per_stream = {
{ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::AUTO)},
{ov::hint::max_threads_per_stream(ov::hint::MaxThreadsPerStream::PER_SOCKET)}};

const std::vector<ov::AnyMap> testing_property_for_enable_hyper_threading = {{ov::hint::enable_hyper_threading(true)},
{ov::hint::enable_hyper_threading(false)}};

Expand All @@ -131,6 +143,9 @@ INSTANTIATE_TEST_SUITE_P(smoke_ExportImportTest,
testing_property_for_scheduling_core_type_1,
testing_property_for_scheduling_core_type_2,
testing_property_for_scheduling_core_type_3,
testing_property_for_max_threads_per_stream_1,
testing_property_for_max_threads_per_stream_2,
testing_property_for_max_threads_per_stream_3,
testing_property_for_enable_hyper_threading,
testing_property_for_enable_cpu_pinning)));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ TEST_F(OVClassConfigTestCPU, smoke_CpuExecNetworkSupportedPropertiesAreAvailable
RO_property(ov::hint::num_requests.name()),
RO_property(ov::hint::enable_cpu_pinning.name()),
RO_property(ov::hint::scheduling_core_type.name()),
RO_property(ov::hint::max_threads_per_stream.name()),
RO_property(ov::hint::enable_hyper_threading.name()),
RO_property(ov::execution_devices.name()),
RO_property(ov::intel_cpu::denormals_optimization.name()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ TEST_F(OVClassConfigTestCPU, smoke_PluginAllSupportedPropertiesAreAvailable) {
RW_property(ov::hint::num_requests.name()),
RW_property(ov::hint::enable_cpu_pinning.name()),
RW_property(ov::hint::scheduling_core_type.name()),
RW_property(ov::hint::max_threads_per_stream.name()),
RW_property(ov::hint::enable_hyper_threading.name()),
RW_property(ov::device::id.name()),
RW_property(ov::intel_cpu::denormals_optimization.name()),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,15 @@ std::vector<ov::AnyMap> OVPropertiesTestsWithCompileModelProps::getRWOptionalPro
}
}

// When no filter is given, or the filter names the max_threads_per_stream hint, add one
// single-entry property map per MaxThreadsPerStream value.
if (props.empty() || std::find(props.begin(), props.end(), ov::hint::max_threads_per_stream.name()) != props.end()) {
    // The element type must be the hint's own enum: the initializers are MaxThreadsPerStream values.
    ov::hint::MaxThreadsPerStream maxThreadsPerStreams[] = {ov::hint::MaxThreadsPerStream::AUTO,
                                                            ov::hint::MaxThreadsPerStream::PER_PLATFORM,
                                                            ov::hint::MaxThreadsPerStream::PER_SOCKET};
    for (auto& maxThreadsPerStream : maxThreadsPerStreams) {
        res.push_back({ov::hint::max_threads_per_stream(maxThreadsPerStream)});
    }
}

if (props.empty() || std::find(props.begin(), props.end(), ov::enable_mmap.name()) != props.end()) {
res.push_back({ov::enable_mmap(true)});
res.push_back({ov::enable_mmap(false)});
Expand Down
Loading