diff --git a/src/inference/include/openvino/runtime/properties.hpp b/src/inference/include/openvino/runtime/properties.hpp
index 741d0c4945920f..72ffdfc63ff8c2 100644
--- a/src/inference/include/openvino/runtime/properties.hpp
+++ b/src/inference/include/openvino/runtime/properties.hpp
@@ -400,10 +400,11 @@ inline std::istream& operator>>(std::istream& is, SchedulingCoreType& core_type)
 static constexpr Property<SchedulingCoreType> scheduling_core_type{"SCHEDULING_CORE_TYPE"};
 
 enum class ModelDistributionPolicy {
-    TENSOR_PARALLEL = 0,    // Split tensor into several parts and distribute them between sockets/devices during model
-                            // compilation. At inference time sockets/devices process tensors in parallel and do
-                            // syncronization at the end ensuring mathematical correctness.
-    PIPELINE_PARALLEL = 1,  // Split subgraph into parts and run one part per device in parallel.
+    TENSOR_PARALLEL = 0,    // Distribute tensors to multiple sockets/devices during model compilation. At inference
+                            // time, sockets/devices process each individual tensor in parallel.
+    PIPELINE_PARALLEL = 1,  // Distribute tensors to multiple sockets/devices during model compilation. At inference
+                            // time, sockets/devices process each individual tensor one by one, and each socket/device
+                            // processes a portion of a different tensor in parallel.
 };
 
 /** @cond INTERNAL */
@@ -435,29 +436,20 @@ inline std::istream& operator>>(std::istream& is, ModelDistributionPolicy& strea
 /**
  * @brief This property defines model distribution policy for inference with multiple sockets/devices.
  * @ingroup ov_runtime_cpp_prop_api
- *
  * This property can be used to select model distribution policy between execution units (e.g. between CPU sockets/NUMA
  * nodes or between different GPUs).
- * -- TENSOR_PARALLEL : Split tensor into several parts and distribute them between sockets/devices during model
- *                      compilation. At inference time sockets/devices process tensors in parallel and do
- *                      syncronization at the end ensuring mathematical correctness.
- * -- PIPELINE_PARALLEL : Split tensor into several parts and disribute them between sockets/devices during model
- *                        compilation. At inference time sockets/devices process single tensor one by one. And each
- *                        sockets/devices syncronization at the end ensuring mathematical correctness.
- *                        compilation. At inference time sockets/devices process tensors in parallel and do syncronization
- *                        at the end ensuring mathematical correctness.
- * -- PIPELINE_PARALLEL: Split subgraph into parts and run one part per device in parallel.
+ * -- TENSOR_PARALLEL : Distribute tensors to multiple sockets/devices during model compilation. At inference time,
+ *                      sockets/devices process each individual tensor in parallel.
+ * -- PIPELINE_PARALLEL : Distribute tensors to multiple sockets/devices during model compilation. At inference time,
+ *                        sockets/devices process each individual tensor one by one, and each socket/device processes
+ *                        a portion of a different tensor in parallel.
  *
- * The following code is an example how TENSOR_PARALLEL or PIPELINE_PARALLEL model disrtibution policy might be enabled.
+ * The following code is an example of how the TENSOR_PARALLEL or PIPELINE_PARALLEL model distribution policy might be enabled.
 *
 * @code
 * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}));
 * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL}));
 * @endcode
- *
- * @code
- * ie.set_property(ov::hint::model_distribution_policy({ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}));
- * @endcode
 */
 static constexpr Property<std::set<ModelDistributionPolicy>> model_distribution_policy{"MODEL_DISTRIBUTION_POLICY"};
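
For reference, here is a minimal usage sketch (not part of the patch) of how the property documented above might be set through the public `ov::Core` API. The `"CPU"`/`"GPU"` device names and the `"model.xml"` path are placeholders, and support for each policy depends on the plugin.

```cpp
// Usage sketch: enabling a model distribution policy via ov::Core.
// Device names and the model path below are placeholders.
#include <set>

#include <openvino/openvino.hpp>

int main() {
    ov::Core core;

    // Ask the CPU plugin to distribute tensors across sockets/NUMA nodes.
    core.set_property("CPU",
                      ov::hint::model_distribution_policy(
                          std::set<ov::hint::ModelDistributionPolicy>{
                              ov::hint::ModelDistributionPolicy::TENSOR_PARALLEL}));

    // The same hint can also be passed directly at compile time.
    auto compiled = core.compile_model(
        "model.xml",
        "GPU",
        ov::hint::model_distribution_policy(
            std::set<ov::hint::ModelDistributionPolicy>{
                ov::hint::ModelDistributionPolicy::PIPELINE_PARALLEL}));

    return 0;
}
```

Properties passed to `compile_model` take precedence over those previously set on the `Core` for the same device, so either route ends up configuring the `MODEL_DISTRIBUTION_POLICY` key for the compiled model.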