Skip to content

Commit

Permalink
Merge branch 'master' into itikhono/bug_fix/dq_markup
Browse files Browse the repository at this point in the history
  • Loading branch information
itikhono authored Feb 25, 2025
2 parents c709d6d + 569ebea commit 3468ff5
Show file tree
Hide file tree
Showing 63 changed files with 885 additions and 440 deletions.
6 changes: 3 additions & 3 deletions .github/actions/common/artifact_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,10 @@ def add_common_args(parser: argparse.ArgumentParser):
default=os.getenv('ARTIFACTS_SHARE'))
group = parser.add_mutually_exclusive_group(required=True)
group.add_argument('-d', '--storage_dir', help='Subdirectory name for artifacts, same as product type',
choices=[platform_key.value for platform_key in ProductType])
group.add_argument('-p', '--platform', type=str,
choices=[product_type.value for product_type in ProductType], type=str.lower)
group.add_argument('-p', '--platform', type=str.lower,
help='Platform for which to restore artifacts. Used if storage_dir is not set',
choices=[product_type.value for product_type in PlatformKey])
choices=[platform_key.value for platform_key in PlatformKey])


def get_event_type(event_name: str = os.getenv('GITHUB_EVENT_NAME')) -> str:
Expand Down
8 changes: 4 additions & 4 deletions .github/actions/common/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,10 @@ class EventType(Enum):
'public_linux_ubuntu_22_04_x86_64_release',
'public_linux_ubuntu_22_04_dpcpp_x86_64_release',
'public_linux_ubuntu_24_04_x86_64_release',
'public_windows_vs2019_Release',
'public_windows_vs2019_Debug',
'public_windows_vs2022_Release',
'public_windows_vs2022_Debug',
'public_windows_vs2019_release',
'public_windows_vs2019_debug',
'public_windows_vs2022_release',
'public_windows_vs2022_debug',
'public_manylinux2014_x86_64_release',
)
ProductType = Enum('ProductType', {t.upper(): t for t in productTypes})
Expand Down
39 changes: 39 additions & 0 deletions .github/actions/store_artifacts/store_artifacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from __future__ import annotations

import argparse
import hashlib
import logging
import os
import sys
Expand Down Expand Up @@ -62,6 +63,33 @@ def rotate_dir(directory: Path) -> bool:
return True


def generate_sha256sum(file_path: str | Path) -> str:
    """
    Generates the SHA-256 checksum for the given file.

    The file is read in fixed-size chunks so arbitrarily large artifacts can be
    hashed without loading them fully into memory.
    :param file_path: Path to the file
    :return: SHA-256 checksum as a hexadecimal string
    """
    sha256 = hashlib.sha256()
    with open(file_path, 'rb') as file:
        for chunk in iter(lambda: file.read(4096), b''):
            sha256.update(chunk)
    return sha256.hexdigest()


def store_checksums(artifact_path: Path) -> None:
    """
    Generates SHA-256 checksums for a given artifact and stores them in separate files.

    For a directory, a checksum file is written next to every regular file found
    recursively inside it; for a single file, one checksum file is written next to it.
    The checksum filename is the artifact's name with its last suffix replaced by '.sha256'.
    :param artifact_path: Path to either a single artifact file or the directory with files to generate checksums for
    """
    # Bug fix: `path.is_file` must be *called*. The bare bound-method object is
    # always truthy, so directories returned by rglob('*') slipped through the
    # filter and open() on them raised an error.
    files = [path for path in artifact_path.rglob('*') if path.is_file()] if artifact_path.is_dir() else [artifact_path]
    for file in files:
        # NOTE(review): with_suffix replaces only the last suffix, so 'a.tar.gz'
        # maps to 'a.tar.sha256' — this matches the copy logic in main(), which
        # looks the checksum file up via the same with_suffix('.sha256') call.
        hashsum_filepath = file.with_suffix('.sha256')
        with open(hashsum_filepath, 'w') as hashsum_file:
            hashsum_file.write(generate_sha256sum(file))


def main():
action_utils.init_logger()
logger = logging.getLogger(__name__)
Expand All @@ -79,6 +107,14 @@ def main():
error_found = False
for artifact in args.artifacts.split():
artifact_path = Path(artifact)

logger.debug(f"Calculating checksum for {artifact_path}")
try:
store_checksums(artifact_path)
except Exception as e:
logger.error(f'Failed to calculate checksum for {artifact}: {e}')
error_found = True

logger.debug(f"Copying {artifact_path} to {storage / artifact_path.name}")
try:
with preserve_stats_context():
Expand All @@ -87,6 +123,9 @@ def main():
else:
storage.mkdir(parents=True, exist_ok=True)
shutil.copy2(artifact_path, storage / artifact_path.name)
if artifact_path.with_suffix('.sha256').exists():
shutil.copy2(artifact_path.with_suffix('.sha256'),
storage / artifact_path.with_suffix('.sha256').name)
except Exception as e:
logger.error(f'Failed to copy {artifact}: {e}')
error_found = True
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/workflow_rerunner.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ on:
- Linux ARM64 (Ubuntu 20.04, Python 3.11)
- Linux Static CC (Ubuntu 22.04, Python 3.11, Clang)
- Linux RISC-V with Conan (Ubuntu 22.04, Python 3.10)
- Linux (Ubuntu 22.04, Python 3.11, Intel DPC++ Compiler)
- Fedora 29 (RHEL 8.4), Python 3.9
- Windows (VS 2022, Python 3.11, Release)
- Windows (VS 2022, Python 3.11, Debug)
Expand Down
2 changes: 1 addition & 1 deletion cmake/developer_package/add_target_helpers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ function(ov_add_target)
# defining a target
if (ARG_TYPE STREQUAL EXECUTABLE)
add_executable(${ARG_NAME} ${all_sources})
elseif(ARG_TYPE STREQUAL STATIC OR ARG_TYPE STREQUAL SHARED)
elseif(ARG_TYPE STREQUAL STATIC OR ARG_TYPE STREQUAL SHARED OR ARG_TYPE STREQUAL OBJECT)
add_library(${ARG_NAME} ${ARG_TYPE} ${all_sources})
else()
message(SEND_ERROR "Invalid target type ${ARG_TYPE} specified for target name ${ARG_NAME}")
Expand Down
2 changes: 1 addition & 1 deletion docs/dev/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,7 @@ OpenVINO Components include:
* [bindings](../../src/bindings) - contains all available OpenVINO bindings which are maintained by the OpenVINO team.
* [c](../../src/bindings/c) - C API for OpenVINO™ Runtime
* [python](../../src/bindings/python) - Python API for OpenVINO™ Runtime
* [Plugins](../../src/plugins) - contains OpenVINO plugins which are maintained in open-source by the OpenVINO team. For more information, take a look at the [list of supported devices](https://docs.openvino.ai/2025/about-openvino/compatibility-and-support/supported-devices.html).
* [Plugins](../../src/plugins) - contains OpenVINO plugins which are maintained in open-source by the OpenVINO team. For more information, take a look at the [list of supported devices](https://docs.openvino.ai/2025/documentation/compatibility-and-support/supported-devices.html).
* [Frontends](../../src/frontends) - contains available OpenVINO frontends that allow reading models from the native framework format.
* [OpenVINO Model Converter (OVC)](https://docs.openvino.ai/2025/openvino-workflow/model-preparation.html) - is a cross-platform command-line tool that facilitates the transition between training and deployment environments, and adjusts deep learning models for optimal execution on end-point target devices.
* [Samples](https://github.com/openvinotoolkit/openvino/tree/master/samples) - applications in C, C++ and Python languages that show basic OpenVINO use cases.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -193,6 +193,8 @@ TRANSFORMATIONS_API bool constantIsEqualTo(const std::shared_ptr<ov::op::v0::Con

TRANSFORMATIONS_API bool has_f16_constants(const std::shared_ptr<const ov::Model>& function);

TRANSFORMATIONS_API bool is_large_language_model(const ov::Model& model);

/**
* \brief Check if 'other_shape' can be broadcasted to 'ref_shape'
*
Expand Down
29 changes: 29 additions & 0 deletions src/common/transformations/src/transformations/utils/utils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,15 @@
#include "openvino/core/validation_util.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/broadcast.hpp"
#include "openvino/op/concat.hpp"
#include "openvino/op/constant.hpp"
#include "openvino/op/convert.hpp"
#include "openvino/op/divide.hpp"
#include "openvino/op/gather.hpp"
#include "openvino/op/multiply.hpp"
#include "openvino/op/paged_attention.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/read_value.hpp"
#include "openvino/op/relu.hpp"
#include "openvino/op/reshape.hpp"
#include "openvino/op/shape_of.hpp"
Expand All @@ -25,6 +29,9 @@
#include "openvino/op/tanh.hpp"
#include "openvino/op/util/multi_subgraph_base.hpp"
#include "openvino/op/util/shape_of_base.hpp"
#include "openvino/pass/pattern/op/optional.hpp"
#include "openvino/pass/pattern/op/or.hpp"
#include "openvino/pass/pattern/op/wrap_type.hpp"

namespace ov {
namespace op {
Expand Down Expand Up @@ -133,6 +140,28 @@ bool has_f16_constants(const std::shared_ptr<const ov::Model>& function) {
return false;
}

// Heuristic LLM detection: a model is considered a large language model if it
// either contains a PagedAttentionExtension op, or matches the stateful
// KV-cache subgraph below:
//   ReadValue -> [Convert] -> [Gather] -> [Convert] -> Concat -> [Convert] -> Assign
// Optional Converts cover mixed/compressed-precision caches; the Gather branch
// presumably covers beam-search state reordering via a beam_idx Parameter —
// TODO(review): confirm against the stateful-model transformation that builds this subgraph.
bool is_large_language_model(const ov::Model& model) {
    using namespace ov::pass::pattern;

    // Past KV state read back from the model's internal state.
    const auto past = wrap_type<ov::op::v6::ReadValue>();
    const auto convert_past = ov::pass::pattern::optional<ov::op::v0::Convert>(past);
    const auto beam_idx = wrap_type<ov::op::v0::Parameter>();
    // Reorder the past state along an axis given by a Constant, indexed by beam_idx.
    const auto gather_past = wrap_type<ov::op::v8::Gather>({convert_past, beam_idx, wrap_type<ov::op::v0::Constant>()});
    const auto gather_convert = ov::pass::pattern::optional<ov::op::v0::Convert>(gather_past);
    // Concat consumes either the (possibly converted) past state directly or the gathered variant.
    const auto concat_past_input =
        std::make_shared<ov::pass::pattern::op::Or>(OutputVector{convert_past, gather_convert});
    // New tokens' KV is appended to the cache; the second input is unconstrained.
    const auto concat = wrap_type<ov::op::v0::Concat>({concat_past_input, any_input()});
    const auto convert_present = ov::pass::pattern::optional<ov::op::v0::Convert>(concat);
    // Assign writes the updated cache back to the state, closing the KV-cache loop.
    const auto present = wrap_type<ov::op::v6::Assign>({convert_present});
    const auto kvcache_matcher = std::make_shared<ov::pass::pattern::Matcher>(present, "KVCacheMatcher");

    // Return true on the first op that completes the KV-cache pattern or is a PagedAttention op.
    for (const auto& op : model.get_ops()) {
        if (kvcache_matcher->match(op->output(0)) || ov::is_type<ov::op::PagedAttentionExtension>(op))
            return true;
    }
    return false;
}

bool check_for_broadcast(const ov::PartialShape& ref_shape, const ov::PartialShape& other_shape) {
if (ref_shape.rank().is_dynamic() || other_shape.rank().is_dynamic()) {
return false;
Expand Down
6 changes: 6 additions & 0 deletions src/core/include/openvino/core/type.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,12 @@ is_type(Value value) {
return value && value->get_type_info().is_castable(Type::get_type_info_static());
}

/// \brief Tests if value is a pointer/shared_ptr that can be statically cast to any of the specified types
template <typename Type, typename... Types, typename Value>
bool is_type_any_of(Value value) {
return is_type<Type>(value) || (is_type_any_of<Types>(value) || ...);
}

/// Casts a Value* to a Type* if it is of type Type, nullptr otherwise
template <typename Type, typename Value>
typename std::enable_if<std::is_convertible<decltype(static_cast<Type*>(std::declval<Value>())), Type*>::value,
Expand Down
52 changes: 39 additions & 13 deletions src/plugins/intel_cpu/src/cpu_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "memory_desc/cpu_memory_desc_utils.h"
#include "nodes/common/cpu_memcpy.h"
#include "nodes/reorder.h"
#include "utils/bfloat16.hpp"
#include "utils/debug_capabilities.h"
#if defined(__linux__)
# include <sys/syscall.h> /* Definition of SYS_* constants */
Expand All @@ -30,19 +31,44 @@ BlockedMemoryDescPtr IMemory::getDescWithType<BlockedMemoryDesc, 0, 0>() const {
}

namespace {
inline void setSubnormalsToZero(float* data, size_t size) {
inline void setSubnormalsToZeroAndbf16Saturation(float* data, size_t size, bool ftz, bool bf16saturation) {
uint32_t* u32data = reinterpret_cast<uint32_t*>(data);
for (size_t i = 0; i < size; ++i) {
if ((u32data[i] & (0xFF << 23)) == 0) {
u32data[i] = 0;
float* floatdata = reinterpret_cast<float*>(data);
if (ftz && bf16saturation) {
for (size_t i = 0; i < size; ++i) {
if ((u32data[i] & (0xFF << 23)) == 0) {
u32data[i] = 0;
} else if (!std::isnan(floatdata[i]) && !std::isinf(floatdata[i])) {
floatdata[i] = (floatdata[i] < static_cast<float>(std::numeric_limits<ov::bfloat16>::lowest()))
? static_cast<float>(std::numeric_limits<ov::bfloat16>::lowest())
: (floatdata[i] > static_cast<float>(std::numeric_limits<ov::bfloat16>::max()))
? static_cast<float>(std::numeric_limits<ov::bfloat16>::max())
: floatdata[i];
}
}
} else if (ftz) {
for (size_t i = 0; i < size; ++i) {
if ((u32data[i] & (0xFF << 23)) == 0) {
u32data[i] = 0;
}
}
} else if (bf16saturation) {
for (size_t i = 0; i < size; ++i) {
if (!std::isnan(floatdata[i]) && !std::isinf(floatdata[i])) {
floatdata[i] = (floatdata[i] < static_cast<float>(std::numeric_limits<ov::bfloat16>::lowest()))
? static_cast<float>(std::numeric_limits<ov::bfloat16>::lowest())
: (floatdata[i] > static_cast<float>(std::numeric_limits<ov::bfloat16>::max()))
? static_cast<float>(std::numeric_limits<ov::bfloat16>::max())
: floatdata[i];
}
}
}
}

void transferData(const IMemory& src, const IMemory& dst, bool ftz) {
void transferData(const IMemory& src, const IMemory& dst, bool ftz, bool bf16saturation) {
node::Reorder::reorderData(src, dst);

if (!ftz) {
if (!ftz && !bf16saturation) {
return;
}
if (src.getDesc().getPrecision() != ov::element::f32 || dst.getDesc().getPrecision() != ov::element::f32) {
Expand All @@ -62,7 +88,7 @@ void transferData(const IMemory& src, const IMemory& dst, bool ftz) {
// actual FTZ
auto* memData = static_cast<float*>(dst.getData());
memData += offset;
setSubnormalsToZero(memData, dst.getSize() / sizeof(float));
setSubnormalsToZeroAndbf16Saturation(memData, dst.getSize() / sizeof(float), ftz, bf16saturation);
}

} // namespace
Expand Down Expand Up @@ -125,11 +151,11 @@ void Memory::create(MemoryDescPtr desc, const void* data, bool pads_zeroing) {
}
}

void Memory::load(const IMemory& src, bool ftz) const {
void Memory::load(const IMemory& src, bool ftz, bool bf16saturation) const {
if (src.getDesc().getPrecision() == element::string) {
OPENVINO_THROW("[CPU] Memory object cannot load string data.");
}
transferData(src, *this, ftz);
transferData(src, *this, ftz, bf16saturation);
}

void Memory::nullify() {
Expand Down Expand Up @@ -273,12 +299,12 @@ StringMemory::StringMemory(dnnl::engine engine, MemoryDescPtr desc, const void*
}
}

void StringMemory::load(const IMemory& src, bool ftz) const {
void StringMemory::load(const IMemory& src, bool ftz, bool bf16saturation) const {
if (src.getDesc().getPrecision() != element::string) {
OPENVINO_THROW("[CPU] String memory cannot load a non-string object.");
}

transferData(src, *this, false);
transferData(src, *this, false, false);
}

void* StringMemory::getData() const {
Expand Down Expand Up @@ -472,11 +498,11 @@ void StaticMemory::redefineDesc(MemoryDescPtr desc) {
OPENVINO_THROW("Unexpected: Memory descriptor may not be modified in StaticMemory object");
}

void StaticMemory::load(const IMemory& src, bool ftz) const {
void StaticMemory::load(const IMemory& src, bool ftz, bool bf16saturation) const {
if (src.getDesc().getPrecision() == element::string) {
OPENVINO_THROW("[CPU] StaticMemory cannot load string data.");
}
transferData(src, *this, ftz);
transferData(src, *this, ftz, bf16saturation);
}

MemoryBlockPtr StaticMemory::getMemoryBlock() const {
Expand Down
8 changes: 4 additions & 4 deletions src/plugins/intel_cpu/src/cpu_memory.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ class IMemory {
// Caution!!! This action invalidates the previous data layout. The old data may become unreachable.
virtual void redefineDesc(MemoryDescPtr desc) = 0;

virtual void load(const IMemory& src, bool ftz) const = 0;
virtual void load(const IMemory& src, bool ftz, bool bf16saturation) const = 0;

virtual MemoryBlockPtr getMemoryBlock() const = 0;

Expand Down Expand Up @@ -260,7 +260,7 @@ class StaticMemory final : public IMemory {
// Always throws since a static memory descriptor should not be modified
void redefineDesc(MemoryDescPtr desc) override;

void load(const IMemory& src, bool ftz) const override;
void load(const IMemory& src, bool ftz, bool bf16saturation) const override;

MemoryBlockPtr getMemoryBlock() const override;

Expand Down Expand Up @@ -315,7 +315,7 @@ class Memory : public IMemory {

void redefineDesc(MemoryDescPtr desc) override;

void load(const IMemory& src, bool ftz) const override;
void load(const IMemory& src, bool ftz, bool bf16saturation) const override;
void nullify() override;

dnnl::engine getEngine() const {
Expand Down Expand Up @@ -421,7 +421,7 @@ class StringMemory : public IMemory {

void redefineDesc(MemoryDescPtr desc) override;

void load(const IMemory& src, bool ftz) const override;
void load(const IMemory& src, bool ftz, bool bf16saturation) const override;

MemoryBlockPtr getMemoryBlock() const override;

Expand Down
2 changes: 1 addition & 1 deletion src/plugins/intel_cpu/src/dnnl_postops_composer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -659,7 +659,7 @@ static MemoryPtr prepackDecompressionParams(const MemoryCPtr& paramsPtr,
srcFormat);
auto srcMem = std::make_shared<Memory>(engine, srcMemoryDesc, paramsPtr->getData());

dstMem->load(*srcMem, true);
dstMem->load(*srcMem, true, false);
return dstMem;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@ class jit_uni_vcvtneps2bf16 : public jit_emitter {
conversion_mode mode = conversion_mode::default_mode)
: jit_emitter(host, host_isa, exec_prc),
mode_(mode) {
prepare_table();
// The conversion table is required only in saturation_mode, or on platforms without avx512_core_bf16/avx2_vnni_2
if ((!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx512_core_bf16) &&
!dnnl::impl::cpu::x64::mayiuse(dnnl::impl::cpu::x64::avx2_vnni_2)) ||
mode_ == conversion_mode::saturation_mode) {
prepare_table();
}
}

size_t get_inputs_num() const override {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ void jit_kernel_emitter::emit_impl(const std::vector<size_t>& in, const std::vec
auto expected_out_type = snippets::RegType::undefined;
const auto& node = expression->get_node();
// Note: currently only a few operations are allowed to have mixed in/out register types => skip validation here
if (!ov::is_type<snippets::op::LoopEnd>(node) && !ov::is_type<snippets::op::RegSpillBase>(node) &&
if (!ov::is_type_any_of<snippets::op::LoopEnd, snippets::op::RegSpillBase>(node) &&
!std::dynamic_pointer_cast<jit_nop_emitter>(emitter)) {
std::tie(expected_in_type, expected_out_type) = get_expected_reg_types(emitter);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ void jit_kernel_emitter::emit_impl(const std::vector<size_t>& in, const std::vec
const auto& node = expression->get_node();
// Note: A few operations are allowed to have mixed register types on their inputs (or outputs) => skip
// validation here
if (!ov::is_type<snippets::op::LoopEnd>(node) && !ov::is_type<snippets::op::RegSpillBase>(node) &&
if (!ov::is_type_any_of<snippets::op::LoopEnd, snippets::op::RegSpillBase>(node) &&
!std::dynamic_pointer_cast<jit_nop_emitter>(emitter)) {
std::tie(expected_in_type, expected_out_type) = get_expected_reg_types(emitter);
}
Expand Down
Loading

0 comments on commit 3468ff5

Please sign in to comment.