Merge branch 'master' into itikhono/bug_fix/moc_in_prepostprocessing

itikhono · Feb 19, 2025 · 84adf5d · 84adf5d
2 parents 6c54bcf + 6d19e8b
commit 84adf5d
Show file tree

Hide file tree

Showing 264 changed files with 20,443 additions and 2,312 deletions.
diff --git a/cmake/developer_package/plugins/plugins.cmake b/cmake/developer_package/plugins/plugins.cmake
@@ -109,7 +109,7 @@ function(ov_add_plugin)
         if (OV_PLUGIN_ADD_CLANG_TIDY)
             if (ENABLE_CLANG_TIDY)
                 set_target_properties(${OV_PLUGIN_NAME} PROPERTIES
-                    CXX_CLANG_TIDY "clang-tidy-${CLANG_TIDY_REQUIRED_VERSION};--extra-arg=-Wno-unused-command-line-argument")
+                    CXX_CLANG_TIDY "${CLANG_TIDY};--extra-arg=-Wno-unused-command-line-argument")
             endif()
         endif()
 

diff --git a/...entation/openvino-ir-format/operation-sets/operation-specs/signals/istft-16.rst b/...entation/openvino-ir-format/operation-sets/operation-specs/signals/istft-16.rst
@@ -0,0 +1,217 @@
+.. {#openvino_docs_ops_signals_ISTFT_16}
+
+Inverse Short Time Fourier Transformation (ISTFT)
+=================================================
+
+.. meta::
+  :description: Learn about ISTFT-16 - a signal processing operation
+
+**Versioned name**: *ISTFT-16*
+
+**Category**: *Signal processing*
+
+**Short description**: *ISTFT* operation performs Inverse Short-Time Fourier Transform (complex-to-real).
+
+**Detailed description**: *ISTFT* performs Inverse Short-Time Fourier Transform of complex-valued input tensor 
+of shape ``[fft_results, frames, 2]`` or ``[batch, fft_results, frames, 2]``, where:
+
+  * ``batch`` is a batch size dimension
+  * ``frames`` is a number of frames calculated as ``((signal_length - frame_size) / frame_step) + 1`` of the original signal if not centered, or ``(signal_length / frame_step) + 1`` otherwise.
+  * ``fft_results`` is a number calculated as ``(frame_size / 2) + 1`` of the original signal
+  * ``2`` is the last dimension for complex value represented by floating-point values pair (real and imaginary part accordingly)
+
+The output is a restored real-valued signal in a discrete time domain. The shape of the output is 1D ``[signal_length]`` or 2D ``[batch, signal_length]``.
+If the ``signal_length`` is not provided as an input value, it is calculated according to the following rules:
+
+  * ``default_signal_length = (frames - 1) * frame_step`` for ``center == true`` 
+  * ``default_signal_length = (frames - 1) * frame_step + frame_size`` for ``center == false`` 
+
+If the ``signal_length`` input is provided, the number of output values will be adjusted accordingly:
+
+  * If ``signal_length > default_signal_length`` the output is padded with zeros at the end.
+  * If ``signal_length < default_signal_length`` any additional generated samples are cut to the ``signal_length`` size.
+
+The ``window_length`` cannot be larger than ``frame_size``. If it is smaller, the window values are padded with zeros on the left and right sides: the size of the left padding is calculated as ``(frame_size - window_length) // 2``, and the right padding fills the remainder up to ``frame_size``.
+
+**Attributes**:
+
+* *center*
+
+  * **Description**: Flag that indicates whether padding has been applied to the original signal. It affects output shape, if the ``signal_length`` input is not provided.
+  * **Range of values**:
+
+    * ``false`` - padding has not been applied, default signal length is calculated as ``(frames - 1) * frame_step + frame_size``
+    * ``true`` - padding has been applied, default signal length is calculated as ``(frames - 1) * frame_step``
+  * **Type**: ``boolean``
+  * **Required**: *yes*
+
+* *normalized*
+
+  * **Description**: Flag that indicates whether the input has been normalized. It is needed to correctly restore the signal and denormalize the output. Output of the STFT is divided by ``sqrt(frame_size)``, when normalized.
+  * **Range of values**:
+
+    * ``false`` - input has not been normalized
+    * ``true`` - input has been normalized
+  * **Type**: ``boolean``
+  * **Required**: *yes*
+
+
+**Inputs**
+
+* **1**: ``data`` - Tensor of type *T*, the ISTFT data input (compatible with a result of STFT operation). **Required.**
+
+  * The data input shape can be 3D ``[fft_results, frames, 2]`` or 4D ``[batch, fft_results, frames, 2]``.
+* **2**: ``window`` - Tensor of type *T* and 1D shape ``[window_length]``, specifying the window values applied to restore the signal. The ``window_length`` must be less than or equal to ``frame_size``; if it is smaller, the window will be padded with zeros on the left and right sides. **Required.**
+* **3**: ``frame_size`` - Scalar tensor of type *T_INT* describing the size of a single frame of the signal to be provided as input to FFT. **Required.**
+* **4**: ``frame_step`` - Scalar tensor of type *T_INT* describing the distance (number of samples) between successive frames. **Required.**
+* **5**: ``signal_length`` - Scalar or single element 1D tensor of type *T_INT* describing the desired length of the output signal. If not provided, it is calculated according to the rules presented in the detailed description above. **Optional.**
+
+
+**Outputs**
+
+* **1**: ``signal`` - Tensor of type *T* and 1D shape ``[signal_length]`` or 2D shape ``[batch, signal_length]`` with a real valued signal data. **Required.**
+
+**Types**
+
+* *T*: any supported floating-point type.
+
+* *T_INT*: ``int64`` or ``int32``.
+
+
+**Examples**:
+
+*Example 3D input, 1D output signal, center=false, default signal_length:*
+
+.. code-block:: xml
+   :force:
+
+    <layer ... type="ISTFT" ... >
+        <data center="false" ... />
+        <input>
+            <port id="0">
+                <dim>6</dim>
+                <dim>16</dim>
+                <dim>2</dim>
+            </port>
+            <port id="1">
+                <dim>7</dim>
+            </port>
+            <port id="2"></port> <!-- frame_size value: 11 -->
+            <port id="3"></port> <!-- frame_step value: 3 -->
+        </input>
+        <output>
+            <port id="4">
+                <dim>56</dim>
+            </port>
+        </output>
+    </layer>
+
+*Example 4D input, 2D output signal, center=false, default signal_length:*
+
+.. code-block:: xml
+   :force:
+
+    <layer ... type="ISTFT" ... >
+        <data center="false" ... />
+        <input>
+            <port id="0">
+                <dim>4</dim>
+                <dim>6</dim>
+                <dim>16</dim>
+                <dim>2</dim>
+            </port>
+            <port id="1">
+                <dim>7</dim>
+            </port>
+            <port id="2"></port> <!-- frame_size value: 11 -->
+            <port id="3"></port> <!-- frame_step value: 3 -->
+        </input>
+        <output>
+            <port id="4">
+                <dim>4</dim>
+                <dim>56</dim>
+            </port>
+        </output>
+    </layer>
+
+
+*Example 3D input, 1D output signal, center=true, default signal_length:*
+
+.. code-block:: xml
+   :force:
+
+    <layer ... type="ISTFT" ... >
+        <data center="true" ... />
+        <input>
+            <port id="0">
+                <dim>6</dim>
+                <dim>16</dim>
+                <dim>2</dim>
+            </port>
+            <port id="1">
+                <dim>7</dim>
+            </port>
+            <port id="2"></port> <!-- frame_size value: 11 -->
+            <port id="3"></port> <!-- frame_step value: 3 -->
+        </input>
+        <output>
+            <port id="4">
+                <dim>45</dim>
+            </port>
+        </output>
+    </layer>
+
+*Example 4D input, 2D output signal, center=true, default signal_length:*
+
+.. code-block:: xml
+   :force:
+
+    <layer ... type="ISTFT" ... >
+        <data center="true" ... />
+        <input>
+            <port id="0">
+                <dim>4</dim>
+                <dim>6</dim>
+                <dim>16</dim>
+                <dim>2</dim>
+            </port>
+            <port id="1">
+                <dim>7</dim>
+            </port>
+            <port id="2"></port> <!-- frame_size value: 11 -->
+            <port id="3"></port> <!-- frame_step value: 3 -->
+        </input>
+        <output>
+            <port id="4">
+                <dim>4</dim>
+                <dim>45</dim>
+            </port>
+        </output>
+    </layer>
+
+
+*Example 3D input, 1D output signal, center=false, signal_length input provided:*
+
+.. code-block:: xml
+   :force:
+
+    <layer ... type="ISTFT" ... >
+        <data center="false" ... />
+        <input>
+            <port id="0">
+                <dim>6</dim>
+                <dim>16</dim>
+                <dim>2</dim>
+            </port>
+            <port id="1">
+                <dim>7</dim>
+            </port>
+            <port id="2"></port> <!-- frame_size value: 11 -->
+            <port id="3"></port> <!-- frame_step value: 3 -->
+            <port id="4"></port> <!-- signal_length value: 64 -->
+        </input>
+        <output>
+            <port id="5">
+                <dim>64</dim>
+            </port>
+        </output>
+    </layer>
diff --git a/samples/cpp/benchmark_app/inputs_filling.cpp b/samples/cpp/benchmark_app/inputs_filling.cpp
@@ -610,6 +610,8 @@ ov::Tensor get_random_tensor(const std::pair<std::string, benchmark_app::InputIn
         return create_tensor_random_4bit(inputInfo.second, 0, 15);
     } else if (type == ov::element::i4) {
         return create_tensor_random_4bit(inputInfo.second, 0, 15);
+    } else if (type == ov::element::nf4) {
+        return create_tensor_random_4bit(inputInfo.second, 0, 15);
     } else if (type == ov::element::string) {
         const auto& in_info = inputInfo.second;
         const auto tensor_size = ov::shape_size(in_info.dataShape);

diff --git a/src/common/util/include/openvino/util/common_util.hpp b/src/common/util/include/openvino/util/common_util.hpp
@@ -180,5 +180,13 @@ constexpr std::array<std::conditional_t<std::is_void_v<T>, std::common_type_t<Ar
     return {std::forward<Args>(args)...};
 }
 
+/// \brief Tells whether the current process may use dynamic code.
+///
+/// On Windows only the declaration is provided here and the result is determined at run time by the
+/// out-of-line definition. On every other platform the function is a compile-time constant `true`.
+#if defined(_WIN32)
+bool may_i_use_dynamic_code();
+#else
+constexpr bool may_i_use_dynamic_code() {
+    return true;
+}
+#endif
+
 }  // namespace util
 }  // namespace ov
diff --git a/src/common/util/include/openvino/util/file_util.hpp b/src/common/util/include/openvino/util/file_util.hpp
@@ -239,7 +239,11 @@ inline std::string from_file_path(const ov::util::Path& path) {
 
 // TODO: remove this function after all calls use Path
+/// \brief Converts an `ov::util::Path` into the `FilePath` string type.
 inline FilePath to_file_path(const ov::util::Path& path) {
+#if defined(_WIN32) && defined(OPENVINO_ENABLE_UNICODE_PATH_SUPPORT)
+    // NOTE(review): presumably FilePath is a wide string in this configuration, which is why the narrow
+    // string() form is widened explicitly instead of returning native() - confirm.
+    return ov::util::string_to_wstring(path.string());
+#else
     return path.native();
+#endif
 }
 
 #ifdef OPENVINO_ENABLE_UNICODE_PATH_SUPPORT

diff --git a/src/common/util/src/common_util.cpp b/src/common/util/src/common_util.cpp
@@ -6,6 +6,10 @@
 
 #include <algorithm>
 
+#if defined(_WIN32)
+#    include <windows.h>
+#endif
+
 std::string ov::util::to_lower(const std::string& s) {
     std::string rc = s;
     std::transform(rc.begin(), rc.end(), rc.begin(), ::tolower);
@@ -60,3 +64,12 @@ std::string ov::util::filter_lines_by_prefix(const std::string& str, const std::
     }
     return res.str();
 }
+
+#if defined(_WIN32)
+/// \brief Checks whether the dynamic code mitigation policy allows the current process to use dynamic code.
+///
+/// Queries `ProcessDynamicCodePolicy` for the current process via `GetProcessMitigationPolicy`.
+///
+/// \return `false` only when the policy was read successfully and reports `ProhibitDynamicCode`;
+///         `true` otherwise, including when the policy could not be queried.
+bool ov::util::may_i_use_dynamic_code() {
+    PROCESS_MITIGATION_DYNAMIC_CODE_POLICY dynamic_code_policy = {0};
+    const BOOL policy_queried = GetProcessMitigationPolicy(GetCurrentProcess(),
+                                                           ProcessDynamicCodePolicy,
+                                                           &dynamic_code_policy,
+                                                           sizeof(dynamic_code_policy));
+    if (!policy_queried) {
+        // The query failed, so the buffer contents must not be interpreted. Nothing is known to be
+        // prohibited, hence keep the permissive answer the zero-initialized structure used to give.
+        return true;
+    }
+    return dynamic_code_policy.ProhibitDynamicCode != TRUE;
+}
+#endif
diff --git a/src/core/include/openvino/op/util/attr_types.hpp b/src/core/include/openvino/op/util/attr_types.hpp
@@ -20,7 +20,7 @@ enum class PadMode { CONSTANT = 0, EDGE, REFLECT, SYMMETRIC };
 OPENVINO_API
 std::ostream& operator<<(std::ostream& s, const PadMode& type);
 
-/// \brief Fill modes for the `SegmentMax` operator.
+/// \brief Fill modes to set default value for operators like `SegmentMax`.
 enum class FillMode { ZERO = 0, LOWEST };
 
 OPENVINO_API

diff --git a/src/core/include/openvino/opsets/opset16_tbl.hpp b/src/core/include/openvino/opsets/opset16_tbl.hpp
@@ -16,3 +16,4 @@ _OPENVINO_OP_REG(ShapeOf, ov::op::v3)
 // New operations added in opset16
 _OPENVINO_OP_REG(Identity, ov::op::v16)
 _OPENVINO_OP_REG(ISTFT, ov::op::v16)
+_OPENVINO_OP_REG(SegmentMax, ov::op::v16)
diff --git a/src/core/reference/include/openvino/reference/segment_max.hpp b/src/core/reference/include/openvino/reference/segment_max.hpp
@@ -0,0 +1,55 @@
+// Copyright (C) 2018-2025 Intel Corporation
+// SPDX-License-Identifier: Apache-2.0
+//
+
+#pragma once
+
+#include <algorithm>
+#include <limits>
+#include <vector>
+
+#include "openvino/core/shape.hpp"
+
+namespace ov::reference {
+
+/// \brief Reference implementation of SegmentMax for `int64_t` segment ids.
+///
+/// Rows of `data` (slices along the first dimension) are grouped by `segment_ids`. Each output row
+/// receives the element-wise maximum of all rows assigned to that segment. Segments that receive no
+/// row are filled with `empty_segment_value`.
+///
+/// \param data                 Input values; `data_shape[0]` rows of `inner_dim_size` elements each.
+/// \param data_shape           Shape of `data`; must have at least one dimension.
+/// \param segment_ids          One segment id per row of `data` (`data_shape[0]` entries).
+/// \param out                  Output buffer of `output_shape[0] * inner_dim_size` elements.
+/// \param output_shape         Shape of `out`; `output_shape[0]` is the number of segments.
+/// \param empty_segment_value  Value written to every element of a segment that has no rows.
+template <typename T, typename T_idx, std::enable_if_t<std::is_same<std::decay_t<T_idx>, int64_t>::value>* = nullptr>
+void segment_max(const T* data,
+                 const Shape& data_shape,
+                 const T_idx* segment_ids,
+                 T* out,
+                 const Shape& output_shape,
+                 const T empty_segment_value) {
+    const size_t num_segments = output_shape[0];
+    const auto inner_dim_size = shape_size(data_shape.begin() + 1, data_shape.end());
+
+    // Every segment starts out as empty; segments that receive rows are overwritten below.
+    std::fill(out, out + num_segments * inner_dim_size, empty_segment_value);
+
+    // Marks segments that already hold data. The fill value must not take part in the maximum,
+    // otherwise a segment whose values are all smaller than empty_segment_value would report
+    // the fill value instead of its real maximum.
+    std::vector<char> segment_has_data(num_segments, 0);
+
+    for (size_t i = 0; i < data_shape[0]; ++i) {
+        const T_idx segment_id = segment_ids[i];
+        // Ids outside [0, num_segments) have no output row. Negative ids must be rejected before
+        // the conversion to an unsigned index, or they would address memory outside of `out`.
+        if (segment_id < 0 || static_cast<size_t>(segment_id) >= num_segments) {
+            continue;
+        }
+        const auto segment_index = static_cast<size_t>(segment_id);
+        const T* src_row = data + i * inner_dim_size;
+        T* dst_row = out + segment_index * inner_dim_size;
+
+        if (segment_has_data[segment_index]) {
+            // Fold the row into the running element-wise maximum of the segment.
+            for (size_t j = 0; j < inner_dim_size; ++j) {
+                dst_row[j] = std::max(dst_row[j], src_row[j]);
+            }
+        } else {
+            // First row of the segment replaces the fill value unconditionally.
+            std::copy(src_row, src_row + inner_dim_size, dst_row);
+            segment_has_data[segment_index] = 1;
+        }
+    }
+}
+
+/// \brief SegmentMax overload for segment id types other than `int64_t`.
+///
+/// Copies the first `data_shape[0]` segment ids into an `int64_t` buffer and forwards the call to the
+/// `int64_t` overload. All other arguments are passed through unchanged.
+template <typename T, typename T_idx, std::enable_if_t<!std::is_same<std::decay_t<T_idx>, int64_t>::value>* = nullptr>
+void segment_max(const T* data,
+                 const Shape& data_shape,
+                 const T_idx* segment_ids,
+                 T* out,
+                 const Shape& output_shape,
+                 const T empty_segment_value) {
+    const auto ids_count = data_shape[0];
+    std::vector<int64_t> widened_ids;
+    widened_ids.assign(segment_ids, segment_ids + ids_count);
+    segment_max(data, data_shape, widened_ids.data(), out, output_shape, empty_segment_value);
+}
+
+}  // namespace ov::reference