Merge branch 'master' into alvoron_fq_decomposition_arm
alvoron authored Feb 15, 2025
2 parents c9809be + e737014 commit 39ce30d
Showing 41 changed files with 770 additions and 97 deletions.
14 changes: 11 additions & 3 deletions .github/workflows/code_style.yml
@@ -39,7 +39,11 @@ jobs:
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
level: warning
fail_on_error: true
fail_level: error
filter_mode: nofilter
exclude: |
"*/thirdparty/*"
"./temp/*"
clang-format-aarch64:
runs-on: ubuntu-22.04
@@ -71,7 +75,11 @@ jobs:
with:
github_token: ${{ secrets.GITHUB_TOKEN }}
level: warning
fail_on_error: true
fail_level: error
filter_mode: nofilter
exclude: |
"*/thirdparty/*"
"./temp/*"
ShellCheck:
runs-on: ubuntu-22.04
@@ -103,7 +111,7 @@ jobs:
level: style
reporter: github-pr-review
check_all_files_with_shebangs: true
fail_on_error: true
fail_level: error
exclude: |
"*/thirdparty/*"
"./temp/*"
55 changes: 35 additions & 20 deletions src/bindings/python/src/openvino/frontend/pytorch/fx_decoder.py
@@ -178,7 +178,7 @@ def __init__(self, pt_module, fx_gm=None, nodes=None,
self._input_signature = []
self._example_input = None

if issubclass(type(pt_module), torch.fx.graph_module.GraphModule):
if isinstance(pt_module, torch.fx.graph_module.GraphModule):
self._input_is_list = None
self._nodes = list(pt_module.graph.nodes)
found_types = []
@@ -187,38 +187,34 @@ def __init__(self, pt_module, fx_gm=None, nodes=None,
if value.op == 'placeholder':
self._inputs.append(i)
self._input_signature.append(value.name)
if hasattr(value, "meta") and ('tensor_meta' in value.meta.keys()) and value.meta['tensor_meta']:
found_shapes.append(value.meta['tensor_meta'].shape)
found_types.append(
OVAny(pt_to_ov_type_map[str(value.meta['tensor_meta'].dtype)]))
else:
found_shapes.append(None)
found_types.append(None)

found_shapes.append(self.get_found_shape(value))
found_types.append(self.get_found_dtype(value))
if found_shapes[-1] is not None:
new_shape = []
for dim in found_shapes[-1]:
if (dynamic_shapes or type(dim).__name__ == "SymInt"):
new_shape.append(-1)
else:
new_shape.append(dim)
found_shapes[-1] = torch.Size(new_shape)

elif value.op == 'output':
# Instead of putting output index, refer to its target
uargs = self.unpack_containers(value.args)
self._outputs = [(arg[0], self._nodes.index(arg[1]))
for arg in uargs if arg[1] is not None]
for idx, shape in enumerate(found_shapes):
if shape is not None:
new_shape = []
for dim in shape:
if (dynamic_shapes or type(dim).__name__ == "SymInt"):
new_shape.append(-1)
else:
new_shape.append(dim)
found_shapes[idx] = torch.Size(new_shape)

if not input_shapes or len(input_shapes) == 0:
self.input_shapes = found_shapes
if not input_types or len(input_types) == 0:
self.input_types = found_types

if hasattr(pt_module, "forward"):
input_params = inspect.signature(pt_module.forward).parameters
if hasattr(self.pt_module, "forward"):
input_params = inspect.signature(self.pt_module.forward).parameters
self._input_signature = list(input_params)

elif issubclass(type(pt_module), torch.fx.Node):
elif isinstance(pt_module, torch.fx.Node):
self._nodes = nodes # passed from outer context

# FIXME: Quadratic complexity nodes*nodes considering the outer loop over all nodes
@@ -234,6 +230,23 @@ def __init__(self, pt_module, fx_gm=None, nodes=None,
self.input_types.append(
BaseFXDecoder.get_type_for_value(arg))

@staticmethod
def get_found_shape(value) -> str:
# If input is a tensor, read the shape from meta data
if hasattr(value, "meta"):
if ('tensor_meta' in value.meta.keys()) and value.meta['tensor_meta']:
return value.meta['tensor_meta'].shape
if ('val' in value.meta.keys()) and isinstance(value.meta["val"], torch.Tensor):
return value.meta['val'].shape
return None

@staticmethod
def get_found_dtype(value) -> str:
# If input is a tensor, read the data type from meta data
if hasattr(value, "meta") and ('tensor_meta' in value.meta.keys()) and value.meta['tensor_meta']:
return OVAny(pt_to_ov_type_map[str(value.meta['tensor_meta'].dtype)])
return None

def get_input_signature_name(self, index: int) -> str:
if self._input_signature is not None and index < len(self._input_signature):
return self._input_signature[index]
@@ -331,6 +344,8 @@ def get_subgraph_decoder(self, index):

def get_op_type(self):
if self.pt_module.op == 'call_function':
if type(self.pt_module.target).__name__ == "EdgeOpOverload":
return self.pt_module.target.__name__
return str(self.pt_module.target)
elif self.pt_module.op == 'get_attr':
return 'get_attr' # FIXME should be aligned with get_attr from TS implementation
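Note: the new get_found_shape()/get_found_dtype() helpers read input shapes and dtypes from FX node metadata, with get_found_shape() also accepting a tensor stored under meta['val']. A minimal sketch of where that metadata can come from in a plain torch.fx trace (the model and tensor below are illustrative, not part of the commit):

import torch
from torch.fx import symbolic_trace
from torch.fx.passes.shape_prop import ShapeProp

class SmallModel(torch.nn.Module):
    def forward(self, x):
        return torch.relu(x) + 1

gm = symbolic_trace(SmallModel())
example = torch.randn(2, 3)
ShapeProp(gm).propagate(example)  # records node.meta['tensor_meta'] for tensor-producing nodes

for node in gm.graph.nodes:
    if node.op == 'placeholder':
        meta = node.meta.get('tensor_meta')
        if meta is not None:
            print(node.name, tuple(meta.shape), meta.dtype)  # e.g. x (2, 3) torch.float32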
@@ -75,6 +75,7 @@ def __init__(self, options):
"torch.ops.aten.argmin.default": None,
"torch.ops.aten.as_strided.default": None,
"torch.ops.aten.as_strided_.default": None,
"torch.ops.aten.as_strided_copy.default": None,
"torch.ops.aten.asin.default": None,
"torch.ops.aten.asinh.default": None,
"torch.ops.aten.asinh.default": None,
@@ -118,6 +119,7 @@ def __init__(self, options):
"torch.ops.aten.erf.default": None,
"torch.ops.aten.exp.default": None,
"torch.ops.aten.expand.default": None,
"torch.ops.aten.expand_copy.default": None,
"torch.ops.aten.fake_quantize_per_channel_affine_cachemask.default": None,
"torch.ops.aten.fill.Scalar": None,
"torch.ops.aten.fill_.Scalar": None,
@@ -196,6 +198,7 @@ def __init__(self, options):
"torch.ops.aten.new_zeros.default": None,
"torch.ops.aten.ones.default": None,
"torch.ops.aten.permute.default": None,
"torch.ops.aten.permute_copy.default": None,
"torch.ops.aten.pow.Scalar": None,
"torch.ops.aten.pow.Tensor_Scalar": None,
"torch.ops.aten.pow.Tensor_Tensor": None,
@@ -213,6 +216,7 @@ def __init__(self, options):
"torch.ops.aten.scatter.src": None,
"torch.ops.aten.scatter.value": None,
"torch.ops.aten.select.int": None,
"torch.ops.aten.select_copy.int": None,
"torch.ops.aten.select_scatter.default": None,
"torch.ops.aten.sigmoid.default": None,
"torch.ops.aten.sigmoid_.default": None,
@@ -222,13 +226,16 @@ def __init__(self, options):
"torch.ops.aten.sin.default": None,
"torch.ops.aten.sinh.default": None,
"torch.ops.aten.slice.Tensor": None,
"torch.ops.aten.slice_copy.Tensor": None,
"torch.ops.aten.slice_scatter.default": None,
"torch.ops.aten.sort.default": None,
"torch.ops.aten.split.Tensor": None,
"torch.ops.aten.split_with_sizes.default": None,
"torch.ops.aten.split_with_sizes_copy.default": None,
"torch.ops.aten.sqrt.default": None,
"torch.ops.aten.squeeze.dim": None,
"torch.ops.aten.squeeze.dims": None,
"torch.ops.aten.squeeze_copy.dims": None,
"torch.ops.aten.stack.default": None,
"torch.ops.aten.std.correction": None,
"torch.ops.aten.sub.default": None,
@@ -246,10 +253,12 @@ def __init__(self, options):
"torch.ops.aten.unbind.int": None,
"torch.ops.aten.unfold.default": None,
"torch.ops.aten.unsqueeze.default": None,
"torch.ops.aten.unsqueeze_copy.default": None,
"torch.ops.aten.upsample_nearest2d.default": None,
"torch.ops.aten.var.correction": None,
"torch.ops.aten.var_mean.correction": None,
"torch.ops.aten.view.default": None,
"torch.ops.aten.view_copy.default": None,
"torch.ops.aten.where.self": None,
"torch.ops.aten.zeros.default": None,
"torch.ops.aten.zeros_like.default": None,
@@ -8,6 +8,7 @@

#include "itt.hpp"
#include "openvino/core/rt_info.hpp"
#include "openvino/core/validation_util.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/broadcast.hpp"
#include "openvino/op/concat.hpp"
@@ -68,9 +69,23 @@ std::shared_ptr<ov::Node> ov::pass::ScaledDotProductAttentionDecomposition::deco
auto one_f = register_new_node<v1::ConvertLike>(one_i, query);
auto zero_f = register_new_node<v1::ConvertLike>(zero_i, query);

auto build_extract_dim_subgraph = [this, &zero_i](const std::shared_ptr<v3::ShapeOf>& shape_of,
const int64_t idx) -> std::shared_ptr<ov::Node> {
const auto dim_to_extract_const = v0::Constant::create(element::i32, Shape{}, {idx});
const auto gather = std::make_shared<v8::Gather>(shape_of, dim_to_extract_const, zero_i);
// When dim_to_extract is static but the whole shape is dynamic,
// ConstantFolding can't fold ShapeOf->Gather subgraph in this case.
// So it's better to explicitly extract the needed dimension.
if (auto constant = ov::util::get_constant_from_source(gather)) {
return register_new_node(constant);
}
register_new_node(dim_to_extract_const);
return register_new_node(gather);
};

Output<Node> scale;
if (node->get_input_size() < 5) {
scale = register_new_node<v8::Gather>(q_shape, minus_one, zero_i)->output(0);
scale = build_extract_dim_subgraph(q_shape, -1);
scale = register_new_node<v1::ConvertLike>(scale, query);
auto sqrt_scale = register_new_node<v0::Sqrt>(scale);
scale = register_new_node<v1::Divide>(one_f, sqrt_scale);
@@ -112,8 +127,8 @@ std::shared_ptr<ov::Node> ov::pass::ScaledDotProductAttentionDecomposition::deco
atten_mask = mask;
}
} else {
auto target_s_len = register_new_node<v8::Gather>(q_shape, minus_two, zero_i);
auto source_s_len = register_new_node<v8::Gather>(k_shape, minus_two, zero_i);
auto target_s_len = build_extract_dim_subgraph(q_shape, -2);
auto source_s_len = build_extract_dim_subgraph(k_shape, -2);
auto ssl = register_new_node<v0::Unsqueeze>(source_s_len, zero_i);
auto tsl = register_new_node<v0::Unsqueeze>(target_s_len, zero_i);
auto mask_shape = register_new_node<v0::Concat>(OutputVector{tsl, ssl}, 0);
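Note: the new build_extract_dim_subgraph() lambda tries ov::util::get_constant_from_source() first, so a dimension that is statically known (for example the head size used for the default scale) is emitted as a plain Constant even when the rest of the shape is dynamic; only genuinely dynamic dimensions keep the ShapeOf->Gather subgraph. A rough illustration of that per-dimension decision in plain Python (a sketch, not the OpenVINO API; None marks a dynamic dimension):

def extract_dim(partial_shape, idx):
    dim = partial_shape[idx]
    if dim is not None:
        # Statically known dimension: emit a Constant directly, nothing left to fold.
        return f"Constant({dim})"
    # Dynamic dimension: fall back to a ShapeOf -> Gather subgraph.
    return f"Gather(ShapeOf(input), Constant({idx}), axis=0)"

print(extract_dim([None, None, 24, 64], -1))    # Constant(64)  -> used for the default scale
print(extract_dim([None, None, 24, 64], -2))    # Constant(24)
print(extract_dim([None, None, None, 64], -2))  # Gather(ShapeOf(input), Constant(-2), axis=0)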
@@ -31,13 +31,12 @@
using namespace ov;
using namespace testing;

const std::shared_ptr<ov::Node> scaled_dot_product_attention_decomposition(
const std::shared_ptr<ov::Node> query,
const std::shared_ptr<ov::Node> key,
const std::shared_ptr<ov::Node> value,
const std::shared_ptr<ov::Node> attention_mask,
const std::shared_ptr<ov::Node> scale,
const bool casual);
const std::shared_ptr<ov::Node> scaled_dot_product_attention_decomposition(std::shared_ptr<ov::Node> query,
std::shared_ptr<ov::Node> key,
std::shared_ptr<ov::Node> value,
std::shared_ptr<ov::Node> attention_mask,
std::shared_ptr<ov::Node> scale,
bool casual);

TEST_F(TransformationTestsF, ScaledDotProductAttentionDecompositionStaticBasic) {
const PartialShape query_shape{1, 32, 32};
@@ -129,6 +128,34 @@ TEST_F(TransformationTestsF, ScaledDotProductAttentionDecompositionStaticBroadca
}
}

TEST_F(TransformationTestsF, ScaledDotProductAttentionCasualPartiallyDynamic) {
const PartialShape query_shape{-1, -1, 24, 64};
const PartialShape key_shape{-1, -1, 24, 64};
const PartialShape value_shape{-1, -1, -1, 64};
const PartialShape attention_mask_shape{-1, -1, -1, -1};
const auto casual = true;

const auto query = std::make_shared<ov::op::v0::Parameter>(element::f32, query_shape);
const auto key = std::make_shared<ov::op::v0::Parameter>(element::f32, key_shape);
const auto value = std::make_shared<ov::op::v0::Parameter>(element::f32, value_shape);
const auto attention_mask = std::make_shared<ov::op::v0::Parameter>(element::f32, attention_mask_shape);
{
const auto scaled_dot_product_attention =
std::make_shared<ov::op::v13::ScaledDotProductAttention>(query, key, value, attention_mask, casual);

model = std::make_shared<ov::Model>(NodeVector{scaled_dot_product_attention},
ParameterVector{query, key, value, attention_mask});
manager.register_pass<ov::pass::ScaledDotProductAttentionDecomposition>();
}

{
const auto scaled_dot_product_attention =
scaled_dot_product_attention_decomposition(query, key, value, attention_mask, nullptr, casual);
model_ref = std::make_shared<ov::Model>(NodeVector{scaled_dot_product_attention},
ParameterVector{query, key, value, attention_mask});
}
}

TEST_F(TransformationTestsF, ScaledDotProductAttentionDecompositionDynamic) {
const PartialShape query_shape{-1, -1, -1};
const PartialShape key_shape{-1, -1, -1};
@@ -159,13 +186,12 @@ TEST_F(TransformationTestsF, ScaledDotProductAttentionDecompositionDynamic) {
}
}

const std::shared_ptr<ov::Node> scaled_dot_product_attention_decomposition(
const std::shared_ptr<ov::Node> query,
const std::shared_ptr<ov::Node> key,
const std::shared_ptr<ov::Node> value,
const std::shared_ptr<ov::Node> attention_mask,
const std::shared_ptr<ov::Node> scale,
const bool casual) {
const std::shared_ptr<ov::Node> scaled_dot_product_attention_decomposition(std::shared_ptr<ov::Node> query,
std::shared_ptr<ov::Node> key,
std::shared_ptr<ov::Node> value,
std::shared_ptr<ov::Node> attention_mask,
std::shared_ptr<ov::Node> scale,
bool casual) {
const auto q_shape = std::make_shared<ov::op::v3::ShapeOf>(query, element::i32);
const auto k_shape = std::make_shared<ov::op::v3::ShapeOf>(key, element::i32);
const auto minus_one = ov::op::v0::Constant::create(element::i32, Shape{}, {-1});
@@ -175,6 +201,23 @@ const std::shared_ptr<ov::Node> scaled_dot_product_attention_decomposition(
const auto one_f = std::make_shared<ov::op::v1::ConvertLike>(one_i, query);
const auto zero_f = std::make_shared<ov::op::v1::ConvertLike>(zero_i, query);

auto extract_dim = [&zero_i](const std::shared_ptr<ov::op::v3::ShapeOf>& shape_of,
const int64_t idx) -> std::shared_ptr<ov::Node> {
const auto& shape = shape_of->get_input_partial_shape(0);
const auto& dim = shape[idx];
if (dim.is_static()) {
return ov::op::v0::Constant::create(element::i32, Shape{}, {dim.get_length()});
}
const auto dim_to_extract_const = ov::op::v0::Constant::create(element::i32, Shape{}, {idx});
return std::make_shared<ov::op::v8::Gather>(shape_of, dim_to_extract_const, zero_i);
};

if (scale == nullptr) {
scale = extract_dim(q_shape, -1);
scale = std::make_shared<ov::op::v1::ConvertLike>(scale, query);
auto sqrt_scale = std::make_shared<ov::op::v0::Sqrt>(scale);
scale = std::make_shared<ov::op::v1::Divide>(one_f, sqrt_scale);
}
const auto q_scaled = std::make_shared<ov::op::v1::Multiply>(query, scale);
auto k_rank = std::make_shared<ov::op::v3::ShapeOf>(k_shape, element::i32)->output(0);
const auto k_last_dim = std::make_shared<ov::op::v1::Add>(k_rank, minus_one);
@@ -204,8 +247,8 @@ const std::shared_ptr<ov::Node> scaled_dot_product_attention_decomposition(
atten_mask = mask;
}
} else {
const auto target_s_len = std::make_shared<ov::op::v8::Gather>(q_shape, minus_two, zero_i);
const auto source_s_len = std::make_shared<ov::op::v8::Gather>(k_shape, minus_two, zero_i);
const auto target_s_len = extract_dim(q_shape, -2);
const auto source_s_len = extract_dim(k_shape, -2);
const auto ssl = std::make_shared<ov::op::v0::Unsqueeze>(source_s_len, zero_i);
const auto tsl = std::make_shared<ov::op::v0::Unsqueeze>(target_s_len, zero_i);
const auto mask_shape = std::make_shared<ov::op::v0::Concat>(OutputVector{tsl, ssl}, 0);
17 changes: 16 additions & 1 deletion src/core/src/runtime/itensor.cpp
@@ -6,6 +6,7 @@

#include <memory>

#include "compare.hpp"
#include "openvino/core/except.hpp"
#include "openvino/core/shape_util.hpp"
#include "openvino/core/type/element_iterator.hpp"
@@ -46,7 +47,21 @@ bool ITensor::is_continuous() const {
// OpenVINO doesn't support strides for lp types
return true;
}
return default_byte_strides(get_shape(), get_element_type()) == get_strides();

const auto& strides = get_strides();
auto stride = strides.rbegin();
const auto default_strides = default_byte_strides(get_shape(), get_element_type());
auto default_stride = default_strides.rbegin();

for (; stride != strides.rend(); ++stride, ++default_stride) {
if (*stride != *default_stride) {
break;
}
}

const auto default_last = default_strides.rend();
return (default_stride == default_last) || (*default_stride < *stride && (get_shape()[0] == 1) &&
std::all_of(default_stride, default_last, cmp::Equal(*default_stride)));
}

void ITensor::copy_to(const std::shared_ptr<ov::ITensor>& dst) const {
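Note: the reworked ITensor::is_continuous() no longer requires an exact match with the default byte strides; a mismatch is tolerated when it only affects leading dimensions of size 1, whose stride is never actually stepped over. A rough re-statement of the rule in plain Python (a sketch, not the OpenVINO API; the shapes and strides below are made up):

def default_byte_strides(shape, itemsize):
    strides = [itemsize] * len(shape)
    for i in range(len(shape) - 2, -1, -1):
        strides[i] = strides[i + 1] * shape[i + 1]
    return strides

def is_continuous(shape, strides, itemsize):
    default = default_byte_strides(shape, itemsize)
    # Compare strides with the defaults, starting from the innermost dimension.
    i = len(shape) - 1
    while i >= 0 and strides[i] == default[i]:
        i -= 1
    if i < 0:
        return True  # exact match with the default strides
    # Tolerate a larger stride only when every dimension from here outwards has size 1.
    return strides[i] > default[i] and shape[0] == 1 and all(d == default[i] for d in default[:i + 1])

print(is_continuous((1, 2, 3), (24, 12, 4), 4))   # True: default strides for 4-byte elements
print(is_continuous((1, 2, 3), (100, 12, 4), 4))  # True: dim 0 has size 1, its stride is never used
print(is_continuous((2, 2, 3), (100, 12, 4), 4))  # False: dim 0 is actually stepped with a padded stride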