[PASS][HuaweiAscendNPU] Fix constant folding passes and the precision of the Ascend meshgrid op #9940

Merged
@@ -33,13 +33,24 @@ int ConvertMeshgrid(Converter* converter, core::Operation* operation) {
       input_operator = converter->ConvertOperand(input_operand);
     }
     auto output_operand = output_operands[i];
-    std::vector<int32_t> shape(output_operand->type.dimensions.data,
-                               output_operand->type.dimensions.data +
-                                   output_operand->type.dimensions.count);
-    auto shape_operator = converter->AddInt32ConstantOperator(shape);
+    std::vector<int32_t> output_shape(
+        output_operand->type.dimensions.data,
+        output_operand->type.dimensions.data +
+            output_operand->type.dimensions.count);
+    // Reshape input
+    std::vector<int32_t> view_shape(output_count, 1);
+    view_shape[i] = output_shape[i];
+    auto view_shape_operator = converter->AddInt32ConstantOperator(view_shape);
+    auto reshape_op =
+        converter->AddOperator<ge::op::Reshape>(output_operand, "reshape");
+    SET_INPUT(reshape_op, x, input_operator);
+    SET_INPUT(reshape_op, shape, view_shape_operator);
+    auto reshape_input_operator = MAP_OUTPUT(reshape_op, y, output_operand);
+    // BroadcastTo op
+    auto shape_operator = converter->AddInt32ConstantOperator(output_shape);
     auto broadcast_to_op =
         converter->AddOperator<ge::op::BroadcastTo>(output_operands[i]);
-    SET_INPUT(broadcast_to_op, x, input_operator);
+    SET_INPUT(broadcast_to_op, x, reshape_input_operator);
     SET_INPUT(broadcast_to_op, shape, shape_operator);
     MAP_OUTPUT(broadcast_to_op, y, output_operand);
   }
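For context on the fix above: meshgrid produces its i-th output by viewing the i-th 1-D input with its length on axis i and 1 everywhere else, then broadcasting that view to the full output shape, which is exactly the Reshape + BroadcastTo pair the converter now emits (the old code broadcast the raw input directly, losing the per-axis placement). Below is a minimal standalone sketch of those semantics for the 2-D case; it uses plain std::vector rather than GE/NNAdapter types, and MeshgridOutput is a hypothetical helper, not code from this PR:

#include <cstdint>
#include <iostream>
#include <vector>

// 2-D meshgrid semantics: input i is first viewed with shape
// [1, ..., len_i, ..., 1] (its length on axis i, 1 elsewhere), then
// broadcast to the full output shape, mirroring Reshape + BroadcastTo.
std::vector<int32_t> MeshgridOutput(const std::vector<int32_t>& x,
                                    int axis,  // output axis x varies along
                                    int rows,
                                    int cols) {
  std::vector<int32_t> out(static_cast<size_t>(rows) * cols);
  for (int r = 0; r < rows; ++r) {
    for (int c = 0; c < cols; ++c) {
      out[static_cast<size_t>(r) * cols + c] = (axis == 0) ? x[r] : x[c];
    }
  }
  return out;
}

int main() {
  std::vector<int32_t> a = {1, 2, 3};    // shape [3]
  std::vector<int32_t> b = {10, 20};     // shape [2]
  auto ga = MeshgridOutput(a, 0, 3, 2);  // view [3, 1] -> broadcast [3, 2]
  auto gb = MeshgridOutput(b, 1, 3, 2);  // view [1, 2] -> broadcast [3, 2]
  for (auto v : ga) std::cout << v << ' ';  // 1 1 2 2 3 3
  std::cout << '\n';
  for (auto v : gb) std::cout << v << ' ';  // 10 20 10 20 10 20
  std::cout << '\n';
}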
@@ -47,8 +47,8 @@ void AssignValueCalcOfflinePass::RemoveAssignValuePattern(
     }
   }
   if (has_extra_producers) {
-    LOG(WARNING)
-        << "The output var of op is not supported with multiple producers";
+    VLOG(5) << "WARNING: The output var of op is not supported with multiple "
+               "producers";
     continue;
   }
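A note on the logging change in this hunk (it recurs throughout the PR): LOG(WARNING) prints unconditionally, while VLOG(5) is emitted only when verbose logging is enabled at level 5 or higher, e.g. by running with GLOG_v=5 under glog-style logging. A rough standalone sketch of that gating; VerbosityLevel and MY_VLOG are illustrative stand-ins for what the logging library provides:

#include <cstdlib>
#include <iostream>

// Returns the active verbosity level, mimicking how glog picks up GLOG_v.
static int VerbosityLevel() {
  const char* v = std::getenv("GLOG_v");
  return v ? std::atoi(v) : 0;
}

// Stand-in for VLOG(level): the stream statement runs only when the
// verbosity threshold is at least `level`.
#define MY_VLOG(level) \
  if (VerbosityLevel() >= (level)) std::cerr

int main() {
  std::cerr << "WARNING: always printed, like LOG(WARNING)\n";
  MY_VLOG(5) << "WARNING: printed only when GLOG_v >= 5, like VLOG(5)\n";
  return 0;
}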

@@ -71,21 +71,24 @@ void AssignValueCalcOfflinePass::RemoveAssignValuePattern(
       shape_int64_t.push_back(static_cast<int64_t>(value));
     }
     out_t->Resize(DDim(shape_int64_t));
-    auto out_data = out_t->mutable_data<float>();

     if (dtype == static_cast<int>(lite::core::FluidType::INT32)) {
       auto int32_values = op_desc->GetAttr<std::vector<int>>("int32_values");
+      auto out_data = out_t->mutable_data<int32_t>();
       memcpy(out_data, int32_values.data(), sizeof(int) * int32_values.size());
     } else if (dtype == static_cast<int>(lite::core::FluidType::FP32)) {
       auto fp32_values = op_desc->GetAttr<std::vector<float>>("fp32_values");
+      auto out_data = out_t->mutable_data<float>();
       memcpy(out_data, fp32_values.data(), sizeof(float) * fp32_values.size());
     } else if (dtype == static_cast<int>(lite::core::FluidType::INT64)) {
       auto int64_values =
           op_desc->GetAttr<std::vector<int64_t>>("int64_values");
+      auto out_data = out_t->mutable_data<int64_t>();
       memcpy(
           out_data, int64_values.data(), sizeof(int64_t) * int64_values.size());
     } else if (dtype == static_cast<int>(lite::core::FluidType::BOOL)) {
       auto bool_values = op_desc->GetAttr<std::vector<int>>("bool_values");
+      auto out_data = out_t->mutable_data<bool>();
       memcpy(out_data, bool_values.data(), sizeof(bool) * bool_values.size());
     } else {
       LOG(FATAL) << "Unsupported dtype for assign_value op: " << dtype;
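The point of this hunk is that the output buffer must be allocated with the element type matching dtype; the old code called mutable_data<float>() once up front, so, for example, a 3-element int64 attribute needed 24 bytes but got a 12-byte float buffer. A small standalone illustration of that size mismatch, using std::vector as a stand-in for the tensor buffer:

#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

int main() {
  std::vector<int64_t> values = {1, 2, 3};
  // Old behavior: the destination was always float, so the buffer holds
  // 3 * sizeof(float) = 12 bytes while the copy needs 24. Uncommenting the
  // memcpy below would write past the end of `wrong`:
  std::vector<float> wrong(values.size());
  // std::memcpy(wrong.data(), values.data(), sizeof(int64_t) * values.size());
  // New behavior: allocate with the matching element type first.
  std::vector<int64_t> right(values.size());
  std::memcpy(right.data(), values.data(), sizeof(int64_t) * values.size());
  for (auto v : right) std::cout << v << ' ';  // 1 2 3
  std::cout << '\n';
  (void)wrong;
}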
@@ -90,7 +90,23 @@ void FillConstantCalcOfflinePass::RemoveFillConstantPattern(
   }
   // Get fill_constant's attr
   auto dtype = op_desc->GetAttr<int>("dtype");
-  auto value = op_desc->GetAttr<float>("value");
+  auto str_value = op_desc->GetAttr<std::string>("str_value");
+  float value;
+  if (str_value.empty()) {
+    value = op_desc->GetAttr<float>("value");
+  } else {
+    // Handle NaN/Inf first, which cannot be read from a stream.
+    if (str_value == "inf") {
+      value = std::numeric_limits<float>::infinity();
+    } else if (str_value == "-inf") {
+      value = -std::numeric_limits<float>::infinity();
+    } else if (str_value == "nan") {
+      value = std::numeric_limits<float>::quiet_NaN();
+    } else {
+      std::stringstream convert_stream(str_value);
+      convert_stream >> value;
+    }
+  }
   auto shape = op_desc->GetAttr<std::vector<int64_t>>("shape");
   // Get fill_constant's output tensor
   auto out_var = scope->FindVar(op_desc->Output("Out").front());
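Why the special cases are needed: stream extraction with operator>> does not accept the textual forms "inf", "-inf", or "nan"; it fails and leaves the target unmodified, so these values must be mapped to the float limits explicitly. A minimal standalone sketch of the parsing logic this hunk adds; ParseFillValue is a hypothetical helper, not code from the PR:

#include <iostream>
#include <limits>
#include <sstream>
#include <string>

// Maps the fill_constant string attribute to a float, handling the
// non-finite spellings that stream extraction cannot parse.
float ParseFillValue(const std::string& s) {
  if (s == "inf") return std::numeric_limits<float>::infinity();
  if (s == "-inf") return -std::numeric_limits<float>::infinity();
  if (s == "nan") return std::numeric_limits<float>::quiet_NaN();
  std::stringstream ss(s);
  float v = 0.f;  // stays 0 if extraction fails
  ss >> v;
  return v;
}

int main() {
  std::cout << ParseFillValue("3.5") << '\n';   // 3.5
  std::cout << ParseFillValue("inf") << '\n';   // inf
  std::cout << ParseFillValue("-inf") << '\n';  // -inf
}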
56 changes: 37 additions & 19 deletions lite/core/optimizer/mir/elimination/range_calc_offline_pass.cc
@@ -13,12 +13,14 @@
 // limitations under the License.

 #include "lite/core/optimizer/mir/elimination/range_calc_offline_pass.h"
+
 #include <algorithm>
 #include <cmath>
 #include <list>
 #include <memory>
 #include <set>
 #include <vector>
+
 #include "lite/core/optimizer/mir/pass.h"
 #include "lite/core/optimizer/mir/pass_registry.h"
 #include "lite/core/optimizer/mir/pattern_matcher.h"
@@ -36,6 +38,25 @@ int64_t GetSpanCount(T start, T end, T step) {
              : std::ceil(std::abs((end - start) / step));
 }

+template <typename T>
+void RangeCompute(lite::Tensor* start_tensor,
+                  lite::Tensor* end_tensor,
+                  lite::Tensor* step_tensor,
+                  lite::Tensor* output_tensor) {
+  auto start = start_tensor->mutable_data<T>()[0];
+  auto end = end_tensor->mutable_data<T>()[0];
+  auto step = step_tensor->mutable_data<T>()[0];
+  // Calc range
+  int64_t size = GetSpanCount(start, end, step);
+  output_tensor->Resize(DDim({size}));
+  auto out_data = output_tensor->mutable_data<T>();
+  T value = start;
+  for (int64_t i = 0; i < size; ++i) {
+    out_data[i] = value;
+    value += step;
+  }
+}
+
 void RangeCalcOfflinePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
   RemoveRangePattern(graph);
 }
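The integral branch of GetSpanCount is folded out of the diff above. Based on the ternary that is visible, the full helper presumably looks like the sketch below (a reconstruction under that assumption, not verified against the file); it computes the element count of the half-open interval [start, end) for a given step:

#include <cmath>    // std::ceil, std::abs
#include <cstdint>
#include <cstdlib>  // std::abs for integer types
#include <iostream>
#include <type_traits>

template <typename T>
int64_t GetSpanCount(T start, T end, T step) {
  // Integral types: round up without going through floating point;
  // floating types: ceiling of the exact quotient.
  return std::is_integral<T>::value
             ? (std::abs(end - start) + std::abs(step) - 1) / std::abs(step)
             : std::ceil(std::abs((end - start) / step));
}

int main() {
  std::cout << GetSpanCount<int64_t>(0, 10, 3) << '\n';       // 4 -> {0, 3, 6, 9}
  std::cout << GetSpanCount<float>(0.f, 1.f, 0.25f) << '\n';  // 4 -> {0, 0.25, 0.5, 0.75}
}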
@@ -53,16 +74,15 @@ void RangeCalcOfflinePass::RemoveRangePattern(
     }
   }
   if (has_extra_producers) {
-    LOG(WARNING)
-        << "Unsupported for op output var containing multiple producers";
+    VLOG(5) << "WARNING: Unsupported for op output var containing multiple "
+               "producers";
     continue;
   }

   std::set<const Node*> nodes2rm_;
   auto& range_instruct = node->AsStmt();
   auto* scope = range_instruct.op()->scope();
   auto op_desc = range_instruct.mutable_op_info();
-
   // Get range's input tensor
   auto start_var = scope->FindVar(op_desc->Input("Start").front());
   auto end_var = scope->FindVar(op_desc->Input("End").front());
@@ -72,28 +92,26 @@
   auto step_t = step_var->GetMutable<lite::Tensor>();
   if (!start_t->persistable() || !end_t->persistable() ||
       !step_t->persistable()) {
-    LOG(WARNING) << "RangeCalcOfflinePass does not support input that is not "
-                    "persistable";
+    VLOG(5)
+        << "WARNING: RangeCalcOfflinePass does not support input that is not "
+           "persistable";

[Review thread on the VLOG(5) change above]

Collaborator: Why was the warning changed to a VLOG?

zhupengyang (Collaborator), Jan 18, 2023: Probably because we don't want every execution to print so much log output.

Author (Collaborator): Yes. LOG(WARNING) prints without any restriction and can sometimes emit a large volume of logs.

     continue;
   }
-    auto start = start_t->mutable_data<float>()[0];
-    auto end = end_t->mutable_data<float>()[0];
-    auto step = step_t->mutable_data<float>()[0];
     // Get range's output tensor
     auto out_var = scope->FindVar(op_desc->Output("Out").front());
     auto out_t = out_var->GetMutable<lite::Tensor>();
-
-    // Calc range
-    int64_t size = GetSpanCount(start, end, step);
-
-    out_t->Resize(DDim({size}));
-    auto out_data = out_t->mutable_data<float>();
-
-    float value = start;
-    for (int64_t i = 0; i < size; ++i) {
-      out_data[i] = value;
-      value += step;
+    // Get input precision
+    auto precision = start_t->precision();
+    if (precision == PrecisionType::kInt64) {
+      RangeCompute<int64_t>(start_t, end_t, step_t, out_t);
+    } else if (precision == PrecisionType::kInt32) {
+      RangeCompute<int32_t>(start_t, end_t, step_t, out_t);
+    } else if (precision == PrecisionType::kFloat) {
+      RangeCompute<float>(start_t, end_t, step_t, out_t);
+    } else {
+      LOG(FATAL) << "Unsupported precision: " << PrecisionToStr(precision);
     }

     // Offline calc range, only retain output tensor as persistable tensor
     out_t->set_persistable(true);
     auto range_outlinks = node->outlinks;
38 changes: 27 additions & 11 deletions lite/core/optimizer/mir/elimination/scale_calc_offline_pass.cc
@@ -29,6 +29,20 @@ namespace paddle {
 namespace lite {
 namespace mir {

+template <typename T>
+void ScaleCompute(lite::Tensor* x_tensor,
+                  float scale,
+                  float bias,
+                  lite::Tensor* out_tensor) {
+  auto x_data = x_tensor->mutable_data<T>();
+  auto x_dims = x_tensor->dims();
+  out_tensor->Resize(x_dims);
+  auto out_data = out_tensor->mutable_data<T>();
+  for (int i = 0; i < x_dims.production(); i++) {
+    out_data[i] = x_data[i] * scale + bias;
+  }
+}
+
 void ScaleCalcOfflinePass::Apply(const std::unique_ptr<SSAGraph>& graph) {
   RemoveScalePattern(graph);
 }
@@ -60,27 +74,29 @@ void ScaleCalcOfflinePass::RemoveScalePattern(
     auto x_t = x_var->GetMutable<lite::Tensor>();
     if (!x_t->persistable()) {
       VLOG(5) << "WARNING: ScaleCalcOfflinePass does not support input that is "
-              "not persistable";
+                 "not persistable";
       continue;
     }
-    auto x_data = x_t->mutable_data<float>();
-    auto x_dims = x_t->dims();
-    // Get scale's output tensor
-    auto out_var = scope->FindVar(op_desc->Output("Out").front());
-    auto out_t = out_var->GetMutable<lite::Tensor>();
     // Get scale's attr
     auto scale = op_desc->GetAttr<float>("scale");
     auto bias = op_desc->GetAttr<float>("bias");
     auto bias_after_scale = op_desc->GetAttr<bool>("bias_after_scale");
     if (!bias_after_scale) {
       bias *= scale;
     }
+    // Get scale's output tensor
+    auto out_var = scope->FindVar(op_desc->Output("Out").front());
+    auto out_t = out_var->GetMutable<lite::Tensor>();
-    out_t->Resize(x_dims);
-    auto out_data = out_t->mutable_data<float>();
-    for (int i = 0; i < x_dims.production(); i++) {
-      out_data[i] = x_data[i] * scale + bias;
+    auto precision = x_t->precision();
+    if (precision == PrecisionType::kInt64) {
+      ScaleCompute<int64_t>(x_t, scale, bias, out_t);
+    } else if (precision == PrecisionType::kInt32) {
+      ScaleCompute<int32_t>(x_t, scale, bias, out_t);
+    } else if (precision == PrecisionType::kFloat) {
+      ScaleCompute<float>(x_t, scale, bias, out_t);
+    } else {
+      LOG(FATAL) << "Unsupported precision: " << PrecisionToStr(precision);
     }

     // Offline calc scale, only retain output tensor as persistable tensor
     out_t->set_persistable(true);
     auto scale_outlinks = node->outlinks;
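One subtlety in this hunk: ScaleCompute always evaluates x * scale + bias, and the earlier bias *= scale folding makes that single formula cover both attribute settings, since (x + bias) * scale = x * scale + bias * scale. A quick standalone numeric check with illustrative values:

#include <iostream>

int main() {
  float x = 2.f, scale = 3.f, bias = 1.f;
  // bias_after_scale == true:  out = x * scale + bias
  float out_after = x * scale + bias;  // 7
  // bias_after_scale == false: out = (x + bias) * scale; folding the bias
  // first (bias *= scale), as the pass does, yields the same result:
  float folded_bias = bias * scale;            // 3
  float out_before = x * scale + folded_bias;  // 9 == (2 + 1) * 3
  std::cout << out_after << ' ' << out_before << '\n';  // 7 9
}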
1 change: 0 additions & 1 deletion lite/core/optimizer/optimizer.cc
@@ -188,7 +188,6 @@ std::unique_ptr<RuntimeProgram> RunDefaultOptimizer(
       "lite_conv_scale_fuse_pass",
       "lite_conv_elementwise_tree_fuse_pass",
       "lite_greater_than_cast_fuse_pass",
-      "fill_range_fuse_pass",
       "identity_dropout_eliminate_pass",
       "sparse_conv_detect_pass",
       // "keepdims_convert_pass",

[Review thread on the removed "fill_range_fuse_pass" entry]

Author (Collaborator): There are already constant-folding passes for fill_constant and range, so this pass is no longer needed.