Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NNAdapter][QualcommQNN] Supports ERNIE nano fully quantized model #9618

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,10 @@ class PatternMatcher {
int index = -1);
Pattern* IsOperationOutputOperand(NNAdapterOperationType type,
int index = -1);
Pattern* IsModelInputOperand();
Pattern* IsModelOutputOperand();
Pattern* IsNotModelInputOperand();
Pattern* IsNotModelOutputOperand();
Pattern* CheckInputCount(int num);
Pattern* CheckOutputCount(int num);
// Mark the pattern matched node to be deleted, so its inlinks and outlinks
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -186,10 +186,14 @@ core::Operand* InsertReshapeOperation(
const NNAdapterOperandDimensionType& input_dimensions,
std::vector<int32_t> shape = {});
// Append or insert a dummy add operation, set the addend to a zero operand
core::Operand* AppendDummyOperation(core::Model* model,
core::Operand* input_operand);
core::Operand* InsertDummyOperation(core::Model* model,
core::Operand* output_operand);
core::Operand* AppendDummyAddOperation(core::Model* model,
core::Operand* input_operand);
core::Operand* InsertDummyAddOperation(core::Model* model,
core::Operand* output_operand);
core::Operand* AppendDummySubOperation(core::Model* model,
core::Operand* input_operand);
core::Operand* InsertDummySubOperation(core::Model* model,
core::Operand* output_operand);
// Append or insert a unary activation or other operation which has only one
// input and output operand
core::Operand* AppendUnaryOperation(core::Model* model,
Expand All @@ -206,6 +210,13 @@ core::Operand* AppendRequantOperation(core::Model* model,
core::Operand* InsertRequantOperation(core::Model* model,
core::Operand* output_operand,
void* input_quant_params);
// Append or insert a softmax operation
core::Operand* AppendSoftmaxOperation(core::Model* model,
core::Operand* input_operand,
int32_t axis);
core::Operand* InsertSoftmaxOperation(core::Model* model,
core::Operand* output_operand,
int32_t axis);

// Sort the operations of the specified model in topological order
std::vector<const core::Operation*> SortOperationsInTopologicalOrder(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ void FixMultipleOutputsOps(core::Model* model) {
auto output_operand = output_operands[i];
if (IsModelOutputOperand(output_operand)) {
auto dummy_output_operand =
InsertDummyOperation(model, output_operand);
InsertDummyAddOperation(model, output_operand);
UpdateOperationOutputOperands(
operation, output_operand, dummy_output_operand);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ void FixNoInputsOps(core::Model* model) {
auto output_operand = model->output_operands[i];
if (IsModelOutputOperand(output_operand)) {
output_operand->type.lifetime = NNADAPTER_CONSTANT_COPY;
auto dummy_output_operand = AppendDummyOperation(model, output_operand);
auto dummy_output_operand =
AppendDummyAddOperation(model, output_operand);
UpdateModelOutputOperands(model, output_operand, dummy_output_operand);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ static void ReduceOpsAddDummyOperation(core::Model* model,
auto reduce_all =
axes_size == static_cast<int>(input_operand->type.dimensions.count);
if (!keep_dim && reduce_all && IsModelOutputOperand(output_operand)) {
auto dummy_output_operand = InsertDummyOperation(model, output_operand);
auto dummy_output_operand = InsertDummyAddOperation(model, output_operand);
UpdateOperationOutputOperands(
operation, output_operand, dummy_output_operand);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ void FixMultipleOutputsOps(core::Model* model) {
auto output_operand = output_operands[i];
if (IsModelOutputOperand(output_operand)) {
auto dummy_output_operand =
InsertDummyOperation(model, output_operand);
InsertDummyAddOperation(model, output_operand);
UpdateOperationOutputOperands(
operation, output_operand, dummy_output_operand);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ static void FixRELUDepthwiseConv2D(core::Model* model,
bool is_depthwise_mode = group != 1 && input_channel_size == group &&
output_channel_size % input_channel_size == 0;
if (is_depthwise_mode) {
auto dummy_output_operand = InsertDummyOperation(model, output_operand);
auto dummy_output_operand =
InsertDummyAddOperation(model, output_operand);
UpdateOperationOutputOperands(
operation, output_operand, dummy_output_operand);
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,42 @@ PatternMatcher::Pattern::IsOperationOutputOperand(NNAdapterOperationType type,
return this;
}

NNADAPTER_EXPORT PatternMatcher::Pattern *
PatternMatcher::Pattern::IsModelInputOperand() {
  // Constrain this pattern to match only operands that are model inputs.
  IsOperand();
  auto is_model_input = [](const Node *node) {
    return nnadapter::IsModelInputOperand(node->operand);
  };
  conditions.emplace_back(is_model_input);
  return this;
}

NNADAPTER_EXPORT PatternMatcher::Pattern *
PatternMatcher::Pattern::IsModelOutputOperand() {
  // Constrain this pattern to match only operands that are model outputs.
  IsOperand();
  auto is_model_output = [](const Node *node) {
    return nnadapter::IsModelOutputOperand(node->operand);
  };
  conditions.emplace_back(is_model_output);
  return this;
}

NNADAPTER_EXPORT PatternMatcher::Pattern *
PatternMatcher::Pattern::IsNotModelInputOperand() {
  // Constrain this pattern to match only operands that are NOT model inputs.
  IsOperand();
  auto is_not_model_input = [](const Node *node) {
    return !nnadapter::IsModelInputOperand(node->operand);
  };
  conditions.emplace_back(is_not_model_input);
  return this;
}

NNADAPTER_EXPORT PatternMatcher::Pattern *
PatternMatcher::Pattern::IsNotModelOutputOperand() {
  // Constrain this pattern to match only operands that are NOT model outputs.
  IsOperand();
  auto is_not_model_output = [](const Node *node) {
    return !nnadapter::IsModelOutputOperand(node->operand);
  };
  conditions.emplace_back(is_not_model_output);
  return this;
}

NNADAPTER_EXPORT PatternMatcher::Pattern *PatternMatcher::Pattern::IsOperation(
NNAdapterOperationType type) {
conditions.emplace_back([type](const Node *node) {
Expand Down
50 changes: 45 additions & 5 deletions lite/backends/nnadapter/nnadapter/src/utility/modeling.cc
Original file line number Diff line number Diff line change
Expand Up @@ -732,6 +732,7 @@ NNADAPTER_EXPORT core::Operand* InsertReshapeOperation(

core::Operand* AddDummyOperation(core::Model* model,
core::Operand* reference_operand,
NNAdapterOperationType operation_type,
bool after = true) {
auto target_operand = AddOperand(model);
CopyOperandType(&target_operand->type, reference_operand->type);
Expand All @@ -754,7 +755,7 @@ core::Operand* AddDummyOperation(core::Model* model,
auto fuse_code_operand = AddInt32ConstantOperand(model, 0);
// Insert a new ADD operation
auto dummy_add_operation = AddOperation(model);
dummy_add_operation->type = NNADAPTER_ADD;
dummy_add_operation->type = operation_type;
dummy_add_operation->input_operands = {
after ? reference_operand : target_operand,
zero_operand,
Expand All @@ -764,14 +765,24 @@ core::Operand* AddDummyOperation(core::Model* model,
return target_operand;
}

NNADAPTER_EXPORT core::Operand* AppendDummyOperation(
NNADAPTER_EXPORT core::Operand* AppendDummyAddOperation(
core::Model* model, core::Operand* input_operand) {
return AddDummyOperation(model, input_operand, true);
return AddDummyOperation(model, input_operand, NNADAPTER_ADD, true);
}

NNADAPTER_EXPORT core::Operand* InsertDummyOperation(
NNADAPTER_EXPORT core::Operand* InsertDummyAddOperation(
core::Model* model, core::Operand* output_operand) {
return AddDummyOperation(model, output_operand, false);
return AddDummyOperation(model, output_operand, NNADAPTER_ADD, false);
}

// Append a dummy SUB operation (subtracting a zero operand) after
// `input_operand` and return the freshly created operand holding its result.
NNADAPTER_EXPORT core::Operand* AppendDummySubOperation(
    core::Model* model, core::Operand* input_operand) {
  return AddDummyOperation(model, input_operand, NNADAPTER_SUB, /*after=*/true);
}

// Insert a dummy SUB operation (subtracting a zero operand) before
// `output_operand` and return the freshly created operand that feeds it.
NNADAPTER_EXPORT core::Operand* InsertDummySubOperation(
    core::Model* model, core::Operand* output_operand) {
  return AddDummyOperation(
      model, output_operand, NNADAPTER_SUB, /*after=*/false);
}

core::Operand* AddUnaryOperation(core::Model* model,
Expand Down Expand Up @@ -884,6 +895,35 @@ NNADAPTER_EXPORT core::Operand* InsertRequantOperation(
return AddRequantOperation(model, output_operand, input_quant_params, false);
}

// Create a NNADAPTER_SOFTMAX operation adjacent to `reference_operand`.
// When `after` is true the softmax consumes `reference_operand` and writes to a
// newly created operand; otherwise the new operand feeds the softmax and
// `reference_operand` receives its output. Returns the new operand, whose type
// is copied from the reference (lifetime becomes TEMPORARY_VARIABLE unless the
// reference is a temporary-shape operand).
core::Operand* AddSoftmaxOperation(core::Model* model,
                                   core::Operand* reference_operand,
                                   int32_t axis = -1,
                                   bool after = true) {
  auto new_operand = AddOperand(model);
  CopyOperandType(&new_operand->type, reference_operand->type);
  if (!IsTemporaryShapeOperand(reference_operand)) {
    new_operand->type.lifetime = NNADAPTER_TEMPORARY_VARIABLE;
  }
  auto softmax_operation = AddOperation(model);
  softmax_operation->type = NNADAPTER_SOFTMAX;
  auto axis_operand = AddInt32ConstantOperand(model, axis);
  // Wire the softmax either downstream (after) or upstream (before) of the
  // reference operand.
  core::Operand* in_operand = reference_operand;
  core::Operand* out_operand = new_operand;
  if (!after) {
    in_operand = new_operand;
    out_operand = reference_operand;
  }
  softmax_operation->input_operands = {in_operand, axis_operand};
  softmax_operation->output_operands = {out_operand};
  return new_operand;
}

// Append a softmax (over `axis`) after `input_operand`; the returned operand
// holds the softmax result.
NNADAPTER_EXPORT core::Operand* AppendSoftmaxOperation(
    core::Model* model, core::Operand* input_operand, int32_t axis) {
  return AddSoftmaxOperation(model, input_operand, axis, /*after=*/true);
}

// Insert a softmax (over `axis`) before `output_operand`; the returned operand
// is the softmax's new input.
NNADAPTER_EXPORT core::Operand* InsertSoftmaxOperation(
    core::Model* model, core::Operand* output_operand, int32_t axis) {
  return AddSoftmaxOperation(model, output_operand, axis, /*after=*/false);
}

#define SORT_OPERATIONS_IN_TOPOLOGICAL_ORDER(T) \
NNADAPTER_EXPORT std::vector<T core::Operation*> \
SortOperationsInTopologicalOrder(T core::Model* model) { \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,24 @@ static bool SetOutScaleFromSpecialOps(const std::unique_ptr<SSAGraph>& graph) {
return found;
}

// Print variables without outscale
// Collect and log (at VLOG level 5) the names of output variables that carry
// no output scale, to help users set the out threshold manually for fully
// quantized models. Only variables consumed by downstream ops are reported.
static void PrintVariablesWithoutOutScale(
    const std::unique_ptr<SSAGraph>& graph) {
  std::ostringstream os;
  for (auto& op_node : graph->StmtTopologicalOrder()) {
    if (!op_node->IsStmt()) continue;
    auto op_info = op_node->AsStmt().mutable_op_info();
    for (auto out_var_node : op_node->outlinks) {
      CHECK(out_var_node->IsArg());
      // Bind by reference to avoid copying the name string.
      const auto& out_var_name = out_var_node->arg()->name;
      if (op_info->HasOutputScale(out_var_name)) continue;
      // Skip dangling outputs: nothing downstream consumes them, so a missing
      // scale there is harmless.
      if (!out_var_node->outlinks.empty()) os << out_var_name << "\n";
    }
  }
  VLOG(5) << "\nVariables without outscale:\n" << os.str();
}

void QuantizationParametersPropagationPass::Apply(
const std::unique_ptr<SSAGraph>& graph) {
VLOG(5) << "\n" << Visualize(graph.get());
Expand Down Expand Up @@ -386,6 +404,8 @@ void QuantizationParametersPropagationPass::Apply(
SetOutScaleFromNextInScale(graph, auto_complete_quant_scale_level);
} while (found);
}
// Print variables without outscale to help users set out threshold manually
PrintVariablesWithoutOutScale(graph);
VLOG(5) << "\n" << Visualize(graph.get());
}

Expand Down
2 changes: 1 addition & 1 deletion lite/core/optimizer/mir/subgraph/subgraph_detector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -579,12 +579,12 @@ void SubgraphFuser::ReplaceNodesWithSubgraphs(
void SubgraphFuser::operator()() {
std::vector<std::vector<Node *>> subgraphs =
SubgraphDetector(graph_, teller_, subgraph_partition_configs_)();
SubgraphVisualizer(graph_, subgraphs)();
if (support_mixed_precision_) {
MixedPrecisionAutoInsertCalibFuser mixed_precision_auto_insert_calib_fuser(
graph_, &subgraphs);
mixed_precision_auto_insert_calib_fuser();
}
SubgraphVisualizer(graph_, subgraphs)();
ReplaceNodesWithSubgraphs(graph_, subgraphs, min_subgraph_size_);
}

Expand Down
20 changes: 16 additions & 4 deletions lite/kernels/host/print_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -124,18 +124,24 @@ class TensorFormatter {
void SetSummarize(int64_t summarize) { summarize_ = summarize; }

private:
// Default element formatter: emit a leading space followed by the value.
// Specialized below for types whose stream output is unsuitable.
template <typename T>
void FormatItem(T print_value, std::stringstream& log_stream) {
  log_stream << ' ';
  log_stream << print_value;
}

template <typename T>
void FormatData(const Tensor& print_tensor, std::stringstream& log_stream) {
int64_t print_size = summarize_ == -1
? print_tensor.numel()
: (std::min)(summarize_, print_tensor.numel());
const T* data = print_tensor.data<T>(); // Always kHost, so unnessary to
// copy the data from device
const T* print_data = print_tensor.data<
T>(); // Always kHost, so unnessary to copy the data from device
log_stream << " - data: [";
if (print_size > 0) {
log_stream << data[0];
FormatItem<T>(print_data[0], log_stream);
for (int64_t i = 1; i < print_size; ++i) {
log_stream << " " << data[i];
log_stream << " ";
FormatItem<T>(print_data[i], log_stream);
}
}
log_stream << "]" << std::endl;
Expand All @@ -148,6 +154,12 @@ class TensorFormatter {
bool print_tensor_layout_ = true;
};

// int8_t is streamed as a character by default; widen it to int32_t so
// quantized tensor values are printed as numbers.
template <>
void TensorFormatter::FormatItem<int8_t>(int8_t print_value,
                                         std::stringstream& log_stream) {
  const int32_t widened_value = static_cast<int32_t>(print_value);
  log_stream << " " << widened_value;
}

void PrintCompute::Run() {
auto& param = Param<param_t>();
param.out->CopyDataFrom(*param.in);
Expand Down
5 changes: 3 additions & 2 deletions lite/kernels/nnadapter/converter/slice.cc
Original file line number Diff line number Diff line change
Expand Up @@ -114,12 +114,13 @@ int ConvertSlice(Converter* converter, OpInfo* op, Scope* scope) {
if (!decrease_axis.empty() &&
decrease_axis.size() != input_type->dimensions.count) {
// Squeeze operation
converter->AddSqueezeOperation(output_operand, decrease_axis, out_name);
converter->AddSqueezeOperation(
output_operand, decrease_axis, out_name, out_scales);
}
if (decrease_axis.size() == input_type->dimensions.count &&
decrease_axis.size() > 1) {
std::vector<int> shape = {1};
converter->AddReshapeOperation(output_operand, shape, out_name);
converter->AddReshapeOperation(output_operand, shape, out_name, out_scales);
}
return NO_ERROR;
}
Expand Down