From 582d26a31295645247caf85019e1538f523315c8 Mon Sep 17 00:00:00 2001 From: laiou Date: Fri, 7 Jan 2022 14:30:32 +0800 Subject: [PATCH 1/3] pad3d and memory pass --- lite/api/paddle_use_passes.h | 1 + .../mir/fusion/__xpu__inplace_fuse_pass.cc | 45 ++++ .../mir/fusion/__xpu__inplace_fuse_pass.h | 32 +++ .../mir/fusion/__xpu__inplace_fuser.cc | 53 ++++ .../mir/fusion/__xpu__inplace_fuser.h | 40 +++ .../optimizer/mir/fusion/inplace_fuse_pass.cc | 3 +- .../optimizer/mir/xpu_memory_optimize_pass.cc | 255 +++++++++++++++--- .../optimizer/mir/xpu_memory_optimize_pass.h | 12 +- lite/core/optimizer/optimizer.cc | 1 + .../x86_mobilenetv1_full_demo/CMakeLists.txt | 73 +++++ .../x86_mobilenetv1_light_demo/CMakeLists.txt | 73 +++++ lite/kernels/xpu/CMakeLists.txt | 1 + lite/kernels/xpu/pad3d_compute.cc | 101 +++++++ lite/kernels/xpu/pad3d_compute.h | 37 +++ 14 files changed, 680 insertions(+), 47 deletions(-) create mode 100644 lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.cc create mode 100644 lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.h create mode 100644 lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.cc create mode 100644 lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.h create mode 100644 lite/demo/cxx/x86_mobilenetv1_full_demo/CMakeLists.txt create mode 100644 lite/demo/cxx/x86_mobilenetv1_light_demo/CMakeLists.txt create mode 100644 lite/kernels/xpu/pad3d_compute.cc create mode 100644 lite/kernels/xpu/pad3d_compute.h diff --git a/lite/api/paddle_use_passes.h b/lite/api/paddle_use_passes.h index 4fd1e24d09a..5c7af415737 100644 --- a/lite/api/paddle_use_passes.h +++ b/lite/api/paddle_use_passes.h @@ -65,6 +65,7 @@ USE_MIR_PASS(type_layout_cast_preprocess_pass); USE_MIR_PASS(memory_optimize_pass); USE_MIR_PASS(xpu_memory_optimize_pass); USE_MIR_PASS(lite_inplace_fuse_pass); +USE_MIR_PASS(xpu_inplace_fuse_pass); USE_MIR_PASS(multi_stream_analysis_pass); USE_MIR_PASS(elementwise_mul_constant_eliminate_pass); USE_MIR_PASS(npu_subgraph_pass); 
diff --git a/lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.cc b/lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.cc new file mode 100644 index 00000000000..38212137228 --- /dev/null +++ b/lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.cc @@ -0,0 +1,45 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.h" +#include +#include +#include "lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.h" +#include "lite/core/optimizer/mir/pass_registry.h" + +namespace paddle { +namespace lite { +namespace mir { + +void XPUInplaceFusePass::Apply(const std::unique_ptr& graph) { + std::vector inplace_type_cases{"reshape", + "reshape2", + "flatten", + "flatten2", + "squeeze", + "squeeze2", + "unsqueeze", + "unsqueeze2"}; + for (auto type : inplace_type_cases) { + fusion::XPUInplaceFuser inplace_fuser(type); + inplace_fuser(graph.get()); + } +} + +} // namespace mir +} // namespace lite +} // namespace paddle + +REGISTER_MIR_PASS(xpu_inplace_fuse_pass, paddle::lite::mir::XPUInplaceFusePass) + .BindTargets({TARGET(kXPU)}); diff --git a/lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.h b/lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.h new file mode 100644 index 00000000000..5fb421bfbbc --- /dev/null +++ b/lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.h @@ -0,0 +1,32 @@ +// Copyright (c) 2019 
PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include "lite/core/optimizer/mir/pass.h" + +namespace paddle { +namespace lite { +namespace mir { + +class XPUInplaceFusePass : public ProgramPass { + public: + void Apply(const std::unique_ptr& graph) override; +}; + +} // namespace mir +} // namespace lite +} // namespace paddle diff --git a/lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.cc b/lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.cc new file mode 100644 index 00000000000..d9740213d47 --- /dev/null +++ b/lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.cc @@ -0,0 +1,53 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.h" +#include +#include +#include "lite/core/optimizer/mir/pattern_matcher_high_api.h" + +namespace paddle { +namespace lite { +namespace mir { +namespace fusion { + +void XPUInplaceFuser::BuildPattern() { + auto* input = VarNode("input") + ->assert_is_op_input(type_, "X") + ->assert_only_one_output() + ->AsInput(); + + auto* op_node = OpNode("inplace", type_)->assert_is_op(type_); + + auto* output = + VarNode("output")->assert_is_op_output(type_, "Out")->AsOutput(); + + *input >> *op_node >> *output; +} + +void XPUInplaceFuser::InsertNewNode(SSAGraph* graph, + const key2nodes_t& matched) { + bool inplace = true; + auto* stmt = matched.at("inplace")->stmt(); + auto op = stmt->op(); + cpp::OpDesc* op_desc = op->mutable_op_info(); + op_desc->SetAttr("inplace", inplace); + stmt->op()->Attach(*op_desc, op->scope()); + stmt->op()->AttachKernel(&(stmt->picked_kernel())); +} + +} // namespace fusion +} // namespace mir +} // namespace lite +} // namespace paddle diff --git a/lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.h b/lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.h new file mode 100644 index 00000000000..75d2e44ad37 --- /dev/null +++ b/lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.h @@ -0,0 +1,40 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include +#include "lite/core/optimizer/mir/pattern_matcher_high_api.h" + +namespace paddle { +namespace lite { +namespace mir { +namespace fusion { + +class XPUInplaceFuser : public FuseBase { + public: + explicit XPUInplaceFuser(const std::string& type) : type_(type) {} + + void BuildPattern() override; + void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override; + + private: + std::string type_; +}; + +} // namespace fusion +} // namespace mir +} // namespace lite +} // namespace paddle diff --git a/lite/core/optimizer/mir/fusion/inplace_fuse_pass.cc b/lite/core/optimizer/mir/fusion/inplace_fuse_pass.cc index 4fc05bd051e..25354fbee2f 100644 --- a/lite/core/optimizer/mir/fusion/inplace_fuse_pass.cc +++ b/lite/core/optimizer/mir/fusion/inplace_fuse_pass.cc @@ -43,4 +43,5 @@ void InplaceFusePass::Apply(const std::unique_ptr& graph) { REGISTER_MIR_PASS(lite_inplace_fuse_pass, paddle::lite::mir::InplaceFusePass) .BindTargets({TARGET(kAny)}) - .ExcludeTargets({TARGET(kNPU)}); + .ExcludeTargets({TARGET(kNPU)}) + .ExcludeTargets({TARGET(kXPU)}); diff --git a/lite/core/optimizer/mir/xpu_memory_optimize_pass.cc b/lite/core/optimizer/mir/xpu_memory_optimize_pass.cc index 46c415fcf82..3e151a9220e 100644 --- a/lite/core/optimizer/mir/xpu_memory_optimize_pass.cc +++ b/lite/core/optimizer/mir/xpu_memory_optimize_pass.cc @@ -30,11 +30,15 @@ typedef struct { int cluster; std::pair lifetime; int life_interval; + int mapping; std::set adj; } XPUMemNode; void XPUMemoryOptimizePass::CollectLifeCycleByDevice( - std::map* lifecycles, SSAGraph* graph) { + std::map* lifecycles, + SSAGraph* graph, + std::map* squeeze_input2output, + std::map* squeeze_output2input) { max_lifecycle_ = 0; auto is_host = [](TargetType x) -> bool { @@ -93,12 +97,11 @@ void XPUMemoryOptimizePass::CollectLifeCycleByDevice( } }; - VLOG(4) << "invalid_op_nodes.size();" << invalid_op_nodes.size(); insert_invalid_op_nodes_for_specific_target(invalid_op_nodes); - VLOG(4) 
<< "invalid_op_nodes.size();" << invalid_op_nodes.size(); // Collect the invalid input and output variables that will not be reused. std::set invalid_var_names; + int inplace_op_num = 0; for (auto& op_node : graph->StmtTopologicalOrder()) { // variables of invalid_op_nodes wil not be reused if (!op_node->IsStmt()) continue; @@ -130,19 +133,19 @@ void XPUMemoryOptimizePass::CollectLifeCycleByDevice( {"unsqueeze", {{"X"}, {"Out"}}}, {"unsqueeze2", {{"X"}, {"Out"}}}}; auto inplace_op_node = inplace_op_nodes.find(op_type); + if (inplace_op_node != inplace_op_nodes.end()) { bool inplace = false; if (op_info->HasAttr("inplace")) { inplace = op_info->GetAttr("inplace"); } if (inplace) { + inplace_op_num++; for (auto& in_param_name : inplace_op_node->second.first) { const auto& in_arg_names = op_info->Input(in_param_name); - invalid_var_names.insert(in_arg_names.begin(), in_arg_names.end()); } for (auto& out_param_name : inplace_op_node->second.second) { const auto& out_arg_names = op_info->Output(out_param_name); - invalid_var_names.insert(out_arg_names.begin(), out_arg_names.end()); } } } @@ -161,12 +164,35 @@ void XPUMemoryOptimizePass::CollectLifeCycleByDevice( if (op_node->AsStmt().op_info()->Type() == "io_copy_once") { continue; } + + std::map, std::set>> + inplace_ops = {{"reshape", {{"X"}, {"Out"}}}, + {"reshape2", {{"X"}, {"Out"}}}, + {"flatten", {{"X"}, {"Out"}}}, + {"flatten2", {{"X"}, {"Out"}}}, + {"squeeze", {{"X"}, {"Out"}}}, + {"squeeze2", {{"X"}, {"Out"}}}, + {"unsqueeze", {{"X"}, {"Out"}}}, + {"unsqueeze2", {{"X"}, {"Out"}}}}; VLOG(4) << op_node->AsStmt().op_info()->Type() << " life is " << max_lifecycle_; std::vector var_nodes(op_node->inlinks.begin(), op_node->inlinks.end()); var_nodes.insert( var_nodes.end(), op_node->outlinks.begin(), op_node->outlinks.end()); + + int count = 0; + + bool is_inplace = false; + + if (op_node->AsStmt().op_info()->HasAttr("inplace")) { + is_inplace = op_node->AsStmt().op_info()->GetAttr("inplace"); + } + + std::string 
input_host_var_name = " "; + std::string input_xpu_var_name = " "; + for (auto* var_node : var_nodes) { CHECK(var_node->IsArg()); auto& arg = var_node->AsArg(); @@ -175,18 +201,59 @@ void XPUMemoryOptimizePass::CollectLifeCycleByDevice( VLOG(4) << "OP VAR NAME IS " << var_name; if (var_name.find("_xpu_max") != std::string::npos) continue; if (invalid_var_names.count(var_name)) continue; - TargetType target_type = arg.type->target(); - if (is_host(target_type)) target_type = TARGET(kHost); - - if (!(*lifecycles)[TargetToStr(target_type)].count(var_name)) { - (*lifecycles)[TargetToStr(target_type)].emplace( - var_name, std::make_pair(max_lifecycle_, max_lifecycle_)); - } else { - int cur_life = - (*lifecycles)[TargetToStr(target_type)][var_name].second; - (*lifecycles)[TargetToStr(target_type)][var_name].second = - (std::max)(max_lifecycle_, cur_life); - } + auto find_inplace_op = + inplace_ops.find(op_node->AsStmt().op_info()->Type()); + + if (find_inplace_op != inplace_ops.end() && count != 2) { + TargetType target_type = arg.type->target(); + if (is_host(target_type)) { + target_type = TARGET(kHost); + continue; + } + + if ((*lifecycles)[TargetToStr(target_type)].count(var_name)) { + if (is_host(target_type)) { + input_host_var_name = var_name; + } else { + input_xpu_var_name = var_name; + count++; + int cur_life = + (*lifecycles)[TargetToStr(target_type)][var_name].second; + (*lifecycles)[TargetToStr(target_type)][var_name].second = + (std::max)(max_lifecycle_, cur_life); + } + } else if (!(*lifecycles)[TargetToStr(target_type)].count(var_name)) { + count++; + if (is_host(target_type)) { + (*lifecycles)[TargetToStr(target_type)].emplace( + var_name, + (*lifecycles)[TargetToStr(target_type)][input_host_var_name]); + } else { + if (is_inplace) { + (*lifecycles)[TargetToStr(target_type)].emplace( + var_name, std::make_pair(max_lifecycle_, max_lifecycle_)); + squeeze_input2output->emplace(input_xpu_var_name, var_name); + squeeze_output2input->emplace(var_name, 
input_xpu_var_name); + } else { + (*lifecycles)[TargetToStr(target_type)].emplace( + var_name, std::make_pair(max_lifecycle_, max_lifecycle_)); + } + } + } + } else if (find_inplace_op == inplace_ops.end()) { + TargetType target_type = arg.type->target(); + if (is_host(target_type)) target_type = TARGET(kHost); + + if (!(*lifecycles)[TargetToStr(target_type)].count(var_name)) { + (*lifecycles)[TargetToStr(target_type)].emplace( + var_name, std::make_pair(max_lifecycle_, max_lifecycle_)); + } else { + int cur_life = + (*lifecycles)[TargetToStr(target_type)][var_name].second; + (*lifecycles)[TargetToStr(target_type)][var_name].second = + (std::max)(max_lifecycle_, cur_life); + } + } // if else } ++max_lifecycle_; } @@ -196,7 +263,9 @@ void XPUMemoryOptimizePass::CollectLifeCycleByDevice( void XPUMemoryOptimizePass::MakeReusePlan( const lifecycle_map_t& lifecycles, - std::map* node2cluster) { + std::map* node2cluster, + std::map* squeeze_input2output, + std::map* squeeze_output2input) { std::vector mem_nodes; std::vector cluster; for (auto& data : lifecycles) { @@ -204,6 +273,7 @@ void XPUMemoryOptimizePass::MakeReusePlan( temp_node.name = data.first; temp_node.cluster = -1; temp_node.lifetime = data.second; + temp_node.mapping = 0; temp_node.life_interval = data.second.second - data.second.first; mem_nodes.push_back(temp_node); } @@ -234,33 +304,129 @@ void XPUMemoryOptimizePass::MakeReusePlan( } } } + VLOG(4) << "Step1 get inplace node Cluster: "; + for (size_t i = 0; i < mem_nodes.size(); i++) { + if (squeeze_input2output->count(mem_nodes[i].name)) { + int cluster_index = cluster.size(); + mem_nodes[i].cluster = cluster_index; + (*node2cluster)[mem_nodes[i].name] = mem_nodes[i].name; + VLOG(4) << "Mapping Tensor Cluster: " << mem_nodes[i].name + << ", life time is " << mem_nodes[i].lifetime.first << " --> " + << mem_nodes[i].lifetime.second << ", cluster name is " + << (*node2cluster)[mem_nodes[i].name]; + std::set cluster_adj = mem_nodes[i].adj; + for (size_t j = 
0; j < mem_nodes.size(); j++) { + if (mem_nodes[j].name == (*squeeze_input2output)[mem_nodes[i].name]) { + (*node2cluster)[mem_nodes[j].name] = mem_nodes[i].name; + mem_nodes[j].cluster = cluster_index; + VLOG(4) << mem_nodes[j].name << ", life time is " + << mem_nodes[j].lifetime.first << " --> " + << mem_nodes[j].lifetime.second << ", cluster name is " + << (*node2cluster)[mem_nodes[j].name]; + for (auto& n : mem_nodes[j].adj) { + cluster_adj.insert(n); + } + } + } + } + } + VLOG(4) << "Step2 merge inplace node Cluster: "; + for (size_t i = 0; i < mem_nodes.size(); i++) { + if (squeeze_input2output->count(mem_nodes[i].name) && + mem_nodes[i].mapping != 1) { + int cluster_index = cluster.size(); + mem_nodes[i].cluster = cluster_index; + (*node2cluster)[mem_nodes[i].name] = mem_nodes[i].name; + mem_nodes[i].mapping = 1; + VLOG(4) << "Mapping Tensor Cluster: " << mem_nodes[i].name + << ", life time is " << mem_nodes[i].lifetime.first << " --> " + << mem_nodes[i].lifetime.second << ", cluster index is " + << mem_nodes[i].cluster << ", cluster name is " + << (*node2cluster)[mem_nodes[i].name]; + cluster.push_back(mem_nodes[i].name); + + std::set<std::string> cluster_adj = mem_nodes[i].adj; + for (size_t j = 0; j < mem_nodes.size(); j++) { + if (mem_nodes[j].name == (*squeeze_input2output)[mem_nodes[i].name]) { + mem_nodes[j].cluster = mem_nodes[i].cluster; + (*node2cluster)[mem_nodes[j].name] = mem_nodes[i].name; + VLOG(4) << mem_nodes[j].name << ", life time is " + << mem_nodes[j].lifetime.first << " --> " + << mem_nodes[j].lifetime.second << ", cluster index is " + << mem_nodes[j].cluster << ", cluster name is " + << (*node2cluster)[mem_nodes[j].name]; + + for (auto& m : mem_nodes[j].adj) { + cluster_adj.insert(m); + } + } else if (squeeze_input2output->count(mem_nodes[j].name) && + (cluster_adj.find(mem_nodes[j].name) == cluster_adj.end()) && + mem_nodes[j].mapping != 1) { + mem_nodes[j].mapping = 1; + mem_nodes[j].cluster = mem_nodes[i].cluster; + 
(*node2cluster)[mem_nodes[j].name] = mem_nodes[i].name; + VLOG(4) << mem_nodes[j].name << ", life time is " + << mem_nodes[j].lifetime.first << " --> " + << mem_nodes[j].lifetime.second << ", cluster index is " + << mem_nodes[j].cluster << ", cluster name is " + << (*node2cluster)[mem_nodes[j].name]; + + for (auto& n : mem_nodes[j].adj) { + cluster_adj.insert(n); + } + for (size_t n = 0; n < mem_nodes.size(); n++) { + if (mem_nodes[n].name == + (*squeeze_input2output)[mem_nodes[j].name]) { + mem_nodes[n].cluster = mem_nodes[i].cluster; + (*node2cluster)[mem_nodes[n].name] = mem_nodes[i].name; + VLOG(4) << mem_nodes[n].name << ", life time is " + << mem_nodes[n].lifetime.first << " --> " + << mem_nodes[n].lifetime.second << ", cluster index is " + << mem_nodes[n].cluster << ", cluster name is " + << (*node2cluster)[mem_nodes[n].name]; - // Generating XPUMemory Reuse Strategy Based on Greedy Way - // The vars can be reused if there is no overlap between them. + for (auto& m : mem_nodes[n].adj) { + cluster_adj.insert(m); + } + } + } + } + } + } + } + VLOG(4) << "Step3 get others node Cluster : "; for (size_t i = 0; i < mem_nodes.size(); i++) { - if (mem_nodes[i].cluster >= 0 || mem_nodes[i].life_interval == 0) continue; - int cluster_index = cluster.size(); - mem_nodes[i].cluster = cluster_index; - (*node2cluster)[mem_nodes[i].name] = mem_nodes[i].name; - VLOG(4) << "Mapping Tensor Cluster: " << mem_nodes[i].name - << ", life time is " << mem_nodes[i].lifetime.first << " --> " - << mem_nodes[i].lifetime.second; - cluster.push_back(mem_nodes[i].name); - std::set cluster_adj = mem_nodes[i].adj; - for (size_t j = i + 1; j < mem_nodes.size(); j++) { - if (mem_nodes[j].cluster < 0 && - (cluster_adj.find(mem_nodes[j].name) == cluster_adj.end())) { - (*node2cluster)[mem_nodes[j].name] = mem_nodes[i].name; - mem_nodes[j].cluster = cluster_index; - VLOG(4) << mem_nodes[j].name << ", life time is " - << mem_nodes[j].lifetime.first << " --> " - << mem_nodes[j].lifetime.second; - 
for (auto& n : mem_nodes[j].adj) { - cluster_adj.insert(n); + if (!(squeeze_input2output->count(mem_nodes[i].name)) && + mem_nodes[i].cluster < 0 && mem_nodes[i].life_interval != 0) { + int cluster_index = cluster.size(); + mem_nodes[i].cluster = cluster_index; + (*node2cluster)[mem_nodes[i].name] = mem_nodes[i].name; + VLOG(4) << "Mapping Tensor Cluster: " << mem_nodes[i].name + << ", life time is " << mem_nodes[i].lifetime.first << " --> " + << mem_nodes[i].lifetime.second << ", cluster index is " + << mem_nodes[i].cluster << ", cluster name is " + << (*node2cluster)[mem_nodes[i].name]; + cluster.push_back(mem_nodes[i].name); + std::set cluster_adj = mem_nodes[i].adj; + for (size_t j = i + 1; j < mem_nodes.size(); j++) { + if (!(squeeze_input2output->count(mem_nodes[j].name)) && + mem_nodes[j].cluster < 0 && + (cluster_adj.find(mem_nodes[j].name) == cluster_adj.end())) { + mem_nodes[j].cluster = mem_nodes[i].cluster; + (*node2cluster)[mem_nodes[j].name] = mem_nodes[i].name; + VLOG(4) << mem_nodes[j].name << ", life time is " + << mem_nodes[j].lifetime.first << " --> " + << mem_nodes[j].lifetime.second << ", cluster index is " + << mem_nodes[j].cluster << ", cluster name is " + << (*node2cluster)[mem_nodes[j].name]; + for (auto& n : mem_nodes[j].adj) { + cluster_adj.insert(n); + } } } } } + for (auto& name : cluster) { LOG(INFO) << "cluster: " << name; } @@ -272,6 +438,7 @@ void XPUMemoryOptimizePass::PerformReusePlan( for (auto& op_node : graph->StmtTopologicalOrder()) { if (!op_node->IsStmt()) continue; auto& stmt = op_node->AsStmt(); + auto* op_info = stmt.mutable_op_info(); std::map> in_args, out_args; // replace the op's input according the reuse table. @@ -354,13 +521,19 @@ void XPUMemoryOptimizePass::Apply(const std::unique_ptr& graph) { // 3. Perform reuse plan: Replace all var's name in the model according to the // mapping table. 
std::map lifecycles; - CollectLifeCycleByDevice(&lifecycles, graph.get()); + std::map squeeze_input2output; + std::map squeeze_output2input; + CollectLifeCycleByDevice( + &lifecycles, graph.get(), &squeeze_input2output, &squeeze_output2input); for (auto& ele : lifecycles) { if (ele.first != "xpu") { continue; } std::map node2cluster; - MakeReusePlan(ele.second, &node2cluster); + MakeReusePlan(ele.second, + &node2cluster, + &squeeze_input2output, + &squeeze_output2input); PerformReusePlan(graph.get(), node2cluster); } } diff --git a/lite/core/optimizer/mir/xpu_memory_optimize_pass.h b/lite/core/optimizer/mir/xpu_memory_optimize_pass.h index f0d920fadf3..053914371d9 100644 --- a/lite/core/optimizer/mir/xpu_memory_optimize_pass.h +++ b/lite/core/optimizer/mir/xpu_memory_optimize_pass.h @@ -31,9 +31,6 @@ namespace paddle { namespace lite { namespace mir { -/* - * XPUMemoryOptimizePass will - */ class XPUMemoryOptimizePass : public ProgramPass { public: using lifecycle_t = std::pair; @@ -42,9 +39,14 @@ class XPUMemoryOptimizePass : public ProgramPass { private: void CollectLifeCycleByDevice( - std::map* lifecycles, SSAGraph*); + std::map* lifecycles, + SSAGraph*, + std::map* squeeze_input2output, + std::map* squeeze_output2input); void MakeReusePlan(const lifecycle_map_t& lifecycles, - std::map* node2cluster); + std::map* node2cluster, + std::map* squeeze_input2output, + std::map* squeeze_output2input); void PerformReusePlan(SSAGraph* graph, const std::map& reuse_table); diff --git a/lite/core/optimizer/optimizer.cc b/lite/core/optimizer/optimizer.cc index f1b393cc2cb..06c3c918e5e 100644 --- a/lite/core/optimizer/optimizer.cc +++ b/lite/core/optimizer/optimizer.cc @@ -264,6 +264,7 @@ std::unique_ptr RunDefaultOptimizer( "runtime_context_assign_pass", "argument_type_display_pass", "lite_inplace_fuse_pass", + "xpu_inplace_fuse_pass", #if !(defined(LITE_WITH_FPGA) || defined(LITE_WITH_PRECISION_PROFILE)) "memory_optimize_pass", "xpu_memory_optimize_pass" diff --git 
a/lite/demo/cxx/x86_mobilenetv1_full_demo/CMakeLists.txt b/lite/demo/cxx/x86_mobilenetv1_full_demo/CMakeLists.txt new file mode 100644 index 00000000000..aaed1b50e02 --- /dev/null +++ b/lite/demo/cxx/x86_mobilenetv1_full_demo/CMakeLists.txt @@ -0,0 +1,73 @@ +cmake_minimum_required(VERSION 2.8) +project(mobilenet_full_api) +set(TARGET mobilenet_full_api) + +# 1. path to Paddle-Lite lib and mklml lib +set(LITE_DIR "${PROJECT_SOURCE_DIR}/../../../cxx") +set(MKLML_DIR "${PROJECT_SOURCE_DIR}/../../../third_party/mklml/") + +if (WIN32) + add_definitions("/DGOOGLE_GLOG_DLL_DECL=") + set(MSVC_STATIC_CRT ) + if(MSVC_STATIC_CRT) + set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd") + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") + else(MSVC_STATIC_CRT) + set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MDd") + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MD") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MDd") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MD") + endif(MSVC_STATIC_CRT) +endif() + +if (APPLE AND METAL) + message(STATUS "set METAL=ON") + add_definitions("-DMETAL") + find_library(METAL_LIBRARY Metal REQUIRED) + find_library(GRAPHIC CoreGraphics REQUIRED) + find_library(MPS_LIBRARY MetalPerformanceShaders REQUIRED) +endif() + +# 2. link mklml and Paddle-Lite directory +link_directories(${LITE_DIR}/lib ${MKLML_DIR}/lib) +include_directories(${LITE_DIR}/include/ ${MKLML_DIR}/include) + +# 3. 
compile options +if (NOT WIN32) + add_definitions(-std=c++11 -g -O3 -pthread) + set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}) +endif() + +# 4.add executable output +add_executable(${TARGET} ${TARGET}.cc) +if (WIN32) + set(WITH_STATIC_MKL OFF) + if(WITH_STATIC_MKL) + set(MATH_LIB ${MKLML_DIR}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX}) + else() + set(MATH_LIB ${MKLML_DIR}/lib/mklml${CMAKE_STATIC_LIBRARY_SUFFIX} + ${MKLML_DIR}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX}) + endif() + + target_link_libraries(${TARGET} libpaddle_api_full_bundled.lib) + target_link_libraries(${TARGET} shlwapi.lib) + target_link_libraries(${TARGET} ${MATH_LIB}) + + add_custom_command(TARGET ${TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_DIR}/lib/libiomp5md.dll ${CMAKE_BINARY_DIR}/Release + ) + if(NOT WITH_STATIC_MKL) + add_custom_command(TARGET ${TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_DIR}/lib/mklml.dll ${CMAKE_BINARY_DIR}/Release + ) + endif() +else() + if (APPLE AND METAL) + target_link_libraries(${TARGET} ${METAL_LIBRARY} ${GRAPHIC} ${MPS_LIBRARY}) + endif() + target_link_libraries(${TARGET} -lpaddle_full_api_shared) + target_link_libraries(${TARGET} -liomp5) + target_link_libraries(${TARGET} -ldl) +endif() diff --git a/lite/demo/cxx/x86_mobilenetv1_light_demo/CMakeLists.txt b/lite/demo/cxx/x86_mobilenetv1_light_demo/CMakeLists.txt new file mode 100644 index 00000000000..50b777e7520 --- /dev/null +++ b/lite/demo/cxx/x86_mobilenetv1_light_demo/CMakeLists.txt @@ -0,0 +1,73 @@ +cmake_minimum_required(VERSION 2.8) +project(mobilenet_light_api) +set(TARGET mobilenet_light_api) + +# 1. 
path to Paddle-Lite lib and mklml lib +set(LITE_DIR "${PROJECT_SOURCE_DIR}/../../../cxx") +set(MKLML_DIR "${PROJECT_SOURCE_DIR}/../../../third_party/mklml/") + +if (WIN32) + add_definitions("/DGOOGLE_GLOG_DLL_DECL=") + set(MSVC_STATIC_CRT ) + if(MSVC_STATIC_CRT) + set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd") + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") + else(MSVC_STATIC_CRT) + set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MDd") + set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MD") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MDd") + set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MD") + endif(MSVC_STATIC_CRT) +endif() + +if (APPLE AND METAL) + message(STATUS "set METAL=ON") + add_definitions("-DMETAL") + find_library(METAL_LIBRARY Metal REQUIRED) + find_library(GRAPHIC CoreGraphics REQUIRED) + find_library(MPS_LIBRARY MetalPerformanceShaders REQUIRED) +endif() + +# 2. link mklml and Paddle-Lite directory +link_directories(${LITE_DIR}/lib ${MKLML_DIR}/lib) +include_directories(${LITE_DIR}/include/ ${MKLML_DIR}/include) + +# 3. 
compile options +if (NOT WIN32) + add_definitions(-std=c++11 -g -O3 -pthread) + set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}) +endif() + +# 4.add executable output +add_executable(${TARGET} ${TARGET}.cc) +if (WIN32) + set(WITH_STATIC_MKL OFF) + if(WITH_STATIC_MKL) + set(MATH_LIB ${MKLML_DIR}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX}) + else() + set(MATH_LIB ${MKLML_DIR}/lib/mklml${CMAKE_STATIC_LIBRARY_SUFFIX} + ${MKLML_DIR}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX}) + endif() + + target_link_libraries(${TARGET} libpaddle_api_light_bundled.lib) + target_link_libraries(${TARGET} shlwapi.lib) + target_link_libraries(${TARGET} ${MATH_LIB}) + + add_custom_command(TARGET ${TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_DIR}/lib/libiomp5md.dll ${CMAKE_BINARY_DIR}/Release + ) + if(NOT WITH_STATIC_MKL) + add_custom_command(TARGET ${TARGET} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_DIR}/lib/mklml.dll ${CMAKE_BINARY_DIR}/Release + ) + endif() +else() + if (APPLE AND METAL) + target_link_libraries(${TARGET} ${METAL_LIBRARY} ${GRAPHIC} ${MPS_LIBRARY}) + endif() + target_link_libraries(${TARGET} -lpaddle_light_api_shared) + target_link_libraries(${TARGET} -liomp5) + target_link_libraries(${TARGET} -ldl) +endif() diff --git a/lite/kernels/xpu/CMakeLists.txt b/lite/kernels/xpu/CMakeLists.txt index 3d3720ba7f0..4b2be99d0b6 100644 --- a/lite/kernels/xpu/CMakeLists.txt +++ b/lite/kernels/xpu/CMakeLists.txt @@ -92,6 +92,7 @@ else() add_kernel(anchor_generator_compute_xpu XPU extra SRCS anchor_generator_compute.cc) add_kernel(box_clip_compute_xpu XPU extra SRCS box_clip_compute.cc) add_kernel(pad2d_compute_xpu XPU extra SRCS pad2d_compute.cc) + add_kernel(pad3d_compute_xpu XPU extra SRCS pad3d_compute.cc) add_kernel(pixel_shuffle_compute_xpu XPU extra SRCS pixel_shuffle_compute.cc) add_kernel(correlation_compute_xpu XPU extra SRCS correlation_compute.cc) add_kernel(logical_compute_xpu XPU extra SRCS logical_compute.cc) diff --git 
a/lite/kernels/xpu/pad3d_compute.cc b/lite/kernels/xpu/pad3d_compute.cc new file mode 100644 index 00000000000..8be51425f10 --- /dev/null +++ b/lite/kernels/xpu/pad3d_compute.cc @@ -0,0 +1,101 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lite/kernels/xpu/pad3d_compute.h" +#include +#include "lite/backends/xpu/xpu_header_sitter.h" +#include "lite/core/op_registry.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace xpu { + +template +void Pad3dCompute::Run() { + auto& param = this->template Param(); + auto& ctx = this->ctx_->template As(); + auto pads = param.paddings; + auto mode = param.mode; + auto data_format = param.data_format; + T value = static_cast(param.pad_value); + + auto* x = param.X; + auto in_dims = x->dims(); + auto* in_data = x->template data(); + auto* out = param.Out; + T* out_data = out->template mutable_data(TARGET(kXPU)); + + if (mode == "reflect" || mode == "constant" || mode == "replicate" || + mode == "circular") { + if (data_format == "NCDHW") { + std::vector pad_left = {0, 0, pads[4], pads[2], pads[0]}; + std::vector pad_right = {0, 0, pads[5], pads[3], pads[1]}; + + int n_shape = in_dims[0]; + int c_shape = in_dims[1]; + int d_shape = in_dims[2]; + int h_shape = in_dims[3]; + int w_shape = in_dims[4]; + + std::vector xshape = {n_shape, c_shape, d_shape, h_shape, w_shape}; + + int r = xdnn::pad(ctx.GetRawContext(), + in_data, 
+ out_data, + xshape, + pad_left, + pad_right, + value); + CHECK_EQ(r, 0); + } else if (data_format == "NDHWC") { + std::vector pad_left = {0, pads[4], pads[2], pads[0], 0}; + std::vector pad_right = {0, pads[5], pads[3], pads[1], 0}; + + int n_shape = in_dims[0]; + int d_shape = in_dims[1]; + int h_shape = in_dims[2]; + int w_shape = in_dims[3]; + int c_shape = in_dims[4]; + std::vector xshape = {n_shape, d_shape, h_shape, w_shape, c_shape}; + + int r = xdnn::pad(ctx.GetRawContext(), + in_data, + out_data, + xshape, + pad_left, + pad_right, + value); + CHECK_EQ(r, 0); + } + + } else { + LOG(FATAL) << "xpu unsupport mode: " << mode; + } +} + +} // namespace xpu +} // namespace kernels +} // namespace lite +} // namespace paddle + +REGISTER_LITE_KERNEL(pad3d, + kXPU, + kFloat, + kNCHW, + paddle::lite::kernels::xpu::Pad3dCompute, + def) + .BindInput("X", {LiteType::GetTensorTy(TARGET(kXPU))}) + .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kXPU))}) + .Finalize(); diff --git a/lite/kernels/xpu/pad3d_compute.h b/lite/kernels/xpu/pad3d_compute.h new file mode 100644 index 00000000000..bd027a91c6e --- /dev/null +++ b/lite/kernels/xpu/pad3d_compute.h @@ -0,0 +1,37 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "lite/core/kernel.h" + +namespace paddle { +namespace lite { +namespace kernels { +namespace xpu { + +template +class Pad3dCompute : public KernelLite { + public: + using param_t = operators::Pad2dParam; + + virtual void Run(); + + virtual ~Pad3dCompute() = default; +}; + +} // namespace xpu +} // namespace kernels +} // namespace lite +} // namespace paddle From 9a5488bc829507534548b68f247e2e72851c2d66 Mon Sep 17 00:00:00 2001 From: laiou Date: Tue, 11 Jan 2022 17:17:34 +0800 Subject: [PATCH 2/3] add pad3d change xpu_memory_pass lite_inplace_pass --- lite/api/paddle_use_passes.h | 1 - .../mir/fusion/__xpu__inplace_fuse_pass.cc | 45 ---------------- .../mir/fusion/__xpu__inplace_fuse_pass.h | 32 ----------- .../mir/fusion/__xpu__inplace_fuser.cc | 53 ------------------- .../mir/fusion/__xpu__inplace_fuser.h | 40 -------------- .../optimizer/mir/fusion/inplace_fuse_pass.cc | 3 +- .../optimizer/mir/fusion/inplace_fuser.cc | 23 +++++--- .../optimizer/mir/xpu_memory_optimize_pass.cc | 42 ++++++++------- .../optimizer/mir/xpu_memory_optimize_pass.h | 13 ++--- lite/core/optimizer/optimizer.cc | 1 - 10 files changed, 46 insertions(+), 207 deletions(-) delete mode 100644 lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.cc delete mode 100644 lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.h delete mode 100644 lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.cc delete mode 100644 lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.h diff --git a/lite/api/paddle_use_passes.h b/lite/api/paddle_use_passes.h index 5c7af415737..4fd1e24d09a 100644 --- a/lite/api/paddle_use_passes.h +++ b/lite/api/paddle_use_passes.h @@ -65,7 +65,6 @@ USE_MIR_PASS(type_layout_cast_preprocess_pass); USE_MIR_PASS(memory_optimize_pass); USE_MIR_PASS(xpu_memory_optimize_pass); USE_MIR_PASS(lite_inplace_fuse_pass); -USE_MIR_PASS(xpu_inplace_fuse_pass); USE_MIR_PASS(multi_stream_analysis_pass); USE_MIR_PASS(elementwise_mul_constant_eliminate_pass); 
USE_MIR_PASS(npu_subgraph_pass); diff --git a/lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.cc b/lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.cc deleted file mode 100644 index 38212137228..00000000000 --- a/lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.cc +++ /dev/null @@ -1,45 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.h" -#include -#include -#include "lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.h" -#include "lite/core/optimizer/mir/pass_registry.h" - -namespace paddle { -namespace lite { -namespace mir { - -void XPUInplaceFusePass::Apply(const std::unique_ptr& graph) { - std::vector inplace_type_cases{"reshape", - "reshape2", - "flatten", - "flatten2", - "squeeze", - "squeeze2", - "unsqueeze", - "unsqueeze2"}; - for (auto type : inplace_type_cases) { - fusion::XPUInplaceFuser inplace_fuser(type); - inplace_fuser(graph.get()); - } -} - -} // namespace mir -} // namespace lite -} // namespace paddle - -REGISTER_MIR_PASS(xpu_inplace_fuse_pass, paddle::lite::mir::XPUInplaceFusePass) - .BindTargets({TARGET(kXPU)}); diff --git a/lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.h b/lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.h deleted file mode 100644 index 5fb421bfbbc..00000000000 --- a/lite/core/optimizer/mir/fusion/__xpu__inplace_fuse_pass.h +++ /dev/null 
@@ -1,32 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include -#include -#include "lite/core/optimizer/mir/pass.h" - -namespace paddle { -namespace lite { -namespace mir { - -class XPUInplaceFusePass : public ProgramPass { - public: - void Apply(const std::unique_ptr& graph) override; -}; - -} // namespace mir -} // namespace lite -} // namespace paddle diff --git a/lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.cc b/lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.cc deleted file mode 100644 index d9740213d47..00000000000 --- a/lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.cc +++ /dev/null @@ -1,53 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#include "lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.h" -#include -#include -#include "lite/core/optimizer/mir/pattern_matcher_high_api.h" - -namespace paddle { -namespace lite { -namespace mir { -namespace fusion { - -void XPUInplaceFuser::BuildPattern() { - auto* input = VarNode("input") - ->assert_is_op_input(type_, "X") - ->assert_only_one_output() - ->AsInput(); - - auto* op_node = OpNode("inplace", type_)->assert_is_op(type_); - - auto* output = - VarNode("output")->assert_is_op_output(type_, "Out")->AsOutput(); - - *input >> *op_node >> *output; -} - -void XPUInplaceFuser::InsertNewNode(SSAGraph* graph, - const key2nodes_t& matched) { - bool inplace = true; - auto* stmt = matched.at("inplace")->stmt(); - auto op = stmt->op(); - cpp::OpDesc* op_desc = op->mutable_op_info(); - op_desc->SetAttr("inplace", inplace); - stmt->op()->Attach(*op_desc, op->scope()); - stmt->op()->AttachKernel(&(stmt->picked_kernel())); -} - -} // namespace fusion -} // namespace mir -} // namespace lite -} // namespace paddle diff --git a/lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.h b/lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.h deleted file mode 100644 index 75d2e44ad37..00000000000 --- a/lite/core/optimizer/mir/fusion/__xpu__inplace_fuser.h +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -#pragma once - -#include -#include -#include "lite/core/optimizer/mir/pattern_matcher_high_api.h" - -namespace paddle { -namespace lite { -namespace mir { -namespace fusion { - -class XPUInplaceFuser : public FuseBase { - public: - explicit XPUInplaceFuser(const std::string& type) : type_(type) {} - - void BuildPattern() override; - void InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) override; - - private: - std::string type_; -}; - -} // namespace fusion -} // namespace mir -} // namespace lite -} // namespace paddle diff --git a/lite/core/optimizer/mir/fusion/inplace_fuse_pass.cc b/lite/core/optimizer/mir/fusion/inplace_fuse_pass.cc index 25354fbee2f..4fc05bd051e 100644 --- a/lite/core/optimizer/mir/fusion/inplace_fuse_pass.cc +++ b/lite/core/optimizer/mir/fusion/inplace_fuse_pass.cc @@ -43,5 +43,4 @@ void InplaceFusePass::Apply(const std::unique_ptr& graph) { REGISTER_MIR_PASS(lite_inplace_fuse_pass, paddle::lite::mir::InplaceFusePass) .BindTargets({TARGET(kAny)}) - .ExcludeTargets({TARGET(kNPU)}) - .ExcludeTargets({TARGET(kXPU)}); + .ExcludeTargets({TARGET(kNPU)}); diff --git a/lite/core/optimizer/mir/fusion/inplace_fuser.cc b/lite/core/optimizer/mir/fusion/inplace_fuser.cc index 89399af8af7..9cef60c09e7 100644 --- a/lite/core/optimizer/mir/fusion/inplace_fuser.cc +++ b/lite/core/optimizer/mir/fusion/inplace_fuser.cc @@ -15,22 +15,31 @@ #include "lite/core/optimizer/mir/fusion/inplace_fuser.h" #include #include +#include "lite/core/optimizer/mir/pattern_matcher_high_api.h" namespace paddle { namespace lite { namespace mir { namespace fusion { -void InplaceFuser::BuildPattern() { OpNode("inplace", type_); } +void InplaceFuser::BuildPattern() { + auto* input = VarNode("input") + ->assert_is_op_input(type_, "X") + ->assert_only_one_output() + ->AsInput(); + + auto* op_node = OpNode("inplace", type_)->assert_is_op(type_); + + auto* output = VarNode("output") + ->assert_is_op_output(type_, "Out") + ->assert_only_one_output() + ->AsOutput(); + + *input 
>> *op_node >> *output; +} void InplaceFuser::InsertNewNode(SSAGraph* graph, const key2nodes_t& matched) { - auto out_var_nodes = matched.at("inplace")->outlinks; bool inplace = true; - for (auto& out_var_node : out_var_nodes) { - if (out_var_node->outlinks.size() > 1) { - inplace = false; - } - } auto* stmt = matched.at("inplace")->stmt(); auto op = stmt->op(); cpp::OpDesc* op_desc = op->mutable_op_info(); diff --git a/lite/core/optimizer/mir/xpu_memory_optimize_pass.cc b/lite/core/optimizer/mir/xpu_memory_optimize_pass.cc index 3e151a9220e..e93c7fce24b 100644 --- a/lite/core/optimizer/mir/xpu_memory_optimize_pass.cc +++ b/lite/core/optimizer/mir/xpu_memory_optimize_pass.cc @@ -37,8 +37,8 @@ typedef struct { void XPUMemoryOptimizePass::CollectLifeCycleByDevice( std::map* lifecycles, SSAGraph* graph, - std::map* squeeze_input2output, - std::map* squeeze_output2input) { + std::map* inplaceop_input2output, + std::map* inplaceop_output2input) { max_lifecycle_ = 0; auto is_host = [](TargetType x) -> bool { @@ -232,8 +232,8 @@ void XPUMemoryOptimizePass::CollectLifeCycleByDevice( if (is_inplace) { (*lifecycles)[TargetToStr(target_type)].emplace( var_name, std::make_pair(max_lifecycle_, max_lifecycle_)); - squeeze_input2output->emplace(input_xpu_var_name, var_name); - squeeze_output2input->emplace(var_name, input_xpu_var_name); + inplaceop_input2output->emplace(input_xpu_var_name, var_name); + inplaceop_output2input->emplace(var_name, input_xpu_var_name); } else { (*lifecycles)[TargetToStr(target_type)].emplace( var_name, std::make_pair(max_lifecycle_, max_lifecycle_)); @@ -264,8 +264,8 @@ void XPUMemoryOptimizePass::CollectLifeCycleByDevice( void XPUMemoryOptimizePass::MakeReusePlan( const lifecycle_map_t& lifecycles, std::map* node2cluster, - std::map* squeeze_input2output, - std::map* squeeze_output2input) { + std::map* inplaceop_input2output, + std::map* inplaceop_output2input) { std::vector mem_nodes; std::vector cluster; for (auto& data : lifecycles) { @@ -306,7 
+306,7 @@ void XPUMemoryOptimizePass::MakeReusePlan( } VLOG(4) << "Step1 get inplace node Cluster: "; for (size_t i = 0; i < mem_nodes.size(); i++) { - if (squeeze_input2output->count(mem_nodes[i].name)) { + if (inplaceop_input2output->count(mem_nodes[i].name)) { int cluster_index = cluster.size(); mem_nodes[i].cluster = cluster_index; (*node2cluster)[mem_nodes[i].name] = mem_nodes[i].name; @@ -316,7 +316,7 @@ void XPUMemoryOptimizePass::MakeReusePlan( << (*node2cluster)[mem_nodes[i].name]; std::set cluster_adj = mem_nodes[i].adj; for (size_t j = 0; j < mem_nodes.size(); j++) { - if (mem_nodes[j].name == (*squeeze_input2output)[mem_nodes[i].name]) { + if (mem_nodes[j].name == (*inplaceop_input2output)[mem_nodes[i].name]) { (*node2cluster)[mem_nodes[j].name] == mem_nodes[i].name; mem_nodes[j].cluster = cluster_index; VLOG(4) << mem_nodes[j].name << ", life time is " @@ -332,7 +332,7 @@ void XPUMemoryOptimizePass::MakeReusePlan( } VLOG(4) << "Step2 merge inplace node Cluster: "; for (size_t i = 0; i < mem_nodes.size(); i++) { - if (squeeze_input2output->count(mem_nodes[i].name) && + if (inplaceop_input2output->count(mem_nodes[i].name) && mem_nodes[i].mapping != 1) { int cluster_index = cluster.size(); mem_nodes[i].cluster = cluster_index; @@ -347,7 +347,7 @@ void XPUMemoryOptimizePass::MakeReusePlan( std::set cluster_adj = mem_nodes[i].adj; for (size_t j = 0; j < mem_nodes.size(); j++) { - if (mem_nodes[j].name == (*squeeze_input2output)[mem_nodes[i].name]) { + if (mem_nodes[j].name == (*inplaceop_input2output)[mem_nodes[i].name]) { mem_nodes[j].cluster = mem_nodes[i].cluster; (*node2cluster)[mem_nodes[j].name] = mem_nodes[i].name; VLOG(4) << mem_nodes[j].name << ", life time is " @@ -359,7 +359,7 @@ void XPUMemoryOptimizePass::MakeReusePlan( for (auto& m : mem_nodes[j].adj) { cluster_adj.insert(m); } - } else if (squeeze_input2output->count(mem_nodes[j].name) && + } else if (inplaceop_input2output->count(mem_nodes[j].name) && (cluster_adj.find(mem_nodes[j].name) == 
cluster_adj.end()) && mem_nodes[j].mapping != 1) { mem_nodes[j].mapping = 1; @@ -376,7 +376,7 @@ void XPUMemoryOptimizePass::MakeReusePlan( } for (size_t n = 0; n < mem_nodes.size(); n++) { if (mem_nodes[n].name == - (*squeeze_input2output)[mem_nodes[j].name]) { + (*inplaceop_input2output)[mem_nodes[j].name]) { mem_nodes[n].cluster = mem_nodes[i].cluster; (*node2cluster)[mem_nodes[n].name] = mem_nodes[i].name; VLOG(4) << mem_nodes[n].name << ", life time is " @@ -396,7 +396,7 @@ void XPUMemoryOptimizePass::MakeReusePlan( } VLOG(4) << "Step3 get others node Cluster : "; for (size_t i = 0; i < mem_nodes.size(); i++) { - if (!(squeeze_input2output->count(mem_nodes[i].name)) && + if (!(inplaceop_input2output->count(mem_nodes[i].name)) && mem_nodes[i].cluster < 0 && mem_nodes[i].life_interval != 0) { int cluster_index = cluster.size(); mem_nodes[i].cluster = cluster_index; @@ -409,7 +409,7 @@ void XPUMemoryOptimizePass::MakeReusePlan( cluster.push_back(mem_nodes[i].name); std::set cluster_adj = mem_nodes[i].adj; for (size_t j = i + 1; j < mem_nodes.size(); j++) { - if (!(squeeze_input2output->count(mem_nodes[j].name)) && + if (!(inplaceop_input2output->count(mem_nodes[j].name)) && mem_nodes[j].cluster < 0 && (cluster_adj.find(mem_nodes[j].name) == cluster_adj.end())) { mem_nodes[j].cluster = mem_nodes[i].cluster; @@ -521,10 +521,12 @@ void XPUMemoryOptimizePass::Apply(const std::unique_ptr& graph) { // 3. Perform reuse plan: Replace all var's name in the model according to the // mapping table. 
std::map lifecycles; - std::map squeeze_input2output; - std::map squeeze_output2input; - CollectLifeCycleByDevice( - &lifecycles, graph.get(), &squeeze_input2output, &squeeze_output2input); + std::map inplaceop_input2output; + std::map inplaceop_output2input; + CollectLifeCycleByDevice(&lifecycles, + graph.get(), + &inplaceop_input2output, + &inplaceop_output2input); for (auto& ele : lifecycles) { if (ele.first != "xpu") { continue; @@ -532,8 +534,8 @@ void XPUMemoryOptimizePass::Apply(const std::unique_ptr& graph) { std::map node2cluster; MakeReusePlan(ele.second, &node2cluster, - &squeeze_input2output, - &squeeze_output2input); + &inplaceop_input2output, + &inplaceop_output2input); PerformReusePlan(graph.get(), node2cluster); } } diff --git a/lite/core/optimizer/mir/xpu_memory_optimize_pass.h b/lite/core/optimizer/mir/xpu_memory_optimize_pass.h index 053914371d9..d4bbf9e7f9d 100644 --- a/lite/core/optimizer/mir/xpu_memory_optimize_pass.h +++ b/lite/core/optimizer/mir/xpu_memory_optimize_pass.h @@ -41,12 +41,13 @@ class XPUMemoryOptimizePass : public ProgramPass { void CollectLifeCycleByDevice( std::map* lifecycles, SSAGraph*, - std::map* squeeze_input2output, - std::map* squeeze_output2input); - void MakeReusePlan(const lifecycle_map_t& lifecycles, - std::map* node2cluster, - std::map* squeeze_input2output, - std::map* squeeze_output2input); + std::map* inplaceop_input2output, + std::map* inplaceop_output2input); + void MakeReusePlan( + const lifecycle_map_t& lifecycles, + std::map* node2cluster, + std::map* inplaceop_input2output, + std::map* inplaceop_output2input); void PerformReusePlan(SSAGraph* graph, const std::map& reuse_table); diff --git a/lite/core/optimizer/optimizer.cc b/lite/core/optimizer/optimizer.cc index 06c3c918e5e..f1b393cc2cb 100644 --- a/lite/core/optimizer/optimizer.cc +++ b/lite/core/optimizer/optimizer.cc @@ -264,7 +264,6 @@ std::unique_ptr RunDefaultOptimizer( "runtime_context_assign_pass", "argument_type_display_pass", 
"lite_inplace_fuse_pass", - "xpu_inplace_fuse_pass", #if !(defined(LITE_WITH_FPGA) || defined(LITE_WITH_PRECISION_PROFILE)) "memory_optimize_pass", "xpu_memory_optimize_pass" From be34e3285135581bbcc8e8790da7d60713a9ff1c Mon Sep 17 00:00:00 2001 From: laiou Date: Tue, 11 Jan 2022 17:27:09 +0800 Subject: [PATCH 3/3] add pad3d and change lite_inplace_pass xpu_memory_pass --- .../optimizer/mir/xpu_memory_optimize_pass.cc | 31 -------- .../x86_mobilenetv1_full_demo/CMakeLists.txt | 73 ------------------- .../x86_mobilenetv1_light_demo/CMakeLists.txt | 73 ------------------- lite/kernels/xpu/pad3d_compute.cc | 2 +- lite/kernels/xpu/pad3d_compute.h | 2 +- 5 files changed, 2 insertions(+), 179 deletions(-) delete mode 100644 lite/demo/cxx/x86_mobilenetv1_full_demo/CMakeLists.txt delete mode 100644 lite/demo/cxx/x86_mobilenetv1_light_demo/CMakeLists.txt diff --git a/lite/core/optimizer/mir/xpu_memory_optimize_pass.cc b/lite/core/optimizer/mir/xpu_memory_optimize_pass.cc index e93c7fce24b..2070fdfa356 100644 --- a/lite/core/optimizer/mir/xpu_memory_optimize_pass.cc +++ b/lite/core/optimizer/mir/xpu_memory_optimize_pass.cc @@ -101,7 +101,6 @@ void XPUMemoryOptimizePass::CollectLifeCycleByDevice( // Collect the invalid input and output variables that will not be reused. 
std::set invalid_var_names; - int inplace_op_num = 0; for (auto& op_node : graph->StmtTopologicalOrder()) { // variables of invalid_op_nodes wil not be reused if (!op_node->IsStmt()) continue; @@ -119,36 +118,6 @@ void XPUMemoryOptimizePass::CollectLifeCycleByDevice( } continue; } - // The specified input and output variables of the Ops whose 'inplace' attr - // is true will not be reused, such as reshape/reshape2's X and Out - // variables - std::map, std::set>> - inplace_op_nodes = {{"reshape", {{"X"}, {"Out"}}}, - {"reshape2", {{"X"}, {"Out"}}}, - {"flatten", {{"X"}, {"Out"}}}, - {"flatten2", {{"X"}, {"Out"}}}, - {"squeeze", {{"X"}, {"Out"}}}, - {"squeeze2", {{"X"}, {"Out"}}}, - {"unsqueeze", {{"X"}, {"Out"}}}, - {"unsqueeze2", {{"X"}, {"Out"}}}}; - auto inplace_op_node = inplace_op_nodes.find(op_type); - - if (inplace_op_node != inplace_op_nodes.end()) { - bool inplace = false; - if (op_info->HasAttr("inplace")) { - inplace = op_info->GetAttr("inplace"); - } - if (inplace) { - inplace_op_num++; - for (auto& in_param_name : inplace_op_node->second.first) { - const auto& in_arg_names = op_info->Input(in_param_name); - } - for (auto& out_param_name : inplace_op_node->second.second) { - const auto& out_arg_names = op_info->Output(out_param_name); - } - } - } } // non-tensor(like tensor_array) variables will not be reused diff --git a/lite/demo/cxx/x86_mobilenetv1_full_demo/CMakeLists.txt b/lite/demo/cxx/x86_mobilenetv1_full_demo/CMakeLists.txt deleted file mode 100644 index aaed1b50e02..00000000000 --- a/lite/demo/cxx/x86_mobilenetv1_full_demo/CMakeLists.txt +++ /dev/null @@ -1,73 +0,0 @@ -cmake_minimum_required(VERSION 2.8) -project(mobilenet_full_api) -set(TARGET mobilenet_full_api) - -# 1. 
path to Paddle-Lite lib and mklml lib -set(LITE_DIR "${PROJECT_SOURCE_DIR}/../../../cxx") -set(MKLML_DIR "${PROJECT_SOURCE_DIR}/../../../third_party/mklml/") - -if (WIN32) - add_definitions("/DGOOGLE_GLOG_DLL_DECL=") - set(MSVC_STATIC_CRT ) - if(MSVC_STATIC_CRT) - set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd") - set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") - else(MSVC_STATIC_CRT) - set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MDd") - set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MD") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MDd") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MD") - endif(MSVC_STATIC_CRT) -endif() - -if (APPLE AND METAL) - message(STATUS "set METAL=ON") - add_definitions("-DMETAL") - find_library(METAL_LIBRARY Metal REQUIRED) - find_library(GRAPHIC CoreGraphics REQUIRED) - find_library(MPS_LIBRARY MetalPerformanceShaders REQUIRED) -endif() - -# 2. link mklml and Paddle-Lite directory -link_directories(${LITE_DIR}/lib ${MKLML_DIR}/lib) -include_directories(${LITE_DIR}/include/ ${MKLML_DIR}/include) - -# 3. 
compile options -if (NOT WIN32) - add_definitions(-std=c++11 -g -O3 -pthread) - set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}) -endif() - -# 4.add executable output -add_executable(${TARGET} ${TARGET}.cc) -if (WIN32) - set(WITH_STATIC_MKL OFF) - if(WITH_STATIC_MKL) - set(MATH_LIB ${MKLML_DIR}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX}) - else() - set(MATH_LIB ${MKLML_DIR}/lib/mklml${CMAKE_STATIC_LIBRARY_SUFFIX} - ${MKLML_DIR}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX}) - endif() - - target_link_libraries(${TARGET} libpaddle_api_full_bundled.lib) - target_link_libraries(${TARGET} shlwapi.lib) - target_link_libraries(${TARGET} ${MATH_LIB}) - - add_custom_command(TARGET ${TARGET} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_DIR}/lib/libiomp5md.dll ${CMAKE_BINARY_DIR}/Release - ) - if(NOT WITH_STATIC_MKL) - add_custom_command(TARGET ${TARGET} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_DIR}/lib/mklml.dll ${CMAKE_BINARY_DIR}/Release - ) - endif() -else() - if (APPLE AND METAL) - target_link_libraries(${TARGET} ${METAL_LIBRARY} ${GRAPHIC} ${MPS_LIBRARY}) - endif() - target_link_libraries(${TARGET} -lpaddle_full_api_shared) - target_link_libraries(${TARGET} -liomp5) - target_link_libraries(${TARGET} -ldl) -endif() diff --git a/lite/demo/cxx/x86_mobilenetv1_light_demo/CMakeLists.txt b/lite/demo/cxx/x86_mobilenetv1_light_demo/CMakeLists.txt deleted file mode 100644 index 50b777e7520..00000000000 --- a/lite/demo/cxx/x86_mobilenetv1_light_demo/CMakeLists.txt +++ /dev/null @@ -1,73 +0,0 @@ -cmake_minimum_required(VERSION 2.8) -project(mobilenet_light_api) -set(TARGET mobilenet_light_api) - -# 1. 
path to Paddle-Lite lib and mklml lib -set(LITE_DIR "${PROJECT_SOURCE_DIR}/../../../cxx") -set(MKLML_DIR "${PROJECT_SOURCE_DIR}/../../../third_party/mklml/") - -if (WIN32) - add_definitions("/DGOOGLE_GLOG_DLL_DECL=") - set(MSVC_STATIC_CRT ) - if(MSVC_STATIC_CRT) - set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MTd") - set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MT") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MTd") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MT") - else(MSVC_STATIC_CRT) - set(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} /bigobj /MDd") - set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} /bigobj /MD") - set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /bigobj /MDd") - set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} /bigobj /MD") - endif(MSVC_STATIC_CRT) -endif() - -if (APPLE AND METAL) - message(STATUS "set METAL=ON") - add_definitions("-DMETAL") - find_library(METAL_LIBRARY Metal REQUIRED) - find_library(GRAPHIC CoreGraphics REQUIRED) - find_library(MPS_LIBRARY MetalPerformanceShaders REQUIRED) -endif() - -# 2. link mklml and Paddle-Lite directory -link_directories(${LITE_DIR}/lib ${MKLML_DIR}/lib) -include_directories(${LITE_DIR}/include/ ${MKLML_DIR}/include) - -# 3. 
compile options -if (NOT WIN32) - add_definitions(-std=c++11 -g -O3 -pthread) - set(EXECUTABLE_OUTPUT_PATH ${PROJECT_SOURCE_DIR}) -endif() - -# 4.add executable output -add_executable(${TARGET} ${TARGET}.cc) -if (WIN32) - set(WITH_STATIC_MKL OFF) - if(WITH_STATIC_MKL) - set(MATH_LIB ${MKLML_DIR}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX}) - else() - set(MATH_LIB ${MKLML_DIR}/lib/mklml${CMAKE_STATIC_LIBRARY_SUFFIX} - ${MKLML_DIR}/lib/libiomp5md${CMAKE_STATIC_LIBRARY_SUFFIX}) - endif() - - target_link_libraries(${TARGET} libpaddle_api_light_bundled.lib) - target_link_libraries(${TARGET} shlwapi.lib) - target_link_libraries(${TARGET} ${MATH_LIB}) - - add_custom_command(TARGET ${TARGET} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_DIR}/lib/libiomp5md.dll ${CMAKE_BINARY_DIR}/Release - ) - if(NOT WITH_STATIC_MKL) - add_custom_command(TARGET ${TARGET} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${MKLML_DIR}/lib/mklml.dll ${CMAKE_BINARY_DIR}/Release - ) - endif() -else() - if (APPLE AND METAL) - target_link_libraries(${TARGET} ${METAL_LIBRARY} ${GRAPHIC} ${MPS_LIBRARY}) - endif() - target_link_libraries(${TARGET} -lpaddle_light_api_shared) - target_link_libraries(${TARGET} -liomp5) - target_link_libraries(${TARGET} -ldl) -endif() diff --git a/lite/kernels/xpu/pad3d_compute.cc b/lite/kernels/xpu/pad3d_compute.cc index 8be51425f10..261090faded 100644 --- a/lite/kernels/xpu/pad3d_compute.cc +++ b/lite/kernels/xpu/pad3d_compute.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/lite/kernels/xpu/pad3d_compute.h b/lite/kernels/xpu/pad3d_compute.h index bd027a91c6e..734e01fde5b 100644 --- a/lite/kernels/xpu/pad3d_compute.h +++ b/lite/kernels/xpu/pad3d_compute.h @@ -1,4 +1,4 @@ -// Copyright (c) 2019 PaddlePaddle Authors. 
All Rights Reserved. +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.