PaddlePaddle · zhupengyang · Apr 21, 2021 · Apr 20, 2021
@@ -14,7 +14,6 @@
 
 #include "lite/kernels/x86/activation_compute.h"
 
-// float
 REGISTER_LITE_KERNEL(square,
                      kX86,
                      kFloat,
@@ -25,7 +24,6 @@ REGISTER_LITE_KERNEL(square,
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
     .Finalize();
 
-// float
 REGISTER_LITE_KERNEL(relu,
                      kX86,
                      kFloat,
@@ -36,7 +34,6 @@ REGISTER_LITE_KERNEL(relu,
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
     .Finalize();
 
-// float
 REGISTER_LITE_KERNEL(leaky_relu,
                      kX86,
                      kFloat,
@@ -48,7 +45,6 @@ REGISTER_LITE_KERNEL(leaky_relu,
     .BindPaddleOpVersion("leaky_relu", 1)
     .Finalize();
 
-// float
 REGISTER_LITE_KERNEL(tanh,
                      kX86,
                      kFloat,
@@ -59,7 +55,6 @@ REGISTER_LITE_KERNEL(tanh,
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
     .Finalize();
 
-// float
 REGISTER_LITE_KERNEL(gelu,
                      kX86,
                      kFloat,
@@ -80,7 +75,6 @@ REGISTER_LITE_KERNEL(softsign,
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
     .Finalize();
 
-// float
 REGISTER_LITE_KERNEL(sigmoid,
                      kX86,
                      kFloat,
@@ -91,7 +85,6 @@ REGISTER_LITE_KERNEL(sigmoid,
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
     .Finalize();
 
-// float
 REGISTER_LITE_KERNEL(relu6,
                      kX86,
                      kFloat,
@@ -101,3 +94,13 @@ REGISTER_LITE_KERNEL(relu6,
     .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})
     .Finalize();
+
+REGISTER_LITE_KERNEL(sqrt,  // kernel registration for the "sqrt" op, backed by SqrtCompute<float>
+                     kX86,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::x86::SqrtCompute<float>,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86))})  // only the target is passed; other GetTensorTy arguments keep their defaults
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86))})  // output is declared with the same tensor type as the input
+    .Finalize();
@@ -307,6 +307,28 @@ class Relu6Compute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
   virtual ~Relu6Compute() = default;
 };
 
+template <typename T>
+struct SqrtFunctor : public BaseActivationFunctor<T> {  // activation functor that writes the square root of x into out
+  template <typename Device, typename X, typename Out>
+  void operator()(Device d, X x, Out out) const {  // NOTE(review): x/out look like Eigen tensor expressions and d an Eigen device; confirm against Activate
+    out.device(d) = x.sqrt();  // assigns x.sqrt() to out through out.device(d)
+  }
+};
+
+template <typename T>
+class SqrtCompute : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {  // x86 kernel that applies SqrtFunctor<T> from param.X to param.Out
+ public:
+  using param_t = operators::ActivationParam;
+
+  void Run() override {
+    auto& param = *param_.get_mutable<operators::ActivationParam>();
+    param.Out->template mutable_data<T>();  // called before Activate; presumably makes Out's buffer of type T available (confirm)
+    Activate<SqrtFunctor<T>>(param.X, param.Out);  // Activate is defined outside this hunk; it receives the input and output tensors
+  }
+
+  virtual ~SqrtCompute() = default;
+};
+
 }  // namespace x86
 }  // namespace kernels
 }  // namespace lite

@@ -182,6 +182,17 @@ REGISTER_LITE_KERNEL(
     .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kInt64))})
     .Finalize();
 
+REGISTER_LITE_KERNEL(elementwise_pow,  // kernel registration for the "elementwise_pow" op, backed by ElementwisePowCompute<float>
+                     kX86,
+                     kFloat,
+                     kNCHW,
+                     paddle::lite::kernels::x86::ElementwisePowCompute<float>,
+                     def)
+    .BindInput("X", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})  // X, Y and Out are all declared as x86 float tensors
+    .BindInput("Y", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})
+    .BindOutput("Out", {LiteType::GetTensorTy(TARGET(kX86), PRECISION(kFloat))})
+    .Finalize();
+
 REGISTER_LITE_KERNEL(elementwise_mod,
                      kX86,
                      kFloat,

@@ -11,8 +11,9 @@
 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 // See the License for the specific language governing permissions and
 // limitations under the License.
-#pragma once
 
+#pragma once
+#include <cmath>
 #include "lite/core/kernel.h"
 #include "lite/core/op_registry.h"
 #include "lite/fluid/eigen.h"
@@ -50,6 +51,11 @@ struct FloorDivFunctor {
   }
 };
 
+template <typename T>
+struct PowFunctor {  // binary functor: returns a raised to the power b via std::pow
+  inline HOSTDEVICE T operator()(T a, T b) const { return std::pow(a, b); }  // the std::pow result is returned as T
+};
+
 template <typename T>
 struct ModFunctor {
   inline HOSTDEVICE T operator()(T a, T b) const {
@@ -78,9 +84,7 @@ class ElementwiseAddCompute
     auto& param = *param_.get_mutable<param_t>();
     auto& context = ctx_->As<X86Context>();
     param.Out->template mutable_data<T>();
-    paddle::lite::kernels::x86::ElementwiseComputeEx<AddFunctor<T>,
-                                                     lite::TargetType::kX86,
-                                                     T>(
+    ElementwiseComputeEx<AddFunctor<T>, lite::TargetType::kX86, T>(
         context, param.X, param.Y, param.axis, AddFunctor<T>(), param.Out);
   }
 
@@ -98,9 +102,7 @@ class ElementwiseSubCompute
     auto& context = ctx_->As<X86Context>();
 
     param.Out->template mutable_data<T>();
-    paddle::lite::kernels::x86::ElementwiseComputeEx<SubFunctor<T>,
-                                                     lite::TargetType::kX86,
-                                                     T>(
+    ElementwiseComputeEx<SubFunctor<T>, lite::TargetType::kX86, T>(
         context, param.X, param.Y, param.axis, SubFunctor<T>(), param.Out);
   }
 
@@ -116,9 +118,7 @@ class ElementwiseMulCompute
     auto& param = *param_.get_mutable<param_t>();
     auto& context = ctx_->As<X86Context>();
     param.Out->template mutable_data<T>();
-    paddle::lite::kernels::x86::ElementwiseComputeEx<MulFunctor<T>,
-                                                     lite::TargetType::kX86,
-                                                     T>(
+    ElementwiseComputeEx<MulFunctor<T>, lite::TargetType::kX86, T>(
         context, param.X, param.Y, param.axis, MulFunctor<T>(), param.Out);
   }
 
@@ -134,9 +134,7 @@ class ElementwiseDivCompute
     auto& param = *param_.get_mutable<param_t>();
     auto& context = ctx_->As<X86Context>();
     param.Out->template mutable_data<T>();
-    paddle::lite::kernels::x86::ElementwiseComputeEx<DivFunctor<T>,
-                                                     lite::TargetType::kX86,
-                                                     T>(
+    ElementwiseComputeEx<DivFunctor<T>, lite::TargetType::kX86, T>(
         context, param.X, param.Y, param.axis, DivFunctor<T>(), param.Out);
   }
 
@@ -159,6 +157,22 @@ class ElementwiseFloorDivCompute
   virtual ~ElementwiseFloorDivCompute() = default;
 };
 
+template <typename T>
+class ElementwisePowCompute  // x86 kernel that combines param.X and param.Y with PowFunctor<T> into param.Out
+    : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {
+ public:
+  using param_t = operators::ElementwiseParam;
+  void Run() override {
+    auto& param = *param_.get_mutable<param_t>();
+    auto& context = ctx_->As<X86Context>();
+    param.Out->template mutable_data<T>();  // called before the compute; presumably makes Out's buffer of type T available (confirm)
+    ElementwiseComputeEx<PowFunctor<T>, lite::TargetType::kX86, T>(  // same helper the Add/Sub/Mul/Div kernels in this file call
+        context, param.X, param.Y, param.axis, PowFunctor<T>(), param.Out);  // param.axis is forwarded unchanged
+  }
+
+  virtual ~ElementwisePowCompute() = default;
+};
+
 template <typename T>
 class ElementwiseModCompute
     : public KernelLite<TARGET(kX86), PRECISION(kFloat)> {

@@ -687,6 +687,8 @@ TEST(Activation_sqrt, precision) {
   place = Place(TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageDefault));
 #elif defined(LITE_WITH_ARM)
   place = TARGET(kARM);
+#elif defined(LITE_WITH_X86)
+  place = TARGET(kX86);
 #else
   return;
 #endif

@@ -64,6 +64,11 @@ T floordiv(T a, T b) {
   return static_cast<T>(std::trunc(a / b));
 }
 
+template <class T>
+T pow(T a, T b) {  // test-side helper: forwards to std::pow with both operands of type T
+  return std::pow(a, b);  // the std::pow result is returned as T
+}
+
 template <class T>
 T max(T a, T b) {
   return std::max(a, b);
@@ -74,11 +79,6 @@ T min(T a, T b) {
   return std::min(a, b);
 }
 
-template <class T>
-T pow(T a, T b) {
-  return std::pow(a, b);
-}
-
 template <class T>
 T mod(T a, T b) {
   T res = a % b;
@@ -379,6 +379,7 @@ TEST(elementwise_x86, precison) {
     TestEltX86<int64_t>(place, abs_error, op, "int64");
   }
 
+  TestEltX86<float>(place, abs_error, "pow", "def");
   TestEltX86<int>(place, abs_error, "mod", "int32");
   TestEltX86<int64_t>(place, abs_error, "mod", "int64");
 }