[feat] add more op support #18

Merged (9 commits, Jan 18, 2025)
24 changes: 18 additions & 6 deletions ggml/src/ggml-qnn/backend-ops.cpp
@@ -25,7 +25,7 @@ bool qnn_is_op_valid(ggml_backend_qnn_device_context *ctx, const ggml_tensor *ds
return false;
}

-const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(dst));
+const auto param_count = qnn::get_qnn_op_input_param_count(dst);
switch (param_count) {
case 1:
return dst->src[0];
@@ -91,9 +91,13 @@ void get_graph_key_from_op(const ggml_tensor *op, std::string &output) {
GGML_ASSERT(op->op != GGML_OP_NONE);
output += ggml_op_desc(op);
output += qnn::get_ggml_type_name(op->type);
-const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op));
+const auto param_count = qnn::get_qnn_op_input_param_count(op);
for (size_t i = 0; i < param_count; ++i) {
auto *input = op->src[i];
+if (!input) {
+break;
+}
+
output += '_';
append_tensor_dimensions(input, output);
}
@@ -224,7 +228,7 @@ bool qnn_generic_op_impl(ggml_backend_qnn_device_context *ctx, ggml_tensor *dst)

#ifndef NDEBUG
if (!succeed) {
-const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(dst));
+const auto param_count = qnn::get_qnn_op_input_param_count(dst);
for (size_t i = 0; i < param_count; ++i) {
print_ggml_tensor(dst->src[i]);
}
@@ -409,7 +413,7 @@ bool ggnl_qnn_supports_op_tensor(ggml_backend_qnn_device_context *ctx, const ggm
return false;
}

-const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op));
+const auto param_count = qnn::get_qnn_op_input_param_count(op);
for (size_t i = 0; i < param_count; ++i) {
if (!ggml_qnn_supports_tensor(ctx, op->src[i])) {
return false;
@@ -479,12 +483,20 @@ bool device_supports_op(ggml_backend_qnn_device_context *ctx, const ggml_tensor
}

if (!kQnnOpsTable[qnn::get_qnn_op_index(op)]) {
-QNN_LOG_DEBUG("[%s]unsupported op", ggml_op_name(op->op));
+#ifndef NDEBUG
+std::string op_key;
+get_graph_key_from_op(op, op_key);
+QNN_LOG_DEBUG("[%s]unsupported op", op_key.c_str());
+#endif
return false;
}

if (!ggnl_qnn_supports_op_tensor(ctx, op)) {
-QNN_LOG_DEBUG("[%s]unsupported tensor", ggml_op_name(op->op));
+#ifndef NDEBUG
+std::string tensor_dims;
+append_tensor_dimensions(op, tensor_dims);
+QNN_LOG_DEBUG("[%s]unsupported tensor(%s)", ggml_op_name(op->op), tensor_dims.c_str());
+#endif
return false;
}

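The backend-ops.cpp changes are mechanical: every call site now passes the ggml_tensor directly instead of pre-computing an op index. A minimal sketch of a caller under the new signature — all_inputs_present is hypothetical, not part of the PR, and assumes ggml.h plus the backend's qnn:: helper declarations are in scope:

// Hypothetical helper (not in the PR): check that every QNN input of an op
// is present, mirroring the early-out added to get_graph_key_from_op above.
static bool all_inputs_present(const ggml_tensor *dst) {
    // One call on the tensor itself; no qnn::get_qnn_op_index(dst) wrapper needed.
    const auto param_count = qnn::get_qnn_op_input_param_count(dst);
    for (size_t i = 0; i < param_count; ++i) {
        if (!dst->src[i]) {
            return false;
        }
    }
    return true;
}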
12 changes: 5 additions & 7 deletions ggml/src/ggml-qnn/graph.cpp
@@ -15,7 +15,7 @@ using qnn_tensor_cache_t = std::unordered_map<ggml_tensor *, qnn::qnn_tensor_ptr

int get_op_max_rank(const ggml_tensor *op) {
int max_rank = ggml_n_dims(op);
-const int count = (int)qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op));
+const int count = (int)qnn::get_qnn_op_input_param_count(op);
for (int i = 0; i < count; ++i) {
max_rank = std::max(max_rank, ggml_n_dims(op->src[i]));
}
@@ -56,14 +56,12 @@ qnn::qnn_op_config_ptr_t create_operation_from_op_tensor(ggml_tensor *dst, const
QNNBackend device, Qnn_GraphHandle_t graph_handle,
std::shared_ptr<qnn::qnn_instance> qnn_instance,
bool is_intermediate, qnn_tensor_cache_t &tensor_cache) {
-const auto op_index = qnn::get_qnn_op_index(dst);
-auto qnn_op = qnn::create_op_constructor(op_index);
-auto operation = qnn_op(name, qnn_instance);
+auto operation = qnn::create_op(dst, name, qnn_instance);

// input tensors
qnn::qnn_tensor_array_t input_qnn_tensors;
auto tensor_type = is_intermediate ? qnn::ggml_qnn_tensor::INTERMEDIATE : qnn::ggml_qnn_tensor::INPUT;
-for (size_t i = 0; i < qnn::get_qnn_op_input_param_count(op_index); ++i) {
+for (size_t i = 0; i < qnn::get_qnn_op_input_param_count(dst); ++i) {
auto input_qnn_tensor =
create_tensor_with_cache(dst->src[i], tensor_type, rank, device, graph_handle, qnn_instance, tensor_cache);
input_qnn_tensors.push_back(input_qnn_tensor);
@@ -92,7 +90,7 @@ bool bind_src_tensors(ggml_tensor *op, qnn::qnn_tensor_array_t &tensor_wrappers,
return false;
}

-const auto param_count = qnn::get_qnn_op_input_param_count(qnn::get_qnn_op_index(op));
+const auto param_count = qnn::get_qnn_op_input_param_count(op);
GGML_ASSERT(tensor_wrappers.size() == param_count);
qnn_tensors.resize(param_count);
for (size_t i = 0; i < param_count; ++i) {
@@ -268,7 +266,7 @@ bool qnn_graph::build_graph_from_ggml_graph(const ggml_cgraph *cgraph) {
continue;
}

-QNN_LOG_DEBUG("[%s]create op: %s", get_backend_name(_device), get_qnn_op_name(dst->op));
+QNN_LOG_DEBUG("[%s]create op: %s", get_backend_name(_device), get_qnn_op_name(dst));
auto operation = create_operation_from_op_tensor(dst, dst->name, rank, _device, _graph_handle,
_qnn_instance, true, tensor_cache); // TODO: fix op name
operations.push_back(operation);
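In graph.cpp the construction path collapses to a single dispatch call. A rough sketch of the resulting flow for one node, assuming the project's headers; apart from the qnn:: calls, the names and the signature shape are taken from create_operation_from_op_tensor in the diff, and the final tensor wiring is elided:

// Sketch (not verbatim from the PR): build one QNN operation for a ggml node,
// with every input tensor pulled through the shared cache.
qnn::qnn_op_config_ptr_t build_node_op(ggml_tensor *dst, int rank, QNNBackend device,
                                       Qnn_GraphHandle_t graph_handle,
                                       std::shared_ptr<qnn::qnn_instance> qnn_instance,
                                       qnn_tensor_cache_t &tensor_cache) {
    // Single dispatch call replaces get_qnn_op_index + create_op_constructor.
    auto operation = qnn::create_op(dst, dst->name, qnn_instance);

    // Input arity still comes from the caps table, now keyed by the tensor.
    qnn::qnn_tensor_array_t inputs;
    for (size_t i = 0; i < qnn::get_qnn_op_input_param_count(dst); ++i) {
        inputs.push_back(create_tensor_with_cache(dst->src[i], qnn::ggml_qnn_tensor::INTERMEDIATE,
                                                  rank, device, graph_handle, qnn_instance,
                                                  tensor_cache));
    }
    // ...bind inputs/outputs to `operation` as create_operation_from_op_tensor does.
    return operation;
}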
208 changes: 196 additions & 12 deletions ggml/src/ggml-qnn/op-config-caps.cpp
@@ -1,8 +1,10 @@

-#include "op-config.hpp"
+#include "op-config-impl.hpp"

namespace {

+using op_constructor_t = std::shared_ptr<qnn::ggml_qnn_op_config> (*)(const ggml_tensor *, const std::string &,
+std::shared_ptr<qnn::qnn_instance>);
using op_dims_calc_func_t = void (*)(const std::vector<const qnn::ggml_dimension_array_t> &input_dims,
qnn::ggml_dimension_array_t &output_dims);

@@ -24,6 +26,7 @@ struct qnn_op_caps_t {
const char *qnn_op_name = nullptr;
const size_t input_param_count = 0;
op_dims_calc_func_t calc_dims_func = nullptr;
+const char *qnn_param_name = nullptr;
};

constexpr const qnn_op_caps_t kOpCaps[] = {
@@ -80,7 +83,13 @@ constexpr const qnn_op_caps_t kOpCaps[] = {
{}, // GGML_OP_CONCAT
{}, // GGML_OP_SILU_BACK
{}, // GGML_OP_NORM
-{}, // GGML_OP_RMS_NORM
+{
+// GGML_OP_RMS_NORM
+QNN_OP_RMS_NORM, // qnn_op_name
+1, // input_param_count
+nullptr, // TODO: calc_dims_func
+QNN_OP_RMS_NORM_PARAM_EPSILON, // qnn_param_name
+},
{}, // GGML_OP_RMS_NORM_BACK
{}, // GGML_OP_GROUP_NORM
{
@@ -187,9 +196,172 @@ static_assert(kOpCaps[GGML_OP_MUL_MAT].calc_dims_func == mat_mul_op_dims,
"GGML_OP_ADD does not have element_wise_op_dims function");
static_assert(kOpCaps[GGML_OP_LOG].calc_dims_func == element_wise_op_dims,
"GGML_OP_LOG does not have element_wise_op_dims function");
+static_assert(kOpCaps[GGML_OP_COUNT + GGML_UNARY_OP_GELU].input_param_count == 1,
+"GGML_UNARY_OP_GELU does not have 1 input parameter");
static_assert(std::size(kOpCaps) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
"GGML_OP_COUNT does not match the size of the kOpCaps table");

+std::shared_ptr<qnn::ggml_qnn_op_config> mat_mul_op_constructor(const ggml_tensor *op, const std::string &instance_name,
+std::shared_ptr<qnn::qnn_instance> qnn_instance) {
+GGML_UNUSED(op);
+QNN_LOG_DEBUG("create QNN_OP_MAT_MUL, name %s", instance_name.c_str());
+return std::make_shared<qnn::ggml_qnn_matmul_op_config>(instance_name, qnn_instance);
+}
+
+template <size_t _op>
+std::shared_ptr<qnn::ggml_qnn_op_config> generic_op_constructor(const ggml_tensor *op, const std::string &instance_name,
+std::shared_ptr<qnn::qnn_instance> qnn_instance) {
+GGML_UNUSED(op);
+static_assert(_op < std::size(kOpCaps));
+static_assert(kOpCaps[_op].qnn_op_name != nullptr);
+return std::make_shared<qnn::ggml_qnn_single_op_config>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
+kOpCaps[_op].qnn_op_name, qnn_instance);
+}
+
+void add_type_parameters(std::shared_ptr<qnn::ggml_qnn_op_config_base> op, const char *name, float value) {
+Qnn_Scalar_t scalar = QNN_SCALAR_INIT;
+scalar.dataType = QNN_DATATYPE_FLOAT_32;
+scalar.floatValue = value;
+op->add_scalar_param(name, scalar);
+}
+
+template <size_t _op, typename _ggml_op_param_type, typename _qnn_op_type_name>
+std::shared_ptr<qnn::ggml_qnn_op_config> op_constructor_with_type_param(
+const ggml_tensor *op, const std::string &instance_name, std::shared_ptr<qnn::qnn_instance> qnn_instance) {
+static_assert(std::is_base_of<qnn::ggml_qnn_op_config_base, _qnn_op_type_name>::value);
+static_assert(_op < std::size(kOpCaps));
+
+constexpr auto &op_caps = kOpCaps[_op];
+static_assert(op_caps.qnn_op_name != nullptr);
+
+_ggml_op_param_type op_param;
+memcpy(&op_param, op->op_params, sizeof(op_param));
+auto qnn_op = std::make_shared<_qnn_op_type_name>(instance_name, QNN_OP_PACKAGE_NAME_QTI_AISW, op_caps.qnn_op_name,
+qnn_instance);
+if (op_caps.qnn_param_name) {
+add_type_parameters(qnn_op, op_caps.qnn_param_name, op_param);
+}
+return qnn_op;
+}
+
+constexpr const op_constructor_t kOpConstructors[] = {
+nullptr, // GGML_OP_NONE
+nullptr, // GGML_OP_DUP
+generic_op_constructor<GGML_OP_ADD>, // GGML_OP_ADD
+nullptr, // GGML_OP_ADD1
+nullptr, // GGML_OP_ACC
+generic_op_constructor<GGML_OP_SUB>, // GGML_OP_SUB
+generic_op_constructor<GGML_OP_MUL>, // GGML_OP_MUL
+generic_op_constructor<GGML_OP_DIV>, // GGML_OP_DIV
+nullptr, // GGML_OP_SQR
+generic_op_constructor<GGML_OP_SQRT>, // GGML_OP_SQRT
+generic_op_constructor<GGML_OP_LOG>, // GGML_OP_LOG
+nullptr, // GGML_OP_SIN
+nullptr, // GGML_OP_COS
+nullptr, // GGML_OP_SUM
+nullptr, // GGML_OP_SUM_ROWS
+nullptr, // GGML_OP_MEAN
+nullptr, // GGML_OP_ARGMAX
+nullptr, // GGML_OP_COUNT_EQUAL
+nullptr, // GGML_OP_REPEAT
+nullptr, // GGML_OP_REPEAT_BACK
+nullptr, // GGML_OP_CONCAT
+nullptr, // GGML_OP_SILU_BACK
+nullptr, // GGML_OP_NORM
+op_constructor_with_type_param<GGML_OP_RMS_NORM, float, qnn::ggml_qnn_rmsnorm_op_config>, // GGML_OP_RMS_NORM
+nullptr, // GGML_OP_RMS_NORM_BACK
+nullptr, // GGML_OP_GROUP_NORM
+
+mat_mul_op_constructor, // GGML_OP_MUL_MAT
+nullptr, // GGML_OP_MUL_MAT_ID
+nullptr, // GGML_OP_OUT_PROD
+
+nullptr, // GGML_OP_SCALE
+nullptr, // GGML_OP_SET
+nullptr, // GGML_OP_CPY
+nullptr, // GGML_OP_CONT
+generic_op_constructor<GGML_OP_RESHAPE>, // GGML_OP_RESHAPE
+nullptr, // GGML_OP_VIEW
+nullptr, // GGML_OP_PERMUTE
+nullptr, // GGML_OP_TRANSPOSE
+nullptr, // GGML_OP_GET_ROWS
+nullptr, // GGML_OP_GET_ROWS_BACK
+nullptr, // GGML_OP_DIAG
+nullptr, // GGML_OP_DIAG_MASK_INF
+nullptr, // GGML_OP_DIAG_MASK_ZERO
+nullptr, // GGML_OP_SOFT_MAX
+nullptr, // GGML_OP_SOFT_MAX_BACK
+nullptr, // GGML_OP_ROPE
+nullptr, // GGML_OP_ROPE_BACK
+nullptr, // GGML_OP_CLAMP
+nullptr, // GGML_OP_CONV_TRANSPOSE_1D
+nullptr, // GGML_OP_IM2COL
+nullptr, // GGML_OP_IM2COL_BACK
+nullptr, // GGML_OP_CONV_TRANSPOSE_2D
+nullptr, // GGML_OP_POOL_1D
+nullptr, // GGML_OP_POOL_2D
+nullptr, // GGML_OP_POOL_2D_BACK
+nullptr, // GGML_OP_UPSCALE
+nullptr, // GGML_OP_PAD
+nullptr, // GGML_OP_PAD_REFLECT_1D
+nullptr, // GGML_OP_ARANGE
+nullptr, // GGML_OP_TIMESTEP_EMBEDDING
+nullptr, // GGML_OP_ARGSORT
+nullptr, // GGML_OP_LEAKY_RELU
+
+nullptr, // GGML_OP_FLASH_ATTN_EXT
+nullptr, // GGML_OP_FLASH_ATTN_BACK
+nullptr, // GGML_OP_SSM_CONV
+nullptr, // GGML_OP_SSM_SCAN
+nullptr, // GGML_OP_WIN_PART
+nullptr, // GGML_OP_WIN_UNPART
+nullptr, // GGML_OP_GET_REL_POS
+nullptr, // GGML_OP_ADD_REL_POS
+nullptr, // GGML_OP_RWKV_WKV6
+nullptr, // GGML_OP_GATED_LINEAR_ATTN
+
+nullptr, // GGML_OP_UNARY
+
+nullptr, // GGML_OP_MAP_UNARY
+nullptr, // GGML_OP_MAP_BINARY
+
+nullptr, // GGML_OP_MAP_CUSTOM1_F32
+nullptr, // GGML_OP_MAP_CUSTOM2_F32
+nullptr, // GGML_OP_MAP_CUSTOM3_F32
+
+nullptr, // GGML_OP_MAP_CUSTOM1
+nullptr, // GGML_OP_MAP_CUSTOM2
+nullptr, // GGML_OP_MAP_CUSTOM3
+
+nullptr, // GGML_OP_CROSS_ENTROPY_LOSS
+nullptr, // GGML_OP_CROSS_ENTROPY_LOSS_BACK
+nullptr, // GGML_OP_OPT_STEP_ADAMW
+
+// ggml_unary_op
+nullptr, // GGML_UNARY_OP_ABS
+nullptr, // GGML_UNARY_OP_SGN
+nullptr, // GGML_UNARY_OP_NEG
+nullptr, // GGML_UNARY_OP_STEP
+nullptr, // GGML_UNARY_OP_TANH
+nullptr, // GGML_UNARY_OP_ELU
+nullptr, // GGML_UNARY_OP_RELU
+nullptr, // GGML_UNARY_OP_SIGMOID
+nullptr, // GGML_UNARY_OP_GELU
+nullptr, // GGML_UNARY_OP_GELU_QUICK
+nullptr, // GGML_UNARY_OP_SILU
+nullptr, // GGML_UNARY_OP_HARDSWISH
+nullptr, // GGML_UNARY_OP_HARDSIGMOID
+nullptr, // GGML_UNARY_OP_EXP
+};
+
+static_assert(kOpConstructors[GGML_OP_NONE] == nullptr, "GGML_OP_NONE does not match the nullptr function");
+static_assert(kOpConstructors[GGML_OP_ADD] == generic_op_constructor<GGML_OP_ADD>,
+"GGML_OP_ADD does not match the generic_op_constructor<GGML_OP_ADD> function");
+static_assert(kOpConstructors[GGML_OP_MUL_MAT] == mat_mul_op_constructor,
+"GGML_OP_MUL_MAT does not match the mat_mul_op_constructor function");
+static_assert(std::size(kOpConstructors) == (GGML_OP_COUNT + GGML_UNARY_OP_COUNT),
+"GGML_OP_COUNT does not match the size of the kOpConstructors table");

} // namespace

namespace qnn {
@@ -202,23 +374,35 @@ size_t get_qnn_op_index(const ggml_tensor *tensor) {
return tensor->op;
}

-void get_ggml_op_output_dimensions(const std::vector<const ggml_dimension_array_t> &input_dims, size_t op,
+void get_ggml_op_output_dimensions(const std::vector<const ggml_dimension_array_t> &input_dims, const ggml_tensor *op,
ggml_dimension_array_t &output_dims) {
-GGML_ASSERT(op < std::size(kOpCaps));
-auto get_dims = kOpCaps[op].calc_dims_func;
+auto op_index = get_qnn_op_index(op);
+GGML_ASSERT(op_index < std::size(kOpCaps));
+auto get_dims = kOpCaps[op_index].calc_dims_func;
GGML_ASSERT(get_dims);
get_dims(input_dims, output_dims);
}

-const char *get_qnn_op_name(size_t op) {
-GGML_ASSERT(op < std::size(kOpCaps));
-GGML_ASSERT(kOpCaps[op].qnn_op_name);
-return kOpCaps[op].qnn_op_name;
+const char *get_qnn_op_name(const ggml_tensor *op) {
+auto op_index = get_qnn_op_index(op);
+GGML_ASSERT(op_index < std::size(kOpCaps));
+GGML_ASSERT(kOpCaps[op_index].qnn_op_name);
+return kOpCaps[op_index].qnn_op_name;
}

+size_t get_qnn_op_input_param_count(const ggml_tensor *op) {
+auto op_index = get_qnn_op_index(op);
+GGML_ASSERT(op_index < std::size(kOpCaps));
+return kOpCaps[op_index].input_param_count;
+}

-size_t get_qnn_op_input_param_count(size_t op) {
-GGML_ASSERT(op < std::size(kOpCaps));
-return kOpCaps[op].input_param_count;
+std::shared_ptr<ggml_qnn_op_config> create_op(const ggml_tensor *op, const std::string &name,
+std::shared_ptr<qnn_instance> qnn_instance) {
+auto op_index = get_qnn_op_index(op);
+GGML_ASSERT(op_index < std::size(kOpCaps));
+auto op_constructor = kOpConstructors[op_index];
+GGML_ASSERT(op_constructor);
+return op_constructor(op, name, qnn_instance);
+}

} // namespace qnn
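Taken together, the two parallel tables give a single dispatch point: create_op indexes kOpConstructors, and typed op parameters (here the RMS-norm epsilon) are copied straight out of ggml's op_params by op_constructor_with_type_param. A hedged sketch of that path for one node; the driver function below is hypothetical and assumes the project headers, while the qnn:: names come from the diff:

// Hypothetical driver: lower one GGML_OP_RMS_NORM node through the new tables.
void lower_rms_norm(ggml_tensor *node, std::shared_ptr<qnn::qnn_instance> instance) {
    // kOpConstructors[GGML_OP_RMS_NORM] is
    // op_constructor_with_type_param<GGML_OP_RMS_NORM, float, qnn::ggml_qnn_rmsnorm_op_config>,
    // which memcpy's a float epsilon from node->op_params and attaches it as the
    // QNN_OP_RMS_NORM_PARAM_EPSILON scalar via add_type_parameters().
    auto op = qnn::create_op(node, node->name, instance);

    // The caps table drives arity too: GGML_OP_RMS_NORM declares one input.
    GGML_ASSERT(qnn::get_qnn_op_input_param_count(node) == 1);
    GGML_ASSERT(op != nullptr);
}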