Commit 05df736: use cast op for gpu
chraac committed Feb 7, 2025 (1 parent: c667db2)
Showing 2 changed files with 6 additions and 7 deletions.
ggml/src/ggml-qnn/backend-ops.cpp (1 addition, 1 deletion)
@@ -448,7 +448,6 @@ bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context *ctx, const ggm
             }
             // fall through, from test here, the convert op is super slow on NPU:
             // https://github.com/usefulsensors/qc_npu_benchmark
-        case QNN_BACKEND_GPU:
             if (src0->type != src1->type || src0->type != op->type) {
                 // there's no convert op for GPU.
                 QNN_LOG_DEBUG(
@@ -457,6 +456,7 @@ bool ggml_qnn_supports_matmul_op(ggml_backend_qnn_device_context *ctx, const ggm
                 return false;
             }
             break;
+        case QNN_BACKEND_GPU:
         default:
             break;
     }
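The effect of this hunk, condensed into a self-contained sketch (the helper name `supports_matmul_types` and the stand-in type definitions are hypothetical; the real check lives in `ggml_qnn_supports_matmul_op`): the type-equality guard now applies only to the NPU fall-through, while the GPU case falls through to `default` and is accepted, since `create_convert_nodes()` can insert a Cast node for it.

```cpp
#include <cstdio>

// Stand-ins for the repo's types, just enough to make the control-flow
// sketch compile; the real definitions live in ggml / ggml-qnn.
enum QNNBackend { QNN_BACKEND_CPU, QNN_BACKEND_GPU, QNN_BACKEND_NPU };
enum ggml_type { GGML_TYPE_F32, GGML_TYPE_F16 };
struct ggml_tensor { ggml_type type; };

// Condensed sketch of the support check after this commit: only the NPU
// fall-through keeps the type-equality guard, because the convert op was
// measured to be very slow on NPU (see the benchmark link in the diff);
// GPU falls through to default and relies on a Cast node instead.
bool supports_matmul_types(QNNBackend device, const ggml_tensor *src0,
                           const ggml_tensor *src1, const ggml_tensor *dst) {
    switch (device) {
        case QNN_BACKEND_NPU:
            if (src0->type != src1->type || src0->type != dst->type) {
                return false;  // reject mixed-type matmul on NPU
            }
            break;
        case QNN_BACKEND_GPU:  // now handled via a Cast node, no type guard
        default:
            break;
    }
    return true;
}

int main() {
    ggml_tensor a{GGML_TYPE_F16}, b{GGML_TYPE_F32}, out{GGML_TYPE_F32};
    // Mixed input types: rejected on NPU, accepted on GPU after this commit.
    std::printf("NPU: %d, GPU: %d\n",
                supports_matmul_types(QNN_BACKEND_NPU, &a, &b, &out),
                supports_matmul_types(QNN_BACKEND_GPU, &a, &b, &out));
    return 0;
}
```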
ggml/src/ggml-qnn/op-config-impl.cpp (5 additions, 6 deletions)
@@ -307,10 +307,9 @@ qnn_tensor_ptr_t ggml_qnn_matmul_op_config::create_gather_nodes(QNNBackend devic
 bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_GraphHandle_t graph_handle, const int rank,
                                                      qnn_tensor_array_t &tensor_inputs,
                                                      qnn_tensor_array_t &tensor_outputs) {
-    if (device == QNN_BACKEND_GPU) {
-        // there's no convert op for GPU, so we should create matmul nodes directly.
-        return true;
-    }
+    // there's no convert op for GPU, so we use cast instead
+    // https://docs.qualcomm.com/bundle/publicresource/topics/80-63442-50/MasterOpDef.html#cast
+    const std::string convert_op_name = (device == QNN_BACKEND_GPU) ? QNN_OP_CAST : QNN_OP_CONVERT;
 
     // create tensors for convert node
     auto tensor_type = get_tensor_type(tensor_inputs);
@@ -328,7 +327,7 @@ bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_Grap
                                                        convert_in->get_dimensions(), tensor_type, rank, device,
                                                        graph_handle, _qnn_instance);
         auto convert = std::make_shared<ggml_qnn_single_op_config>(convert_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
-                                                                   QNN_OP_CONVERT, _qnn_instance);
+                                                                   convert_op_name, _qnn_instance);
         convert->set_input_tensors({convert_in});
         convert->set_output_tensors({convert_out});
         tensor_inputs[i] = convert_out;
@@ -343,7 +342,7 @@ bool ggml_qnn_matmul_op_config::create_convert_nodes(QNNBackend device, Qnn_Grap
                                                               convert_out->get_dimensions(), tensor_type, rank, device,
                                                               graph_handle, _qnn_instance);
         auto output_convert = std::make_shared<ggml_qnn_single_op_config>(convert_name, QNN_OP_PACKAGE_NAME_QTI_AISW,
-                                                                          QNN_OP_CONVERT, _qnn_instance);
+                                                                          convert_op_name, _qnn_instance);
         output_convert->set_input_tensors({convert_in});
         output_convert->set_output_tensors({convert_out});
         tensor_outputs.front() = convert_in;
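The op selection itself, as a runnable sketch (the `QNN_OP_*` string values are stand-ins mirroring the QNN SDK macros, and `pick_convert_op` is a hypothetical name): on GPU, where no Convert op exists, the graph builder now emits a Cast node (see the Qualcomm op definition linked in the diff) to align the matmul input types and to convert its output back, while the other backends keep using Convert.

```cpp
#include <cstdio>
#include <string>

// Stand-ins mirroring the names used in the diff; in the real code these
// macros come from the QNN SDK headers and name ops in the QTI AISW package.
#define QNN_OP_CAST    "Cast"
#define QNN_OP_CONVERT "Convert"
enum QNNBackend { QNN_BACKEND_CPU, QNN_BACKEND_GPU, QNN_BACKEND_NPU };

// The one-line substitution made in create_convert_nodes(): every node that
// previously hard-coded QNN_OP_CONVERT now receives this name instead, so
// GPU graphs get Cast nodes and all other backends keep Convert nodes.
std::string pick_convert_op(QNNBackend device) {
    return (device == QNN_BACKEND_GPU) ? QNN_OP_CAST : QNN_OP_CONVERT;
}

int main() {
    std::printf("GPU: %s\n", pick_convert_op(QNN_BACKEND_GPU).c_str());  // Cast
    std::printf("NPU: %s\n", pick_convert_op(QNN_BACKEND_NPU).c_str());  // Convert
    return 0;
}
```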
