Es/lpt/lpt to ngraph fixes2 with master #2671
@@ -33,7 +33,9 @@
#include <transformations/opset_conversions/convert_opset2_to_opset1.hpp>
#include <transformations/opset_conversions/convert_opset3_to_opset2.hpp>
#include <transformations/init_node_info.hpp>
#include <transformations/convert_precision.hpp>
#include <transformations/rt_info/fused_names_attribute.hpp>

#include <legacy/convert_function_to_cnn_network.hpp>
#include <legacy/ie_util_internal.hpp>
#include <legacy/graph_transformer.h>

@@ -42,6 +44,9 @@
#include "cldnn_executable_network.h"
#include "cldnn_custom_layer.h"

#include <transformations/low_precision/transformer.hpp>
#include <transformations/low_precision/mat_mul.hpp>

#ifdef __linux__
#include <dlfcn.h>
#endif

@@ -72,8 +77,10 @@ cldnn::device_info clDNNEngine::GetDeviceInfo(const std::map<std::string, std::s
return device_info;
}

InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneAndTransformNetwork(const InferenceEngine::ICNNNetwork& network) const {
InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneAndTransformNetwork(const InferenceEngine::ICNNNetwork& network, CLDNNPlugin::Config config) const {
std::shared_ptr<ICNNNetwork> clonedNetwork = cloneNetwork(network);
bool baselineIsFP16 = false;

if (clonedNetwork->getFunction()) {
const auto transformations_callback = [](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
// Reshape->Permute->Reshape pattern in theory can change output rank, so this check is added to be sure

@@ -112,6 +119,12 @@ InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneAndTransformNetwork(const In
return can_use_reduce;
}

if (auto add_op = std::dynamic_pointer_cast<const ngraph::opset1::Add>(node)) {
Review comment: What is the purpose of this skip option? Should it prevent bias merge into a convolution node, or a general linear operation merge?

Review comment: And what if Conv -(in:1)-> Add?
return ngraph::is_type<ngraph::opset1::Convolution>(add_op->get_input_node_shared_ptr(0)) ||
ngraph::is_type<ngraph::opset1::GroupConvolution>(add_op->get_input_node_shared_ptr(0)) ||
ngraph::is_type<ngraph::opset1::MatMul>(add_op->get_input_node_shared_ptr(0));
}

return std::dynamic_pointer_cast<const ::ngraph::opset2::Gelu>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset3::ShuffleChannels>(node) ||
std::dynamic_pointer_cast<const ::ngraph::opset2::BatchToSpace>(node) ||
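The check above only looks at input 0 of the Add, which is what the second review question points at: the convolution (or MatMul) can just as well feed input 1. A minimal sketch of a branch that covers both orders is given below; it only illustrates the question and is not code from this PR.

if (auto add_op = std::dynamic_pointer_cast<const ngraph::opset1::Add>(node)) {
    // Inspect every input of the Add so that Conv -(in:1)-> Add is also skipped.
    for (size_t i = 0; i < add_op->get_input_size(); ++i) {
        const auto producer = add_op->get_input_node_shared_ptr(i);
        if (ngraph::is_type<ngraph::opset1::Convolution>(producer) ||
            ngraph::is_type<ngraph::opset1::GroupConvolution>(producer) ||
            ngraph::is_type<ngraph::opset1::MatMul>(producer)) {
            return true;
        }
    }
    return false;
}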
@@ -126,24 +139,75 @@ InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneAndTransformNetwork(const In
// Disable shape inference (WA for generic operations)
::ngraph::op::GenericIE::DisableReshape noReshape(nGraphFunc);

// Note: instead of running all Conversion Transformations you can make up your own transformation pipeline
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
// WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
manager.register_pass<ngraph::pass::ConvertPriorBox>();
manager.register_pass<ngraph::pass::CommonOptimizations>();
manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();

manager.set_callback(transformations_callback);
manager.run_passes(nGraphFunc);

ngraph::pass::Manager ti_manager;
// Unroll will be called after all conversions
// temporarily switch back to plugin unroller from NGraph unroller until TI output names are corrected
// ti_manager.register_pass<ngraph::pass::UnrollTensorIterator>();
ti_manager.run_passes(nGraphFunc);
bool enableInt8;

{
// Note: instead of running all Conversion Transformations you can make up your own transformation pipeline
ngraph::pass::Manager manager;
manager.register_pass<ngraph::pass::InitNodeInfo>();
// WA: ConvertPriorBox must be executed before the 1st ConstantFolding pass
manager.register_pass<ngraph::pass::ConvertPriorBox>();
manager.register_pass<ngraph::pass::CommonOptimizations>();
manager.register_pass<ngraph::pass::ConvertOpSet3ToOpSet2>();
manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();

manager.set_callback(transformations_callback);
manager.run_passes(nGraphFunc);

const auto fp16_callback = [&baselineIsFP16](const std::shared_ptr<const ::ngraph::Node> &node) -> bool {
if (!baselineIsFP16 && node->get_output_element_type(0) == ngraph::element::f16) {
baselineIsFP16 = true;
}

return true;
};

ngraph::pass::Manager conversion_manager;

enableInt8 = config.enableInt8 &&
(config.lptVersion == Config::LptVersion::nGraph) &&
ngraph::pass::low_precision::LowPrecisionTransformer::isFunctionQuantized(nGraphFunc);
if (enableInt8) {
// [WA part1] Convert quantized FP16 model to FP32 to avoid possible overflow and mixed precision errors
conversion_manager.register_pass<ngraph::pass::ConvertPrecision>(ngraph::element::f16, ngraph::element::f32);
}

conversion_manager.set_callback(fp16_callback);
conversion_manager.run_passes(nGraphFunc);

ngraph::pass::Manager ti_manager;
// Unroll will be called after all conversions
// temporarily switch back to plugin unroller from NGraph unroller until TI output names are corrected
// ti_manager.register_pass<ngraph::pass::UnrollTensorIterator>();
ti_manager.run_passes(nGraphFunc);
}

using namespace ngraph::pass::low_precision;
if (enableInt8) {
auto params = LayerTransformation::Params(
true, // updatePrecisions
LayerTransformation::QuantizedTensorAlignment::UpdateLevel, // quantizedTensorAlignmentOnActivations
LayerTransformation::QuantizedTensorAlignment::None, // quantizedTensorAlignmentOnWeights
true); // supportAsymmetricQuantization
LowPrecisionTransformer transformer(LowPrecisionTransformer::getAllTransformations(params)
Review comment: @eshoguli, @slyalin, @GlebKazantaev, why does the API for low precision transformations differ completely from the common transformations?

manager.register_pass<ngraph::pass::ConvertOpSet2ToOpSet1>();
auto pass_config = manager.get_pass_config();
pass_config->disable<ngraph::pass::ConvertGELU>();
pass_config->set_callback<ngraph::pass::ConvertReduceMaxToPooling>(
    [](const_node_ptr &node) -> bool {
        return disableReduceDecomposition<ngraph::opset1::ReduceMax>(node);
    });
manager.run_passes(nGraphFunc);
// etc.

So I'd expect LPT to be used as follows:

manager.register_pass<ngraph::pass::LowPrecisionTransformations>();
pass_config->set_callback<ngraph::pass::MatMulTransformation>(/* callback that checks asymmetric quantization */);
or
pass_config->disable<ngraph::pass::MatMulTransformationAsymmetric>();
manager.run_passes(nGraphFunc);

Is it possible to align LPT with the other transforms?
.add<MatMulTransformation, ngraph::opset1::MatMul>(LayerTransformation::Params(params).setSupportAsymmetricQuantization(false)));

transformer.transform(nGraphFunc);
}

{
ngraph::pass::Manager manager = ngraph::pass::Manager();
manager.register_pass<ngraph::pass::ConvertOpSet1ToLegacy>();
manager.set_callback(transformations_callback);
manager.run_passes(nGraphFunc);

ngraph::pass::Manager ti_manager;
// Unroll will be called after all conversions
// temporarily switch back to plugin unroller from NGraph unroller until TI output names are corrected
// ti_manager.register_pass<ngraph::pass::UnrollTensorIterator>();
ti_manager.run_passes(nGraphFunc);
Review comment: Looks like a call on an empty transformation list. Same as in line 183. Can it be removed?
}

clonedNetwork = InferenceEngine::details::convertFunctionToICNNNetwork(nGraphFunc, *clonedNetwork);
}

@@ -155,6 +219,14 @@ InferenceEngine::ICNNNetwork::Ptr clDNNEngine::CloneAndTransformNetwork(const In
transformator.fullTrim();
}

if (baselineIsFP16) {
InputsDataMap inputsMap;
clonedNetwork->getInputsInfo(inputsMap);

auto input0 = getInputTo(inputsMap.begin()->second->getInputData());
Review comment: This should be marked as a WA with some description. The meaning of this parameter is not fully clear to me.
input0.begin()->second->params["FP16"];
}
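For context on the comment above: params["FP16"]; looks like a no-op read, but std::map::operator[] inserts a default-constructed (empty) entry when the key is absent, so the statement effectively tags the first layer attached to the input. A self-contained illustration of just that mechanism (the names are illustrative, not from the PR):

#include <cassert>
#include <map>
#include <string>

int main() {
    std::map<std::string, std::string> params;  // stands in for the layer's params map
    params["FP16"];                             // operator[] inserts an empty entry as a side effect
    assert(params.count("FP16") == 1);          // the presence check used later in Program::Program
    return 0;
}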

return clonedNetwork;
}

@@ -257,7 +329,7 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn

context = m_defaultContext;

return std::make_shared<CLDNNExecNetwork>(*CloneAndTransformNetwork(network), context, conf);
return std::make_shared<CLDNNExecNetwork>(*CloneAndTransformNetwork(network, conf), context, conf);
}

ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEngine::ICNNNetwork &network,

@@ -281,7 +353,7 @@ ExecutableNetworkInternal::Ptr clDNNEngine::LoadExeNetworkImpl(const InferenceEn
conf.max_dynamic_batch = static_cast<int>(network.getBatchSize());
}

return std::make_shared<CLDNNExecNetwork>(*CloneAndTransformNetwork(network), casted, conf);
return std::make_shared<CLDNNExecNetwork>(*CloneAndTransformNetwork(network, conf), casted, conf);
}

RemoteContext::Ptr clDNNEngine::CreateContext(const ParamMap& params) {

@@ -324,7 +396,7 @@ QueryNetworkResult clDNNEngine::QueryNetwork(const ICNNNetwork& network,
for (auto&& node : function->get_ops()) {
originalOps.emplace(node->get_friendly_name());
}
auto clonedNetwork = CloneAndTransformNetwork(network);
auto clonedNetwork = CloneAndTransformNetwork(network, _impl->m_config);
std::unordered_set<std::string> supported;
std::unordered_set<std::string> unsupported;
@@ -397,7 +397,39 @@ Program::Program(InferenceEngine::ICNNNetwork& network, std::shared_ptr<const cl
, p_currentOutputs({}) {
InitFormat(network);

InputsDataMap inputsMap;
network.getInputsInfo(inputsMap);

auto input0 = getInputTo(inputsMap.begin()->second->getInputData());

bool baselineIsFP16 = false;
if (input0.begin()->second->params.count("FP16") != 0) {
Review comment: If the results of …
baselineIsFP16 = true;
}

bool fqFound = false;
bool allFQareSupported = true;
if (config.enableInt8) {
auto it = details::CNNNetworkIterator(&network);
auto end = details::CNNNetworkIterator();
while (it != end) {
auto& layer = *it;
if (layer->precision == Precision::FP16) {
baselineIsFP16 = true;
}

if (CaselessEq<std::string>()(layer->type, "FakeQuantize")) {
fqFound = true;
auto levels = layer->GetParamAsUInt("levels");
if (levels != 255 && levels != 256) {
allFQareSupported = false;
}
}
it++;
}
}

if (config.enableInt8 && (config.lptVersion == Config::LptVersion::cnnNetwork)) {
auto params = LayerTransformation::Params(true, // updatePrecisions
true, // quantizeOutputs
true, // weightsToConst

@@ -413,38 +445,17 @@ Program::Program(InferenceEngine::ICNNNetwork& network, std::shared_ptr<const cl
.add<FullyConnectedTransformation>(LayerTransformation::Params(params).setSupportAsymmetricQuantization(false), "FullyConnected")
.add<GemmTransformation>(LayerTransformation::Params(params).setSupportAsymmetricQuantization(false), "GEMM");

bool fqFound = false;
bool allFQareSupported = true;
bool baselineIsFP16 = false;
{
auto it = details::CNNNetworkIterator(&network);
auto end = details::CNNNetworkIterator();
while (it != end) {
auto& layer = *it;
if (layer->precision == Precision::FP16) {
baselineIsFP16 = true;
}

if (CaselessEq<std::string>()(layer->type, "FakeQuantize")) {
fqFound = true;
auto levels = layer->GetParamAsUInt("levels");
if (levels != 255 && levels != 256) {
allFQareSupported = false;
}
}
it++;
}
}

// [WA part1] Convert quantized FP16 model to FP32 to avoid possible overflow and mixed precision errors
if (fqFound && allFQareSupported) {
NetPass::ConvertPrecision(network, Precision::FP16, Precision::FP32);
}

LowPrecisionTransformer transformer(transforms);
transformer.transform(network);
}

// [WA part2] Try to find non-quantized layers and convert them back to FP16
// [WA part2] Try to find non-quantized layers and convert them back to FP16
if (config.enableInt8) {
Review comment: If I understand correctly, you run this fp16 fallback stuff only for …
if (fqFound && baselineIsFP16 && config.enable_fp16_for_quantized_models) {
auto layersSorted = BFSSort(network);
@@ -22,13 +22,17 @@ class INFERENCE_ENGINE_API_CLASS(Eltwise) : public Op {

Eltwise(const Output<Node>& data1,
const Output<Node>& data2,
const ELTWISE_TYPE eltwise_type);
const ELTWISE_TYPE eltwise_type,
const element::Type output_type = element::undefined);
Review comment: Is it valid to change the signature of legacy operations? As I see it, this change integrates data type cast semantics into Eltwise (and many others).
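To make the cast semantics the reviewer mentions concrete, here is a minimal sketch of how validate_and_infer_types could honor the new member; this is an assumption about the intent, not the PR's implementation, and it ignores the broadcasting/shape logic of the real op.

void op::Eltwise::validate_and_infer_types() {
    // Assumed behavior: keep the input element type unless an explicit
    // output type was requested through the new constructor argument.
    const element::Type et = (m_output_type == element::undefined)
        ? get_input_element_type(0)
        : m_output_type;
    set_output_type(0, et, get_input_partial_shape(0));
}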

void validate_and_infer_types() override;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

ELTWISE_TYPE eltwise_type;

private:
element::Type m_output_type;
};

} // namespace op
@@ -29,17 +29,21 @@ class INFERENCE_ENGINE_API_CLASS(FullyConnected) : public Op {
FullyConnected(const Output<Node> & A,
const Output<Node> & B,
const Output<Node> & C,
const Shape & output_shape);
const Shape & output_shape,
const element::Type output_type = element::undefined);

void validate_and_infer_types() override;

std::shared_ptr<Node> clone_with_new_inputs(const OutputVector& new_args) const override;

size_t get_out_size() { return m_output_size; }
size_t get_out_size() const { return m_output_size; }

element::Type get_output_type() const { return m_output_type; }
Review comment: You don't need this method, as you can get the output type directly from the node output.

Review comment: Especially since all other operations have no such method.
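A one-line illustration of the reviewer's point (fc is a placeholder for a FullyConnected node pointer, not a name from the PR): the output element type is already exposed through the common Node API, so a dedicated getter is redundant.

ngraph::element::Type t = fc->get_output_element_type(0);  // instead of fc->get_output_type()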

private:
size_t m_output_size = 0;
Shape m_output_shape = {};
element::Type m_output_type;
};

} // namespace op
@@ -25,7 +25,8 @@ class INFERENCE_ENGINE_API_CLASS(NormalizeIE) : public Op {
const Output<Node>& weights,
float eps,
bool across_spatial,
bool channel_shared);
bool channel_shared,
const ngraph::element::Type output_type);
Review comment: If we introduce output_type even for legacy ops, we need to have the same default value and the same logic for setting the output type.
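As an illustration of the reviewer's point, and only as an assumption about the intended fix, the NormalizeIE constructor could take the same default as Eltwise and FullyConnected above (the data parameter name is assumed, since the first argument is not visible in this hunk):

NormalizeIE(const Output<Node>& data,
            const Output<Node>& weights,
            float eps,
            bool across_spatial,
            bool channel_shared,
            const ngraph::element::Type output_type = ngraph::element::undefined);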

float get_eps() const { return m_eps; }
bool get_channel_shared() const { return m_channel_shared;}

@@ -39,6 +40,7 @@ class INFERENCE_ENGINE_API_CLASS(NormalizeIE) : public Op {
float m_eps;
bool m_across_spatial;
bool m_channel_shared;
ngraph::element::Type m_output_type;
};

} // namespace op
Review comment: Use a const ref for the config.
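A sketch of what that suggestion amounts to for the CloneAndTransformNetwork signature introduced in this PR, assuming only the parameter passing changes:

InferenceEngine::ICNNNetwork::Ptr CloneAndTransformNetwork(
    const InferenceEngine::ICNNNetwork& network,
    const CLDNNPlugin::Config& config) const;  // Config passed by const reference instead of by value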