diff --git a/docs/articles_en/get-started/learn-openvino/openvino-samples/benchmark-tool.rst b/docs/articles_en/get-started/learn-openvino/openvino-samples/benchmark-tool.rst
index 9bb69a73f0883c..b4740167b84b12 100644
--- a/docs/articles_en/get-started/learn-openvino/openvino-samples/benchmark-tool.rst
+++ b/docs/articles_en/get-started/learn-openvino/openvino-samples/benchmark-tool.rst
@@ -382,7 +382,7 @@ available options and parameters:
 
       -t TIME, --time TIME  Optional. Time in seconds to execute topology.
       -api {sync,async}, --api_type {sync,async}
-                            Optional. Enable using sync/async API. Default value is async.
+                            Optional. Enable using sync/async API. When hint is throughput, default value is async. When hint is latency, default value is sync.
 
 
     Input shapes:
@@ -557,7 +557,7 @@ available options and parameters:
     -c                        Required for GPU custom kernels. Absolute path to an .xml file with the kernels description.
     -cache_dir                Optional. Enables caching of loaded models to specified directory. List of devices which support caching is shown at the end of this message.
     -load_from_file           Optional. Loads model from file directly without read_model. All CNNNetwork options (like re-shape) will be ignored
-    -api                      Optional. Enable Sync/Async API. Default value is "async".
+    -api                      Optional. Enable Sync/Async API. When hint is throughput, default value is "async". When hint is latency, default value is "sync".
     -nireq                    Optional. Number of infer requests. Default value is determined automatically for device.
     -nstreams                 Optional. Number of streams to use for inference on the CPU or GPU devices (for HETERO and MULTI device cases use format <device1>:<nstreams1>,<device2>:<nstreams2> or just <nstreams>). Default value is determined automatically for a device.Please note that although the automatic selection usually provides a reasonable performance, it still may be non - optimal for some cases, especially for very small models. See sample's README for more details. Also, using nstreams>1 is inherently throughput-oriented option, while for the best-latency estimations the number of streams should be set to 1.
     -inference_only           Optional. Measure only inference stage. Default option for static models. Dynamic models are measured in full mode which includes inputs setup stage, inference only mode available for them with single input data shape only. To enable full mode for static models pass "false" value to this argument: ex. "-inference_only=false".
diff --git a/samples/cpp/benchmark_app/benchmark_app.hpp b/samples/cpp/benchmark_app/benchmark_app.hpp
index 17d99812e910e4..bfe84b846d509a 100644
--- a/samples/cpp/benchmark_app/benchmark_app.hpp
+++ b/samples/cpp/benchmark_app/benchmark_app.hpp
@@ -98,7 +98,9 @@ static const char layout_message[] =
     "For example, \"input1[NCHW],input2[NC]\" or \"[NCHW]\" in case of one input size.";
 
 /// @brief message for execution mode
-static const char api_message[] = "Optional. Enable Sync/Async API. Default value is \"async\".";
+static const char api_message[] =
+    "Optional. Enable Sync/Async API. When hint is throughput, default value is \"async\". "
+    "When hint is latency, default value is \"sync\".";
 
 /// @brief message for #streams for CPU inference
 static const char infer_num_streams_message[] =
@@ -303,7 +305,7 @@ DEFINE_string(cache_dir, "", cache_dir_message);
 DEFINE_bool(load_from_file, false, load_from_file_message);
 
 /// @brief Define execution mode
-DEFINE_string(api, "async", api_message);
+DEFINE_string(api, "", api_message);
 
 /// @brief Number of infer requests in parallel
 DEFINE_uint64(nireq, 0, infer_requests_count_message);
diff --git a/samples/cpp/benchmark_app/main.cpp b/samples/cpp/benchmark_app/main.cpp
index 93ca527f51d325..2171ba1701af7a 100644
--- a/samples/cpp/benchmark_app/main.cpp
+++ b/samples/cpp/benchmark_app/main.cpp
@@ -58,6 +58,9 @@ bool parse_and_check_command_line(int argc, char* argv[]) {
         show_usage();
         throw std::logic_error("The percentile value is incorrect. The applicable values range is [1, 100].");
     }
+    if (FLAGS_api == "") {
+        FLAGS_api = FLAGS_hint == "latency" ? "sync" : "async";
+    }
     if (FLAGS_api != "async" && FLAGS_api != "sync") {
         throw std::logic_error("Incorrect API. Please set -api option to `sync` or `async` value.");
     }
diff --git a/tools/benchmark_tool/openvino/tools/benchmark/benchmark.py b/tools/benchmark_tool/openvino/tools/benchmark/benchmark.py
index 3e2cb75456a21b..1f476e6bf72ad5 100644
--- a/tools/benchmark_tool/openvino/tools/benchmark/benchmark.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/benchmark.py
@@ -16,7 +16,7 @@ def percentile(values, percent):
 
 class Benchmark:
     def __init__(self, device: str, number_infer_requests: int = 0, number_iterations: int = None,
-                 duration_seconds: int = None, api_type: str = 'async', inference_only = None,
+                 duration_seconds: int = None, api_type: str = '', inference_only = None,
                  maximum_inference_rate: float = 0):
         self.device = device
         self.core = Core()
diff --git a/tools/benchmark_tool/openvino/tools/benchmark/main.py b/tools/benchmark_tool/openvino/tools/benchmark/main.py
index 9ed9a121e82541..228b1fb5f42b40 100755
--- a/tools/benchmark_tool/openvino/tools/benchmark/main.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/main.py
@@ -49,6 +49,9 @@ def arg_not_empty(arg_value,empty_value):
         raise Exception("Cannot set precision for a compiled model. " \
                         "Please re-compile your model with required precision.")
 
+    if args.api_type == "":
+        args.api_type = "sync" if args.perf_hint == "latency" else "async"
+
     if args.api_type == "sync":
         if args.time == 0 and (args.number_infer_requests > args.number_iterations):
             raise Exception("Number of infer requests should be less than or equal to number of iterations in sync mode.")
diff --git a/tools/benchmark_tool/openvino/tools/benchmark/parameters.py b/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
index bb339d5e5148b1..146e351681210d 100644
--- a/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
+++ b/tools/benchmark_tool/openvino/tools/benchmark/parameters.py
@@ -108,8 +108,9 @@ def parse_args():
                       help="Optional. Enable model caching to specified directory")
     advs.add_argument('-lfile', '--load_from_file', required=False, nargs='?', default=argparse.SUPPRESS,
                       help="Optional. Loads model from file directly without read_model.")
-    args.add_argument('-api', '--api_type', type=str, required=False, default='async', choices=['sync', 'async'],
-                      help='Optional. Enable using sync/async API. Default value is async.')
+    args.add_argument('-api', '--api_type', type=str, required=False, default='', choices=['sync', 'async'],
+                      help='Optional. Enable using sync/async API. When hint is throughput, default value is async. '
+                           'When hint is latency, default value is sync.')
     advs.add_argument('-nireq', '--number_infer_requests', type=check_positive, required=False, default=0,
                       help='Optional. Number of infer requests. Default value is determined automatically for device.')
     advs.add_argument('-nstreams', '--number_streams', type=str, required=False, default=None,
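
Note on the behavior this diff introduces: with -api now defaulting to an empty string, both the C++ app and the Python tool resolve the effective mode from the performance hint before validating it, and an explicit -api value still overrides the hint. A minimal Python sketch of that resolution rule follows; resolve_api_type is a hypothetical helper written for illustration here, not a function added by the patch.

    def resolve_api_type(api_type: str, perf_hint: str) -> str:
        """Mirror the fallback added in main.cpp and main.py above."""
        if api_type == "":  # -api was not passed on the command line
            return "sync" if perf_hint == "latency" else "async"
        return api_type  # an explicit -api value always wins over the hint

    # Expected outcomes under this rule:
    assert resolve_api_type("", "latency") == "sync"
    assert resolve_api_type("", "throughput") == "async"
    assert resolve_api_type("async", "latency") == "async"

As in the ternaries in both implementations, any hint other than "latency" (including an unset hint) falls back to the previous default of "async".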