From 67a34e4cac9561213e806d4a55163f6706b462ba Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Thu, 11 Jan 2018 13:00:55 +0530 Subject: [PATCH 1/4] Initial text for benchmarking RFC --- text/0000-benchmarking.md | 159 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 159 insertions(+) create mode 100644 text/0000-benchmarking.md diff --git a/text/0000-benchmarking.md b/text/0000-benchmarking.md new file mode 100644 index 00000000000..74c47b02897 --- /dev/null +++ b/text/0000-benchmarking.md @@ -0,0 +1,159 @@ +- Feature Name: benchmarking +- Start Date: 2018-01-11 +- RFC PR: (leave this empty) +- Rust Issue: (leave this empty) + +# Summary +[summary]: #summary + +This aims to stabilize basic benchmarking tools for a stable `cargo bench`. + +# Motivation +[motivation]: #motivation + +Benchmarking is important for maintaining good libraries. Benchmarks give us a clear idea of performance tradeoffs +and make it easier to pick the best library for the job. They also help people keep track of performance regressions, +and aid in finding and fixing performance bottlenecks. + +# Guide-level explanation +[guide-level-explanation]: #guide-level-explanation + +You can write benchmarks much like tests, using a `#[bench]` annotation in your library code or in a +dedicated file under `benches/`. You can also use `[[bench]]` entries in your `Cargo.toml` to place +it in a custom location. + + +A benchmarking function looks like this: + +```rust +use std::test::Bencher; + +#[bench] +fn my_benchmark(bench: &mut Bencher) { + let x = do_some_setup(); + bench.iter(|| x.compute_thing()); + x.teardown(); +} +``` + +`Bencher::iter` is where the actual code being benchmarked is placed. It will run the +benchmarked code multiple times until it has a clear idea of what the average time taken is, +and the variance. + +The benchmark can be run with `cargo bench`. + +To ensure that the compiler doesn't optimize things away, use `test::black_box`.
+The following code will show very little time taken because of optimizations, because +the optimizer knows the input at compile time and can do some of the computations beforehand. + +```rust +use std::test::Bencher; + +fn pow(x: u32, y: u32) -> u32 { + if y == 0 { + 1 + } else { + x * pow(x, y - 1) + } +} + +#[bench] +fn my_benchmark(bench: &mut Bencher) { + bench.iter(|| pow(4, 30)); +} +``` + +``` +running 1 test +test my_benchmark ... bench: 4 ns/iter (+/- 0) + +test result: ok. 0 passed; 0 failed; 0 ignored; 1 measured; 0 filtered out +``` + +However, via `test::black_box`, we can blind the optimizer to the input values, +so that it does not attempt to use them to optimize the code: + +```rust +#[bench] +fn my_benchmark(bench: &mut Bencher) { + let x = test::black_box(4); + let y = test::black_box(30); + bench.iter(|| pow(x, y)); +} +``` + +``` +running 1 test +test my_benchmark ... bench: 11 ns/iter (+/- 2) + +test result: ok. 0 passed; 0 failed; 0 ignored; 1 measured; 0 filtered out +``` + +Any result that is yielded from the callback for `Bencher::iter()` is also +black boxed; otherwise, the compiler might notice that the result is unused and +optimize out the entire computation. + +In case you are generating unused values that do not get returned from the callback, +use `black_box()` on them as well: + +```rust +#[bench] +fn my_benchmark(bench: &mut Bencher) { + let x = test::black_box(4); + let y = test::black_box(30); + bench.iter(|| { + black_box(pow(y, x)); + pow(x, y) + }); +} +``` + +# Reference-level explanation +[reference-level-explanation]: #reference-level-explanation + +The bencher reports the median value and deviation (difference between min and max). +Samples are [winsorized], so extreme outliers get clamped. + +Avoid calling `iter` multiple times in a benchmark; each call wipes out the previously +collected data. 
+ +`cargo bench` essentially takes the same flags as `cargo test`, except it has a `--bench foo` +flag to select a single benchmark target. + + + [winsorized]: https://en.wikipedia.org/wiki/Winsorizing + +# Drawbacks +[drawbacks]: #drawbacks + +The reason we haven't stabilized this so far is basically because we're hoping to have a custom test +framework system, so that the bencher can be written as a crate. This is still an alternative, though +there has been no movement on this front in years. + +# Rationale and alternatives +[alternatives]: #alternatives + +This design works. It doesn't give you fine grained tools for analyzing results, but it's +a basic building block that lets one do most benchmarking tasks. The alternatives include +a custom test/bench framework, which is much more holistic, or exposing more +fundamental building blocks. + +Another possible API would be one which implicitly handles the black boxing, something +like + +```rust +let input1 = foo(); +let input2 = bar(); +bencher.iter(|(input1, input2)| baz(input1, input2), (input1, input2)) +``` + +This has problems with the types not being Copy, and it feels a bit less flexible. + +# Unresolved questions +[unresolved]: #unresolved-questions + +- Should stuff be in `std::test` or a partially-stabilized `libtest`? +- Should we stabilize any other `Bencher` methods (like `run_once`)? +- Stable machine-readable output for this would be nice, but can be done in a separate RFC. 
+- `test::black_box` can instead be `mem::black_box` + From cf634453b2d532fe00b6f138fa021e12bf29c702 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Fri, 19 Jan 2018 12:23:13 +0530 Subject: [PATCH 2/4] test::black_box -> mem::black_box --- text/0000-benchmarking.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/text/0000-benchmarking.md b/text/0000-benchmarking.md index 74c47b02897..36b72040cd2 100644 --- a/text/0000-benchmarking.md +++ b/text/0000-benchmarking.md @@ -70,15 +70,15 @@ test my_benchmark ... bench: 4 ns/iter (+/- 0) test result: ok. 0 passed; 0 failed; 0 ignored; 1 measured; 0 filtered out ``` -However, via `test::black_box`, we can blind the optimizer to the input values, +However, via `mem::black_box`, we can blind the optimizer to the input values, so that it does not attempt to use them to optimize the code: ```rust #[bench] -fn my_benchmark(bench: &mut Bencher) { - let x = test::black_box(4); - let y = test::black_box(30); - bench.iter(|| pow(x, y)); +fn my_benchmark(bench: Bencher) -> BenchResult { + let x = mem::black_box(4); + let y = mem::black_box(30); + bench.iter(|| pow(x, y)) } ``` @@ -99,8 +99,8 @@ use `black_box()` on them as well: ```rust #[bench] fn my_benchmark(bench: &mut Bencher) { - let x = test::black_box(4); - let y = test::black_box(30); + let x = mem::black_box(4); + let y = mem::black_box(30); bench.iter(|| { black_box(pow(y, x)); pow(x, y) @@ -155,5 +155,4 @@ This has problems with the types not being Copy, and it feels a bit less flexibl - Should stuff be in `std::test` or a partially-stabilized `libtest`? - Should we stabilize any other `Bencher` methods (like `run_once`)? - Stable machine-readable output for this would be nice, but can be done in a separate RFC. 
-- `test::black_box` can instead be `mem::black_box` From 10dde7e81e39fdedb1ca9ae571577f1f0cda870f Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Fri, 19 Jan 2018 12:25:53 +0530 Subject: [PATCH 3/4] make iter consume Bencher and output a BenchResult --- text/0000-benchmarking.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/text/0000-benchmarking.md b/text/0000-benchmarking.md index 36b72040cd2..0b008467004 100644 --- a/text/0000-benchmarking.md +++ b/text/0000-benchmarking.md @@ -26,13 +26,14 @@ it in a custom location. A benchmarking function looks like this: ```rust -use std::test::Bencher; +use std::test::{Bencher, BenchResult}; #[bench] -fn my_benchmark(bench: &mut Bencher) { +fn my_benchmark(bench: Bencher) -> BenchResult { let x = do_some_setup(); - bench.iter(|| x.compute_thing()); + let result = bench.iter(|| x.compute_thing()); x.teardown(); + result } ``` @@ -58,7 +59,7 @@ fn pow(x: u32, y: u32) -> u32 { } #[bench] -fn my_benchmark(bench: &mut Bencher) { +fn my_benchmark(bench: Bencher) -> BenchResult { bench.iter(|| pow(4, 30)); } ``` @@ -74,6 +75,9 @@ However, via `mem::black_box`, we can blind the optimizer to the input values, so that it does not attempt to use them to optimize the code: ```rust +use std::mem; +use std::test::{Bencher, BenchResult}; + #[bench] fn my_benchmark(bench: Bencher) -> BenchResult { let x = mem::black_box(4); @@ -97,8 +101,7 @@ In case you are generating unused values that do not get returned from the callb use `black_box()` on them as well: ```rust -#[bench] -fn my_benchmark(bench: &mut Bencher) { +fn my_benchmark(bench: Bencher) -> BenchResult { let x = mem::black_box(4); let y = mem::black_box(30); bench.iter(|| { @@ -114,9 +117,6 @@ fn my_benchmark(bench: &mut Bencher) { The bencher reports the median value and deviation (difference between min and max). Samples are [winsorized], so extreme outliers get clamped. 
-Avoid calling `iter` multiple times in a benchmark; each call wipes out the previously -collected data. - `cargo bench` essentially takes the same flags as `cargo test`, except it has a `--bench foo` flag to select a single benchmark target. From b37b88f794dcf2e9a17a78f21f77dce6b957044c Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Fri, 19 Jan 2018 12:26:11 +0530 Subject: [PATCH 4/4] Add iter_n --- text/0000-benchmarking.md | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/text/0000-benchmarking.md b/text/0000-benchmarking.md index 0b008467004..2714bad3d78 100644 --- a/text/0000-benchmarking.md +++ b/text/0000-benchmarking.md @@ -60,7 +60,7 @@ fn pow(x: u32, y: u32) -> u32 { #[bench] fn my_benchmark(bench: Bencher) -> BenchResult { - bench.iter(|| pow(4, 30)); + bench.iter(|| pow(4, 30)) } ``` @@ -107,7 +107,16 @@ fn my_benchmark(bench: Bencher) -> BenchResult { bench.iter(|| { black_box(pow(y, x)); pow(x, y) - }); + }) } ``` + +In case you want the benchmark to run a predetermined number of times, use `iter_n`: + +```rust +#[bench] +fn my_benchmark(bench: Bencher) -> BenchResult { + bench.iter_n(1000, || do_some_stuff()) } ```