This repository has been archived by the owner on Nov 6, 2020. It is now read-only.

[ethash] remove mem::uninitialized #10861

Merged
17 commits merged on Jul 12, 2019
Changes from 5 commits
19 changes: 11 additions & 8 deletions ethash/benches/basic.rs
@@ -21,9 +21,12 @@ extern crate ethash;
use criterion::Criterion;
use ethash::{NodeCacheBuilder, OptimizeFor};

-const HASH: [u8; 32] = [0xf5, 0x7e, 0x6f, 0x3a, 0xcf, 0xc0, 0xdd, 0x4b, 0x5b, 0xf2, 0xbe,
-0xe4, 0x0a, 0xb3, 0x35, 0x8a, 0xa6, 0x87, 0x73, 0xa8, 0xd0, 0x9f,
-0x5e, 0x59, 0x5e, 0xab, 0x55, 0x94, 0x05, 0x52, 0x7d, 0x72];
+const HASH: [u8; 32] = [
+0xf5, 0x7e, 0x6f, 0x3a, 0xcf, 0xc0, 0xdd, 0x4b,
+0x5b, 0xf2, 0xbe, 0xe4, 0x0a, 0xb3, 0x35, 0x8a,
+0xa6, 0x87, 0x73, 0xa8, 0xd0, 0x9f, 0x5e, 0x59,
+0x5e, 0xab, 0x55, 0x94, 0x05, 0x52, 0x7d, 0x72,
+];
const NONCE: u64 = 0xd7b3ac70a301a249;

criterion_group!(
@@ -52,13 +55,13 @@ fn bench_light_compute_memory(b: &mut Criterion) {
let builder = NodeCacheBuilder::new(OptimizeFor::Cpu, u64::max_value());
let light = builder.light(&env::temp_dir(), 486382);

b.bench_function("bench_light_compute_memmap", move |b| b.iter(|| light.compute(&HASH, NONCE, u64::max_value())));
b.bench_function("bench_light_compute_memory", move |b| b.iter(|| light.compute(&HASH, NONCE, u64::max_value())));
}

fn bench_light_new_round_trip_memmap(b: &mut Criterion) {
use std::env;

b.bench_function("bench_light_compute_memmap", move |b| b.iter(|| {
b.bench_function("bench_light_new_round_trip_memmap", move |b| b.iter(|| {
let builder = NodeCacheBuilder::new(OptimizeFor::Memory, u64::max_value());
let light = builder.light(&env::temp_dir(), 486382);
light.compute(&HASH, NONCE, u64::max_value());
@@ -68,7 +71,7 @@ fn bench_light_new_round_trip_memmap(b: &mut Criterion) {
fn bench_light_new_round_trip_memory(b: &mut Criterion) {
use std::env;

b.bench_function("bench_light_compute_memmap", move |b| b.iter(|| {
b.bench_function("bench_light_new_round_trip_memory", move |b| b.iter(|| {
let builder = NodeCacheBuilder::new(OptimizeFor::Cpu, u64::max_value());
let light = builder.light(&env::temp_dir(), 486382);
light.compute(&HASH, NONCE, u64::max_value());
@@ -86,7 +89,7 @@ fn bench_light_from_file_round_trip_memory(b: &mut Criterion) {
dummy.to_file().unwrap();
}

b.bench_function("bench_light_compute_memmap", move |b| b.iter(|| {
b.bench_function("bench_light_from_file_round_trip_memory", move |b| b.iter(|| {
let builder = NodeCacheBuilder::new(OptimizeFor::Cpu, u64::max_value());
let light = builder.light_from_file(&dir, 486382).unwrap();
light.compute(&HASH, NONCE, u64::max_value());
@@ -105,7 +108,7 @@ fn bench_light_from_file_round_trip_memmap(b: &mut Criterion) {
dummy.to_file().unwrap();
}

b.bench_function("bench_light_compute_memmap", move |b| b.iter(|| {
b.bench_function("bench_light_from_file_round_trip_memmap", move |b| b.iter(|| {
let builder = NodeCacheBuilder::new(OptimizeFor::Memory, u64::max_value());
let light = builder.light_from_file(&dir, 486382).unwrap();
light.compute(&HASH, NONCE, u64::max_value());
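Aside from reflowing HASH, the change in this file corrects copy-pasted benchmark labels: the string passed to Criterion's bench_function is the name results are reported under, so reusing "bench_light_compute_memmap" for every benchmark made the reports ambiguous. A minimal, self-contained sketch of that behaviour (dummy benchmark bodies, not the real ethash code):

// Sketch only: shows that the label passed to `bench_function` is the
// name Criterion reports under, so each benchmark needs a distinct one.
use criterion::{criterion_group, criterion_main, Criterion};

fn bench_light_compute_memmap(c: &mut Criterion) {
	c.bench_function("bench_light_compute_memmap", |b| b.iter(|| 2 + 2));
}

fn bench_light_compute_memory(c: &mut Criterion) {
	// Before this PR this one was also labelled "bench_light_compute_memmap",
	// which made the two benchmarks indistinguishable in the report.
	c.bench_function("bench_light_compute_memory", |b| b.iter(|| 2 + 2));
}

criterion_group!(benches, bench_light_compute_memmap, bench_light_compute_memory);
criterion_main!(benches);

Run with cargo bench --features=bench -- compute (as the author does further down) and each benchmark now reports under its own label.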
43 changes: 24 additions & 19 deletions ethash/src/compute.rs
@@ -141,17 +141,22 @@ pub fn quick_get_difficulty(header_hash: &H256, nonce: u64, mix_hash: &H256, pro
//
// This cannot be elided by the compiler as it doesn't know the implementation of
// `keccak_512`.
-let mut buf: [u8; 64 + 32] = mem::uninitialized();
+let mut buf = mem::MaybeUninit::<[u8; 64 + 32]>::uninit();

-ptr::copy_nonoverlapping(header_hash.as_ptr(), buf.as_mut_ptr(), 32);
-ptr::copy_nonoverlapping(&nonce as *const u64 as *const u8, buf[32..].as_mut_ptr(), 8);
+let buf_ptr = buf.as_mut_ptr() as *mut u8;
+ptr::copy_nonoverlapping(header_hash.as_ptr(), buf_ptr, 32);
+ptr::copy_nonoverlapping(&nonce as *const u64 as *const u8, buf_ptr.offset(32), 8);

-keccak_512::unchecked(buf.as_mut_ptr(), 64, buf.as_ptr(), 40);
-ptr::copy_nonoverlapping(mix_hash.as_ptr(), buf[64..].as_mut_ptr(), 32);
+keccak_512::unchecked(buf_ptr, 64, buf_ptr, 40);
+ptr::copy_nonoverlapping(mix_hash.as_ptr(), buf_ptr.offset(64), 32);

-// This is initialized in `keccak_256`
-let mut hash: [u8; 32] = mem::uninitialized();
-keccak_256::unchecked(hash.as_mut_ptr(), hash.len(), buf.as_ptr(), buf.len());
+let buf = buf.assume_init();
+
+// This is initialized in `keccak_256` below.
+let mut hash = mem::MaybeUninit::<[u8; 32]>::uninit();
+keccak_256::unchecked(hash.as_mut_ptr() as *mut u8, 32, buf.as_ptr(), buf.len());
Contributor:
Can you extract 32 to a constant so you don't have to ensure that the 32 in the definition of hash and the 32 in the unchecked call are the same? Before you could do .len but that's now impossible because of MaybeUninit.
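A self-contained sketch of what the reviewer is asking for; the constant name HASH_BYTES and the stand-in fill_digest function are hypothetical, not from this PR. The point is that one constant drives both the buffer type and the length argument, so the two cannot drift apart:

// Sketch under assumed names: `fill_digest` stands in for `keccak_256::unchecked`.
use std::mem::MaybeUninit;

const HASH_BYTES: usize = 32;

/// Stand-in hasher: writes `out_len` bytes through `out`.
unsafe fn fill_digest(out: *mut u8, out_len: usize, input: *const u8, input_len: usize) {
	for i in 0..out_len {
		*out.add(i) = *input.add(i % input_len) ^ i as u8;
	}
}

fn digest(input: &[u8]) -> [u8; HASH_BYTES] {
	assert!(!input.is_empty());
	let mut hash = MaybeUninit::<[u8; HASH_BYTES]>::uninit();
	unsafe {
		// `HASH_BYTES` appears in both the buffer type and the length argument.
		fill_digest(hash.as_mut_ptr() as *mut u8, HASH_BYTES, input.as_ptr(), input.len());
		hash.assume_init()
	}
}

fn main() {
	println!("{:?}", digest(b"example input"));
}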
+let hash = hash.assume_init();

hash
}
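The conversions in this hunk all follow one discipline: create the buffer as MaybeUninit, initialise every byte through a raw pointer, and only call assume_init once the whole buffer is written. A minimal, self-contained sketch of the same pattern, with the keccak calls replaced by plain copies (function and variable names here are illustrative, not the PR's code):

// Sketch of the MaybeUninit write-then-assume_init pattern used above.
use std::mem::MaybeUninit;
use std::ptr;

fn concat_header_and_nonce(header: &[u8; 32], nonce: u64) -> [u8; 40] {
	let mut buf = MaybeUninit::<[u8; 40]>::uninit();
	let buf_ptr = buf.as_mut_ptr() as *mut u8;
	unsafe {
		// Write bytes 0..32 and 32..40; after these two copies all 40 bytes are initialised,
		// so `assume_init` is sound.
		ptr::copy_nonoverlapping(header.as_ptr(), buf_ptr, 32);
		ptr::copy_nonoverlapping(&nonce as *const u64 as *const u8, buf_ptr.add(32), 8);
		buf.assume_init()
	}
}

fn main() {
	let out = concat_header_and_nonce(&[0u8; 32], 0xd7b3ac70a301a249);
	assert_eq!(out.len(), 40);
}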
@@ -208,27 +213,27 @@ fn hash_compute(light: &Light, full_size: usize, header_hash: &H256, nonce: u64)
// We explicitly write the first 40 bytes, leaving the last 24 as uninitialized. Then
// `keccak_512` reads the first 40 bytes (4th parameter) and overwrites the entire array,
// leaving it fully initialized.
-let mut out: [u8; NODE_BYTES] = mem::uninitialized();
+let mut out = mem::MaybeUninit::<[u8; NODE_BYTES]>::uninit();
+let out_ptr = out.as_mut_ptr() as *mut u8;

-ptr::copy_nonoverlapping(header_hash.as_ptr(), out.as_mut_ptr(), header_hash.len());
+ptr::copy_nonoverlapping(header_hash.as_ptr(), out_ptr, header_hash.len());
ptr::copy_nonoverlapping(
&nonce as *const u64 as *const u8,
-out[header_hash.len()..].as_mut_ptr(),
+out_ptr.add(header_hash.len()),
mem::size_of::<u64>(),
);

// compute keccak-512 hash and replicate across mix
keccak_512::unchecked(
-out.as_mut_ptr(),
+out_ptr,
NODE_BYTES,
-out.as_ptr(),
+out_ptr,
header_hash.len() + mem::size_of::<u64>(),
);

-Node { bytes: out }
+Node { bytes: out.assume_init() }
},
// This is fully initialized before being read, see `let mut compress = ...` below
-compress_bytes: unsafe { mem::uninitialized() },
+compress_bytes: [0u8; MIX_WORDS],
Member Author:
❯ cargo bench --features=bench -- compute
bench_light_compute_memmap                                                                            
                        time:   [980.59 us 986.10 us 992.69 us]
                        change: [-3.7744% -1.4892% +0.5632%] (p = 0.20 > 0.05)
                        No change in performance detected.
Found 6 outliers among 100 measurements (6.00%)
  4 (4.00%) high mild
  2 (2.00%) high severe

bench_light_compute_memory                                                                             
                        time:   [1.0336 ms 1.0410 ms 1.0493 ms]
                        change: [-3.9993% -2.0714% +0.0881%] (p = 0.05 > 0.05)
                        No change in performance detected.
Found 9 outliers among 100 measurements (9.00%)

Contributor:
Amazing, must have been part of one of the LLVM upgrades since this code was written. I absolutely benchmarked it before and 0-initialising it was significantly slower. Wonder if there's anywhere else where we can remove uninit entirely now.
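Whether other uninitialised buffers in the crate can simply become zero-initialised is an empirical question. A hypothetical Criterion micro-benchmark, not part of this PR (names and the buffer size below are made up, though 64 mirrors NODE_BYTES), comparing the two strategies for a small buffer that is fully overwritten before use:

// Hypothetical micro-benchmark: zero-initialising vs. MaybeUninit for a buffer
// that is completely written before it is read.
use std::mem::MaybeUninit;
use criterion::{black_box, criterion_group, criterion_main, Criterion};

const NODE_BYTES: usize = 64;

fn bench_init_strategies(c: &mut Criterion) {
	c.bench_function("zero_init", |b| b.iter(|| {
		let mut buf = [0u8; NODE_BYTES];
		// Simulate the real code path: every byte is written before use.
		for (i, byte) in buf.iter_mut().enumerate() {
			*byte = i as u8;
		}
		black_box(buf)
	}));

	c.bench_function("maybe_uninit", |b| b.iter(|| {
		let mut buf = MaybeUninit::<[u8; NODE_BYTES]>::uninit();
		let ptr = buf.as_mut_ptr() as *mut u8;
		for i in 0..NODE_BYTES {
			// Safety: writes stay in bounds and every byte is written before `assume_init`.
			unsafe { ptr.add(i).write(i as u8) };
		}
		black_box(unsafe { buf.assume_init() })
	}));
}

criterion_group!(benches, bench_init_strategies);
criterion_main!(benches);

If the two report indistinguishable times, as the author's numbers above suggest for compress_bytes, the zero-initialised form is the easier one to keep.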

};

let mut mix: [_; MIX_NODES] = [buf.half_mix.clone(), buf.half_mix.clone()];
@@ -277,9 +282,9 @@ fn hash_compute(light: &Light, full_size: usize, header_hash: &H256, nonce: u64)
let mix_words: [u32; MIX_WORDS] = unsafe { mem::transmute(mix) };

{
-// This is an uninitialized buffer to begin with, but we iterate precisely `compress.len()`
-// times and set each index, leaving the array fully initialized. THIS ONLY WORKS ON LITTLE-
-// ENDIAN MACHINES. See a future PR to make this and the rest of the code work correctly on
+// We iterate precisely `compress.len()` times and set each index,
+// leaving the array fully initialized. THIS ONLY WORKS ON LITTLE-ENDIAN MACHINES.
+// See a future PR to make this and the rest of the code work correctly on
// big-endian arches like mips.
let compress: &mut [u32; MIX_WORDS / 4] =
unsafe { make_const_array!(MIX_WORDS / 4, &mut buf.compress_bytes) };
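On the little-endian caveat: the mix buffer is reinterpreted as u32 words via mem::transmute, which bakes the host byte order into the result. Not part of this PR, but as a sketch of the kind of endian-safe decoding a future change could use instead (the function name is hypothetical):

// Sketch: decode bytes as little-endian words explicitly instead of transmuting,
// so the result is the same on little- and big-endian hosts.
use std::convert::TryInto;

fn words_from_le_bytes(bytes: &[u8; 16]) -> [u32; 4] {
	let mut words = [0u32; 4];
	for (i, chunk) in bytes.chunks_exact(4).enumerate() {
		// `from_le_bytes` is host-endianness independent.
		words[i] = u32::from_le_bytes(chunk.try_into().expect("chunk is 4 bytes"));
	}
	words
}

fn main() {
	let bytes = [1u8, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1];
	assert_eq!(words_from_le_bytes(&bytes), [1, 256, 65536, 16777216]);
}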