Skip to content

Commit

Permalink
Auto merge of #181 - cbarrick:iter-probe, r=Amanieu
Browse files Browse the repository at this point in the history
Refactor probing logic into an external iterator

This commit refactors the `RawTable::find` logic into an external iterator, and exposes a new method to create such an iterator.

My motivation was to implement a weak map on top of the raw API.

If I understand correctly, the standard way to implement `insert` is to first call `RawTable::find` to look for an existing bucket. If one is found, replace its value. Otherwise, call `RawTable::insert` to create a new bucket.

In a weak map, keys may expire. One optimization for `insert` is to reuse a bucket whose key has expired. To do that, you need to keep track of expired buckets during the search stage. This isn't possible with `RawTable::find` because its `eq` callback only gives you access to each candidate element, not to the bucket that holds it.
  • Loading branch information
bors committed Jul 27, 2020
2 parents 0c2cda1 + 28fab73 commit 853d88d
Show file tree
Hide file tree
Showing 2 changed files with 82 additions and 16 deletions.
2 changes: 1 addition & 1 deletion src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
clippy::doc_markdown,
clippy::module_name_repetitions,
clippy::must_use_candidate,
clippy::option_if_let_else,
clippy::option_if_let_else
)]
#![warn(missing_docs)]
#![warn(rust_2018_idioms)]
Expand Down
96 changes: 81 additions & 15 deletions src/raw/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ cfg_if! {

mod bitmask;

use self::bitmask::BitMask;
use self::bitmask::{BitMask, BitMaskIter};
use self::imp::Group;

// Branch prediction hint. This is currently only available on nightly but it
Expand Down Expand Up @@ -938,23 +938,14 @@ impl<T> RawTable<T> {
#[inline]
pub fn find(&self, hash: u64, mut eq: impl FnMut(&T) -> bool) -> Option<Bucket<T>> {
unsafe {
for pos in self.probe_seq(hash) {
let group = Group::load(self.ctrl(pos));
for bit in group.match_byte(h2(hash)) {
let index = (pos + bit) & self.bucket_mask;
let bucket = self.bucket(index);
if likely(eq(bucket.as_ref())) {
return Some(bucket);
}
}
if likely(group.match_empty().any_bit_set()) {
return None;
for bucket in self.iter_hash(hash) {
let elm = bucket.as_ref();
if likely(eq(elm)) {
return Some(bucket);
}
}
None
}

// probe_seq never returns.
unreachable!();
}

/// Returns the number of elements the map can hold without reallocating.
Expand Down Expand Up @@ -1004,6 +995,18 @@ impl<T> RawTable<T> {
}
}

/// Returns an iterator over occupied buckets that could match a given hash.
///
/// In rare cases, the iterator may return a bucket with a different hash,
/// so callers are expected to verify each yielded bucket themselves.
///
/// # Safety
///
/// It is up to the caller to ensure that the `RawTable` outlives the
/// `RawIterHash`. Because we cannot make the `next` method unsafe on the
/// `RawIterHash` struct, we have to make the `iter_hash` method unsafe.
#[cfg_attr(feature = "inline-more", inline)]
pub unsafe fn iter_hash(&self, hash: u64) -> RawIterHash<'_, T> {
// All probing state is set up by the iterator's own constructor.
RawIterHash::new(self, hash)
}

/// Returns an iterator which removes all elements from the table without
/// freeing the memory.
///
Expand Down Expand Up @@ -1737,3 +1740,66 @@ impl<T> Iterator for RawDrain<'_, T> {

impl<T> ExactSizeIterator for RawDrain<'_, T> {}
impl<T> FusedIterator for RawDrain<'_, T> {}

/// Iterator over occupied buckets that could match a given hash.
///
/// In rare cases, the iterator may return a bucket with a different hash.
///
/// Yields `Bucket<T>` handles rather than element references, so the caller
/// keeps access to the bucket itself for every candidate.
pub struct RawIterHash<'a, T> {
// The table being searched; borrowed for the lifetime of the iterator.
table: &'a RawTable<T>,

// The top 7 bits of the hash (the value of `h2(hash)`), matched against
// every group that is probed.
h2_hash: u8,

// The sequence of groups to probe in the search.
probe_seq: ProbeSeq,

// The current group and its position.
// `pos` is the position most recently produced by `probe_seq`, and `group`
// is the group loaded from the table's control bytes at that position.
pos: usize,
group: Group,

// The elements within the group with a matching h2-hash.
// Each yielded bit is an offset from `pos`, wrapped with the table's
// `bucket_mask` to obtain the bucket index.
bitmask: BitMaskIter,
}

impl<'a, T> RawIterHash<'a, T> {
    /// Creates an iterator positioned on the first group of the probe
    /// sequence for `hash`, with the matches inside that group ready to be
    /// yielded.
    ///
    /// Panics if the probe sequence yields no starting position.
    fn new(table: &'a RawTable<T>, hash: u64) -> Self {
        unsafe {
            // Tag compared against the control bytes of every probed group.
            let tag = h2(hash);

            // Take the first probe position and load the group stored there.
            let mut seq = table.probe_seq(hash);
            let start = seq.next().unwrap();
            let first_group = Group::load(table.ctrl(start));

            // Candidates inside the first group whose control byte equals the tag.
            let candidates = first_group.match_byte(tag).into_iter();

            Self {
                table,
                h2_hash: tag,
                probe_seq: seq,
                pos: start,
                group: first_group,
                bitmask: candidates,
            }
        }
    }
}

impl<'a, T> Iterator for RawIterHash<'a, T> {
    type Item = Bucket<T>;

    /// Yields the next bucket whose control byte matches the stored h2-hash.
    ///
    /// Matches in the current group are drained first. Once they run out, the
    /// search stops if the group contains an empty slot; otherwise it moves
    /// on to the next group in the probe sequence.
    fn next(&mut self) -> Option<Bucket<T>> {
        unsafe {
            loop {
                match self.bitmask.next() {
                    // Another candidate in the current group: turn its offset
                    // into a wrapped bucket index and hand the bucket out.
                    Some(offset) => {
                        let slot = (self.pos + offset) & self.table.bucket_mask;
                        return Some(self.table.bucket(slot));
                    }
                    // Group exhausted and it has an empty slot: search is over.
                    None if likely(self.group.match_empty().any_bit_set()) => return None,
                    // Group exhausted but full: advance to the next group.
                    None => {
                        self.pos = self.probe_seq.next().unwrap();
                        self.group = Group::load(self.table.ctrl(self.pos));
                        self.bitmask = self.group.match_byte(self.h2_hash).into_iter();
                    }
                }
            }
        }
    }
}

0 comments on commit 853d88d

Please sign in to comment.