Skip to content
This repository has been archived by the owner on Jan 22, 2025. It is now read-only.

Adds cache hash data deletion policy enum #34956

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 12 additions & 7 deletions accounts-db/src/accounts_db.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,9 @@ use {
append_vec::{
aligned_stored_size, AppendVec, APPEND_VEC_MMAPPED_FILES_OPEN, STORE_META_OVERHEAD,
},
cache_hash_data::{CacheHashData, CacheHashDataFileReference},
cache_hash_data::{
CacheHashData, CacheHashDataFileReference, DeletionPolicy as CacheHashDeletionPolicy,
},
contains::Contains,
epoch_accounts_hash::EpochAccountsHashManager,
in_mem_accounts_index::StartupStats,
Expand Down Expand Up @@ -7549,10 +7551,13 @@ impl AccountsDb {
_ = std::fs::remove_dir_all(&failed_dir);
failed_dir
};
CacheHashData::new(
accounts_hash_cache_path,
(kind == CalcAccountsHashKind::Incremental).then_some(storages_start_slot),
)
let deletion_policy = match kind {
CalcAccountsHashKind::Full => CacheHashDeletionPolicy::AllUnused,
CalcAccountsHashKind::Incremental => {
CacheHashDeletionPolicy::UnusedAtLeast(storages_start_slot)
}
};
CacheHashData::new(accounts_hash_cache_path, deletion_policy)
}

// modeled after calculate_accounts_delta_hash
Expand Down Expand Up @@ -9775,7 +9780,7 @@ pub mod tests {
let temp_dir = TempDir::new().unwrap();
let accounts_hash_cache_path = temp_dir.path().to_path_buf();
self.scan_snapshot_stores_with_cache(
&CacheHashData::new(accounts_hash_cache_path, None),
&CacheHashData::new(accounts_hash_cache_path, CacheHashDeletionPolicy::AllUnused),
storage,
stats,
bins,
Expand Down Expand Up @@ -10843,7 +10848,7 @@ pub mod tests {
};

let result = accounts_db.scan_account_storage_no_bank(
&CacheHashData::new(accounts_hash_cache_path, None),
&CacheHashData::new(accounts_hash_cache_path, CacheHashDeletionPolicy::AllUnused),
&CalcAccountsHashConfig::default(),
&get_storage_refs(&[storage]),
test_scan,
Expand Down
56 changes: 36 additions & 20 deletions accounts-db/src/cache_hash_data.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,8 +193,7 @@ impl CacheHashDataFile {
pub(crate) struct CacheHashData {
cache_dir: PathBuf,
pre_existing_cache_files: Arc<Mutex<HashSet<PathBuf>>>,
/// Decides which old cache files to delete. See `delete_old_cache_files()` for more info.
storages_start_slot: Option<Slot>,
deletion_policy: DeletionPolicy,
pub stats: Arc<CacheHashDataStats>,
}

Expand All @@ -206,15 +205,15 @@ impl Drop for CacheHashData {
}

impl CacheHashData {
pub(crate) fn new(cache_dir: PathBuf, storages_start_slot: Option<Slot>) -> CacheHashData {
pub(crate) fn new(cache_dir: PathBuf, deletion_policy: DeletionPolicy) -> CacheHashData {
std::fs::create_dir_all(&cache_dir).unwrap_or_else(|err| {
panic!("error creating cache dir {}: {err}", cache_dir.display())
});

let result = CacheHashData {
cache_dir,
pre_existing_cache_files: Arc::new(Mutex::new(HashSet::default())),
storages_start_slot,
deletion_policy,
stats: Arc::default(),
};

Expand All @@ -229,21 +228,24 @@ impl CacheHashData {
let mut old_cache_files =
std::mem::take(&mut *self.pre_existing_cache_files.lock().unwrap());

// If `storages_start_slot` is None, we're doing a full accounts hash calculation, and thus
// all unused cache files can be deleted.
// If `storages_start_slot` is Some, we're doing an incremental accounts hash calculation,
// and we only want to delete the unused cache files *that IAH considered*.
if let Some(storages_start_slot) = self.storages_start_slot {
old_cache_files.retain(|old_cache_file| {
let Some(parsed_filename) = parse_filename(old_cache_file) else {
// if parsing the cache filename fails, we *do* want to delete it
return true;
};

// if the old cache file is in the incremental accounts hash calculation range,
// then delete it
parsed_filename.slot_range_start >= storages_start_slot
});
match self.deletion_policy {
DeletionPolicy::AllUnused => {
// no additional work to do here; we will delete everything in `old_cache_files`
}
DeletionPolicy::UnusedAtLeast(storages_start_slot) => {
// when calculating an incremental accounts hash, we only want to delete the unused
// cache files *that IAH considered*
old_cache_files.retain(|old_cache_file| {
let Some(parsed_filename) = parse_filename(old_cache_file) else {
// if parsing the cache filename fails, we *do* want to delete it
return true;
};

// if the old cache file is in the incremental accounts hash calculation range,
// then delete it
parsed_filename.slot_range_start >= storages_start_slot
});
}
}

if !old_cache_files.is_empty() {
Expand Down Expand Up @@ -410,6 +412,19 @@ fn parse_filename(cache_filename: impl AsRef<Path>) -> Option<ParsedFilename> {
})
}

/// Decides which old cache files to delete
///
/// See `delete_old_cache_files()` for more info.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum DeletionPolicy {
/// Delete *all* the unused cache files
/// Should be used when calculating full accounts hash
AllUnused,
/// Delete *only* the unused cache files with starting slot range *at least* this slot
/// Should be used when calculating incremental accounts hash
UnusedAtLeast(Slot),
}

#[cfg(test)]
mod tests {
use {super::*, crate::accounts_hash::AccountHash, rand::Rng};
Expand Down Expand Up @@ -477,7 +492,8 @@ mod tests {
data_this_pass.push(this_bin_data);
}
}
let cache = CacheHashData::new(cache_dir.clone(), None);
let cache =
CacheHashData::new(cache_dir.clone(), DeletionPolicy::AllUnused);
let file_name = PathBuf::from("test");
cache.save(&file_name, &data_this_pass).unwrap();
cache.get_cache_files();
Expand Down
Loading