Skip to content

Commit

Permalink
Use size to help decide whether to invalidate files
Browse files Browse the repository at this point in the history
  • Loading branch information
wlandau-lilly committed Jun 2, 2019
1 parent 7e4e695 commit 9793232
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 60 deletions.
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ These changes are technically breaking changes, but they should only affect adva
- Log the name of the calling function in the console log file, e.g. "begin make()" and "end make()". Applies to all functions that accept a `config` argument.
- Memory management: set `use_cache` to `FALSE` in `storr` function calls for saving and loading targets. Also, at the end of `make()`, call `flush_cache()` (and then `gc()` if garbage collection is enabled).
- Mention `callr::r()` within commands as [a safe alternative to `lock_envir = FALSE`](https://github.com/rstudio/gt/issues/297#issuecomment-497778735) in the self-invalidation section of the `make()` help file.
- Use file size to help decide when to rehash `file_in()`/`file_out()`/`knitr_in()` files. We now rehash files if the file is less than 100 KB or the time stamp changed or the **file size** changed.

# Version 7.3.0

Expand Down
27 changes: 21 additions & 6 deletions R/exec-meta.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,9 @@ drake_meta_ <- function(target, config) {
}
# For imported files.
if (meta$isfile) {
meta$mtime <- storage_mtime(decode_path(target, config))
path <- decode_path(target, config)
meta$mtime <- storage_mtime(path)
meta$size <- storage_size(path)
}
if (meta$trigger$command) {
meta$command <- layout$command_standardized
Expand Down Expand Up @@ -173,11 +175,14 @@ should_rehash_storage <- function(
filename,
size_cutoff,
new_mtime,
old_mtime
old_mtime,
new_size,
old_size
) {
small <- (storage_size(filename) < size_cutoff) %||NA% TRUE
small <- (new_size < size_cutoff) %||NA% TRUE
touched <- (new_mtime > old_mtime) %||NA% TRUE
small || touched
resized <- (abs(new_size - old_size) > drake_tol) %||NA% TRUE
small || touched || resized
}

storage_hash <- function(
Expand All @@ -202,7 +207,9 @@ storage_hash <- function(
filename = filename,
size_cutoff = size_cutoff,
new_mtime = storage_mtime(filename),
old_mtime = meta$mtime %||% -Inf
old_mtime = meta$mtime %||% -Inf,
new_size = storage_size(filename),
old_size = meta$size
)
ifelse (
should_rehash,
Expand All @@ -223,7 +230,15 @@ storage_size <- function(x) {
if (dir.exists(x)) {
dir_size(x)
} else {
file_size(x)
}
}

file_size <- function(x) {
if (file.exists(x)) {
file.size(x)
} else {
NA_real_
}
}

Expand All @@ -247,6 +262,6 @@ dir_size <- function(x) {
recursive = TRUE,
include.dirs = FALSE
)
sizes <- vapply(files, file.size, FUN.VALUE = numeric(1))
sizes <- vapply(files, file_size, FUN.VALUE = numeric(1))
max(sizes %||% 0)
}
3 changes: 3 additions & 0 deletions R/utils-utils.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# From testthat
drake_tol <- .Machine$double.eps ^ 0.5

# From lintr
`%||%` <- function(x, y) {
if (is.null(x) || length(x) <= 0) {
Expand Down
119 changes: 65 additions & 54 deletions tests/testthat/test-hash.R
Original file line number Diff line number Diff line change
Expand Up @@ -48,60 +48,71 @@ test_with_dir("same with a directory", {
}
})

test_with_dir("stress test hashing decisions", {
test_with_dir("hashing decisions", {
skip_on_cran() # CRAN gets whitelist tests only (check time limits).
file <- "nobodyhome"
expect_false(file.exists(file))
expect_true(should_rehash_storage(
filename = file, new_mtime = 0, old_mtime = 0, size_cutoff = Inf))
expect_true(should_rehash_storage(
filename = file, new_mtime = 1, old_mtime = 0, size_cutoff = Inf))
expect_true(should_rehash_storage(
filename = file, new_mtime = 0, old_mtime = 1, size_cutoff = Inf))
expect_true(should_rehash_storage(
filename = file, new_mtime = 0, old_mtime = 0, size_cutoff = -1))
expect_true(should_rehash_storage(
filename = file, new_mtime = 1, old_mtime = 0, size_cutoff = -1))
expect_true(should_rehash_storage(
filename = file, new_mtime = 0, old_mtime = 1, size_cutoff = -1))
})

test_with_dir("more stress testing of hashing decisions", {
skip_on_cran() # CRAN gets whitelist tests only (check time limits).
file <- "input.rds"
saveRDS(1, file = file)
expect_true(file.exists(file))
expect_true(should_rehash_storage(
filename = file, new_mtime = 1, old_mtime = 0, size_cutoff = Inf))
expect_true(should_rehash_storage(
filename = file, new_mtime = 0, old_mtime = 1, size_cutoff = Inf))
expect_true(should_rehash_storage(
filename = file, new_mtime = 0, old_mtime = 0, size_cutoff = Inf))
expect_true(should_rehash_storage(
filename = file, new_mtime = 1, old_mtime = 0, size_cutoff = -1))
expect_false(should_rehash_storage(
filename = file, new_mtime = 0, old_mtime = 1, size_cutoff = -1))
expect_false(should_rehash_storage(
filename = file, new_mtime = 0, old_mtime = 0, size_cutoff = -1))
})

test_with_dir("same with a directory", {
skip_on_cran() # CRAN gets whitelist tests only (check time limits).
file <- "myinputdir"
dir.create(file)
writeLines("12sddsfff3", "myinputdir/a.txt")
writeLines("4sdfasdf56", "myinputdir/b.txt")
expect_true(file.exists(file))
expect_true(should_rehash_storage(
filename = file, new_mtime = 1, old_mtime = 0, size_cutoff = Inf))
expect_true(should_rehash_storage(
filename = file, new_mtime = 0, old_mtime = 1, size_cutoff = Inf))
expect_true(should_rehash_storage(
filename = file, new_mtime = 0, old_mtime = 0, size_cutoff = Inf))
expect_true(should_rehash_storage(
filename = file, new_mtime = 1, old_mtime = 0, size_cutoff = -1))
expect_false(should_rehash_storage(
filename = file, new_mtime = 0, old_mtime = 1, size_cutoff = -1))
expect_false(should_rehash_storage(
filename = file, new_mtime = 0, old_mtime = 0, size_cutoff = -1))
expect_true(
should_rehash_storage(
filename = file,
new_mtime = 0,
old_mtime = 0,
old_size = 0,
new_size = 0,
size_cutoff = Inf
)
)
for (i in c(0, 1)) {
expect_false(
should_rehash_storage(
filename = file,
new_mtime = 0,
old_mtime = i,
old_size = 0,
new_size = 0,
size_cutoff = -Inf
)
)
expect_true(
should_rehash_storage(
filename = file,
new_mtime = 0,
old_mtime = i,
old_size = 0,
new_size = 0,
size_cutoff = Inf
)
)
}
for (s in c(-Inf, Inf)) {
expect_true(
should_rehash_storage(
filename = file,
new_mtime = 1,
old_mtime = 0,
old_size = 0,
new_size = 0,
size_cutoff = s
)
)
expect_true(
should_rehash_storage(
filename = file,
new_mtime = 0,
old_mtime = 0,
old_size = 1,
new_size = 0,
size_cutoff = s
)
)
expect_true(
should_rehash_storage(
filename = file,
new_mtime = 0,
old_mtime = 0,
old_size = 0,
new_size = 1,
size_cutoff = s
)
)
}
})

0 comments on commit 9793232

Please sign in to comment.