💾 We can probably reduce the memory footprint (alloc read & parsed size) #52
=> suggested fix for the moment: use …
r$> # Set up -----------------------------------------------------------------------
library("opusreader2")
path <- file.path("data", "spectra", "2018-NABO")
opus_files <- list.files(
path = path, pattern = "\\.\\d+$", full.names = TRUE, recursive = TRUE
)
# number of OPUS files
length(opus_files)
# Benchmark single-threaded reading (standard) ---------------------------------
bnch_seq <- bench::mark(
data <- opusreader2::read_opus(dsn = path)
)
bnch_seq
# A tibble: 1 × 13
expression min median itr/s…¹ mem_a…² gc/se…³ n_itr n_gc total…⁴ result memory
<bch:expr> <bch> <bch:> <dbl> <bch:b> <dbl> <int> <dbl> <bch:t> <list> <list>
1 data <- opusreader2::read_opus(dsn = path) 21.1s 21.1s 0.0475 8.32GB 4.32 1 91 21.1s <list> <Rprofmem>
# … with 2 more variables: time <list>, gc <list>, and abbreviated variable names ¹`itr/sec`, ²mem_alloc, ³`gc/sec`,
# ⁴total_time
# ℹ Use `colnames()` to see all variable names
Warning message:
Some expressions had a GC in every iteration; so filtering is disabled.
r$> # file sizes on disk vs. memory sizes in R
total_filesize_mb <- sum(sapply(opus_files, file.size)) / 1000^2
parsed_data_size <- unclass(pryr::object_size(data)) / 1000^2
read_alloc_memory <- unclass(bnch_seq$mem_alloc) / 1000^2
# comparison of file size on disk, read allocation, and parsed size
mem_comp_seq <- tibble::tribble(
~total_file_size_MB, ~read_alloc_memory_in_MB, ~parsed_size_in_MB,
total_filesize_mb, read_alloc_memory, parsed_data_size
)
mem_comp_seq
# A tibble: 1 × 3
total_file_size_MB read_alloc_memory_in_MB parsed_size_in_MB
<dbl> <dbl> <dbl>
1 490. 8930. 2104.
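# For orientation, the same numbers as ratios (a rough calculation, assuming
# the variables defined above in this session)
read_alloc_memory / total_filesize_mb # ~8930 / 490 -> roughly 18x the on-disk size is allocated while reading
parsed_data_size / total_filesize_mb  # ~2104 / 490 -> the parsed list keeps roughly 4x the on-disk size in RAM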
r$> # Benchmark asynchronous (multisession) reading --------------------------------
library("future")
availableCores() # 8 logical threads on 4 physical cores
plan(multisession(workers = 4))
# chunks the file list resolved from `dsn` into as many chunks as there are
# `workers` registered in the future plan
bnch_multis4 <- bench::mark(
data_multis4 <- opusreader2::read_opus(dsn = path, parallel = TRUE)
)
Loading required package: future.apply
Warning message:
Some expressions had a GC in every iteration; so filtering is disabled.
r$> bnch_multis4
# A tibble: 1 × 13
expression min median itr/s…¹ mem_a…² gc/se…³ n_itr n_gc
<bch:expr> <bch:> <bch:> <dbl> <bch:b> <dbl> <int> <dbl>
1 data_multis4 <- opusreader2::read_opus(dsn = path, parallel = TRUE) 8.92s 8.92s 0.112 2.65GB 0.112 1 1
# … with 5 more variables: total_time <bch:tm>, result <list>, memory <list>, time <list>, gc <list>, and abbreviated
# variable names ¹`itr/sec`, ²mem_alloc, ³`gc/sec`
# ℹ Use `colnames()` to see all variable names
r$> parsed_data_size_multis4 <- unclass(pryr::object_size(data_multis4)) / 1000^2
read_alloc_memory_multis4 <- unclass(bnch_multis4$mem_alloc) / 1000^2
mem_comp_multis4 <- tibble::tribble(
~total_file_size_MB, ~read_alloc_memory_in_MB, ~parsed_size_in_MB,
total_filesize_mb, read_alloc_memory_multis4, parsed_data_size_multis4
)
mem_comp_multis4
# A tibble: 1 × 3
total_file_size_MB read_alloc_memory_in_MB parsed_size_in_MB
<dbl> <dbl> <dbl>
1 490. 2846. 3076.
r$> # Benchmark asynchronous (multicore) reading -----------------------------------
# uses forking of processes; only supported on Linux and macOS
# also not recommended in RStudio (rather use the terminal or another editor
# like VSCode)
library("future")
plan(multicore(workers = 4))
# chunks the file list resolved from `dsn` into as many chunks as there are
# `workers` registered in the future plan
bnch_multic4 <- bench::mark(
data_multic4 <- opusreader2::read_opus(dsn = path, parallel = TRUE),
memory = FALSE # If you are benchmarking parallel code you must set `memory = FALSE`.
)
bnch_multic4
# A tibble: 1 × 13
expression min median itr/s…¹ mem_a…² gc/se…³ n_itr n_gc
<bch:expr> <bch:> <bch:> <dbl> <bch:b> <dbl> <int> <dbl>
1 data_multic4 <- opusreader2::read_opus(dsn = path, parallel = TRUE) 11.7s 11.7s 0.0852 NA 2.98 1 35
# … with 5 more variables: total_time <bch:tm>, result <list>, memory <list>, time <list>, gc <list>, and abbreviated
# variable names ¹`itr/sec`, ²mem_alloc, ³`gc/sec`
# ℹ Use `colnames()` to see all variable names
Warning message:
Some expressions had a GC in every iteration; so filtering is disabled.
r$> (parsed_data_size_multic4 <- unclass(pryr::object_size(data_multis4)) / 1000^2)
[1] 3076.439
r$> (read_alloc_memory_multic4 <- unclass(bnch_multis4$mem_alloc) / 1000^2)
[1] 2845.69
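For easier comparison, a small side-by-side of the three runs; the values are copied from the bench outputs above (multicore memory allocation was not tracked because of `memory = FALSE`):

# summary of the three read strategies (values taken from the bench results above)
tibble::tribble(
  ~strategy,                  ~median_time_s, ~mem_alloc_GB,
  "sequential",               21.1,           8.32,
  "multisession (4 workers)", 8.92,           2.65,
  "multicore (4 workers)",    11.7,           NA
)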
=> Based on the results above, we should do some more profiling to see where we can improve the single-threaded implementation (standard) of `read_opus()`.
Code for profiling when reading one file:

library("opusreader2")
file <- opus_file()
devtools::load_all()
library("profvis")
# visual benchmarking
profvis(
{
data <- read_opus(dsn = file)
},
interval = 0.005
)
# tabular benchmarking
bnch <- bench::mark(
data <- read_opus(dsn = file)
)
mem_list <- bnch$memory
mem_tab <- mem_list[[1]]
library("data.table")
setDT(mem_tab)
mem_ord <- na.omit(mem_tab[order(-bytes)])
# ordered table with stack-trace expressions; top 31 entries by memory usage
mem_top31 <- mem_ord[1:31, ]
mem_top31

r$> bnch
# A tibble: 1 × 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total…¹ result memory
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:t> <list> <list>
1 data <- read_opus(dsn = file) 16.8ms 17.8ms 56.3 1.02MB 13.4 21 5 373ms <named list> <Rprofmem>
# … with 2 more variables: time <list>, gc <list>, and abbreviated variable name ¹total_time
# ℹ Use `colnames()` to see all variable names

r$> mem_top31
what bytes trace
1: alloc 72080 readBin,read_raw.file,read_raw,read_opus_raw,read_opus_impl,FUN,...
2: alloc 72080 rawConnection,parse_opus,read_opus_impl,FUN,lapply,opus_lapply,...
3: alloc 38656 readBin,read_float,parse_chunk.data,parse_chunk,FUN,lapply,...
4: alloc 38648 seq.default,seq,rev,prepare_spectra,f,Reduce,...
5: alloc 38648 pmin,seq.default,seq,rev,prepare_spectra,f,...
6: alloc 38648 rev.default,rev,prepare_spectra,f,Reduce,parse_opus,...
7: alloc 38648 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
8: alloc 38648 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
9: alloc 38608 readBin,read_float,parse_chunk.data,parse_chunk,FUN,lapply,...
10: alloc 38608 readBin,read_float,parse_chunk.data,parse_chunk,FUN,lapply,...
11: alloc 38600 seq.default,seq,rev,prepare_spectra,f,Reduce,...
12: alloc 38600 pmin,seq.default,seq,rev,prepare_spectra,f,...
13: alloc 38600 rev.default,rev,prepare_spectra,f,Reduce,parse_opus,...
14: alloc 38600 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
15: alloc 38600 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
16: alloc 38600 seq.default,seq,rev,prepare_spectra,f,Reduce,...
17: alloc 38600 pmin,seq.default,seq,rev,prepare_spectra,f,...
18: alloc 38600 rev.default,rev,prepare_spectra,f,Reduce,parse_opus,...
19: alloc 38600 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
20: alloc 38600 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
21: alloc 34736 readBin,read_character,parse_chunk.text,parse_chunk,FUN,lapply,...
22: alloc 19352 seq.default,seq,rev,prepare_spectra,f,Reduce,...
23: alloc 19352 rev.default,rev,prepare_spectra,f,Reduce,parse_opus,...
24: alloc 19352 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
25: alloc 19328 seq.default,seq,rev,prepare_spectra,f,Reduce,...
26: alloc 19328 rev.default,rev,prepare_spectra,f,Reduce,parse_opus,...
27: alloc 19328 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
28: alloc 19328 seq.default,seq,rev,prepare_spectra,f,Reduce,...
29: alloc 19328 rev.default,rev,prepare_spectra,f,Reduce,parse_opus,...
30: alloc 19328 matrix,prepare_spectra,f,Reduce,parse_opus,read_opus_impl,...
31: alloc 2096 <Anonymous>,cmpCallSymFun,cmpCall,cmp,cmpSymbolAssign,h,...
what bytes trace
I have two tentative remarks from rough profiling above (note that the profvis documentation says sampling intervals below 5 ms will probably not give accurate timings):
1. Lines 83 to 86 in c3e4c2d
2. opusreader2/R/prepare_spectra.R, Line 15 in c3e4c2d
r$> data.table(
list_object = names(data),
R_memory_size = lapply(data, pryr::object_size)
)
list_object R_memory_size
1: refl_no_atm_comp_data_param 8.54 kB
2: refl_no_atm_comp 79.81 kB
3: quant_report_refl 5.62 kB
4: sc_sample_data_param 8.54 kB
5: sc_sample 79.79 kB
6: sc_ref_data_param 8.54 kB
7: sc_ref 79.88 kB
8: optics 10.67 kB
9: optics_ref 10.60 kB
10: acquisition_ref 11.53 kB
11: fourier_transformation_ref 7.34 kB
12: fourier_transformation 7.34 kB
13: sample 9.18 kB
14: acquisition 11.51 kB
15: instrument_ref 27.37 kB
16: instrument 26.78 kB
17: lab_and_process_param_raw 8.18 kB
18: lab_and_process_param_processed 8.79 kB
19: info_block 14.16 kB
20: history 8.66 kB

Inevitably, the corresponding wavenumbers take almost as much memory as the 1-row matrix of the spectra.

r$> data.table(
list_object = names(refl_no_atm_comp),
data_type = lapply(refl_no_atm_comp, class),
R_memory_size = lapply(refl_no_atm_comp, pryr::object_size)
)
list_object data_type R_memory_size
1: block_type integer 56 B
2: channel_type integer 56 B
3: text_type integer 56 B
4: additional_type integer 56 B
5: offset integer 56 B
6: next_offset integer 56 B
7: chunk_size integer 56 B
8: block_type_name character 136 B
9: data matrix,array 39.42 kB
10: wavenumbers numeric 38.60 kB
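A back-of-the-envelope check makes the near-identical sizes plausible (assuming double precision, 8 bytes per value):

# both objects hold roughly NPT doubles at 8 bytes each
npt <- 38600 / 8 # ~4825 points implied by the 38.60 kB wavenumbers vector
npt * 8 / 1000   # ~38.6 kB; the 1 x NPT spectra matrix carries essentially the same payload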
Here is some further exploration.

# spectra object (list) without the wavenumbers element
refl <- refl_no_atm_comp[!names(refl_no_atm_comp) %in% "wavenumbers"]
wavenumbers <- refl_no_atm_comp$wavenumbers
# conceptually as in `prepare_spectra()`
refl_list <- list(refl = refl)
pryr::object_size(refl)
pryr::object_size(refl_list)
# define appending methods to test in benchmark
c_fun <- function(refl_list, wavenumbers) {
refl_list[[1]] <- c(refl_list[[1]], wavenumbers = wavenumbers)
return(refl_list)
}
append_fun <- function(refl_list, wavenumbers) {
names_first <- names(refl_list[[1]])
refl_list[[1]] <- append(x = refl_list[[1]], values = wavenumbers)
# one extra step to set names
names(refl_list[[1]]) <- c(names_first, "wavenumbers")
return(refl_list)
}
# at first level
c_fun_first <- function(refl, wavenumbers) {
refl <- c(refl, wavenumbers = wavenumbers)
return(refl)
}

r$> ## benchmark: almost no difference in terms of memory; however, processing time differs
# best solution
bench::mark(
c_fun(refl_list, wavenumbers)
)
# A tibble: 1 × 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory time gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list> <list> <list> <list>
1 c_fun(refl_list, wavenumbers) 1.13ms 1.14ms 796. 75.5KB 2.03 393 1 494ms <named list [1]> <Rprofmem [2 × 3]> <bench_tm [394]> <tibble [394 × 3]>
r$> # actually, this is the most efficient for read time!
bench::mark(
append_fun(refl_list, wavenumbers)
)
# A tibble: 1 × 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory time gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list> <list> <list> <list>
1 append_fun(refl_list, wavenumbers) 142µs 158µs 6056. 113KB 28.1 2586 12 427ms <named list [1]> <Rprofmem [3 × 3]> <bench_tm [2,598]> <tibble [2,598 × 3]>
r$> # no difference to c_fun
bench::mark(
c_fun_first(refl, wavenumbers)
)
# A tibble: 1 × 13
expression min median `itr/sec` mem_alloc `gc/sec` n_itr n_gc total_time result memory time gc
<bch:expr> <bch:tm> <bch:tm> <dbl> <bch:byt> <dbl> <int> <dbl> <bch:tm> <list> <list> <list> <list>
1 c_fun_first(refl, wavenumbers) 1.12ms 1.14ms 820. 75.5KB 4.11 399 2 487ms <named list [4,828]> <Rprofmem [2 × 3]> <bench_tm [401]> <tibble [401 × 3]>
opusreader2/R/prepare_spectra.R, Line 15 in c3e4c2d
For 2., I spontaneously think of …
#55, at least when using …
@ThomasKnecht currently we iterate through all elements (see Lines 83 to 86 in c3e4c2d).
Just to try, what about doing a selective element update with re-assignment of specific elements in a loop?
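A minimal sketch of that idea, assuming a parsed list like `data` from above; the element names and the update function are placeholders, not the actual opusreader2 internals:

# placeholder: only these elements would actually need post-processing
elements_to_update <- c("refl_no_atm_comp", "sc_sample", "sc_ref")

update_element <- function(x) {
  # placeholder for the real post-processing step
  x
}

for (nm in elements_to_update) {
  # re-assign only the selected elements instead of lapply()-ing over all of them
  data[[nm]] <- update_element(data[[nm]])
}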
Closing for now as we have not found speed or memory gains in #59. The only thing we could still do is not expand the x-axis (spatial frequency in wavenumbers) given by FXV (frequency of first X-variable), LXV (frequency of last X-variable) and NPT (number of points) into the extra `wavenumbers` vector.
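A rough sketch of that idea, keeping only the three header values and expanding on demand; the function name, storage layout, and numbers are illustrative, not the current opusreader2 API:

# compact x-axis description instead of the expanded numeric vector
wn_spec <- list(fxv = 7498.0, lxv = 600.0, npt = 4825) # illustrative values

# expand only when a caller actually needs the full axis
expand_wavenumbers <- function(spec) {
  seq(from = spec$fxv, to = spec$lxv, length.out = spec$npt)
}

wavenumbers <- expand_wavenumbers(wn_spec)
length(wavenumbers) # 4825 values, ~38.6 kB only materialised on demand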