Merge pull request #12 from b-cubed-eu/add-effect-class

Add effect classification functionality
b-cubed-eu · Feb 11, 2025 · 08a6cf0 · 08a6cf0
2 parents 87fdf50 + 50247cf
commit 08a6cf0
Show file tree

Hide file tree

Showing 18 changed files with 526 additions and 31 deletions.
diff --git a/.zenodo.json b/.zenodo.json
@@ -1,6 +1,6 @@
 {
   "title": "dubicube: Calculation and Interpretation of Data Cube Indicator Uncertainty",
-  "version": "0.1.0",
+  "version": "0.2.0",
   "license": "MIT",
   "upload_type": "software",
   "description": "<p>An R package to explore calculation, interpretation and visualisation of uncertainty related to indicators based on biodiversity data cubes.<\/p>",

diff --git a/CITATION.cff b/CITATION.cff
@@ -29,4 +29,4 @@ abstract: "An R package to explore calculation, interpretation and visualisation
 identifiers:
 - type: url
   value: https://b-cubed-eu.github.io/dubicube/
-version: 0.1.0
+version: 0.2.0
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: dubicube
 Title: Calculation and Interpretation of Data Cube Indicator Uncertainty
-Version: 0.1.0
+Version: 0.2.0
 Authors@R: c(
     person("Ward", "Langeraert", , "[email protected]", role = c("aut", "cre"),
            comment = c(ORCID = "0000-0002-5900-8109", affiliation = "Research Institute for Nature and Forest (INBO)")),
@@ -20,6 +20,7 @@ Imports:
     assertthat,
     boot,
     dplyr,
+    effectclass,
     modelr,
     purrr,
     rlang,
@@ -29,6 +30,7 @@ Suggests:
     testthat (>= 3.0.0)
 Remotes: 
     b-cubed-eu/b3gbi
+Additional_repositories: https://inbo.r-universe.dev
 Config/checklist/communities: b3; inbo
 Config/checklist/keywords: uncertainty quantification; uncertainty
     visualisation; biodiversity indicators; data cubes

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,10 +1,13 @@
 # Generated by roxygen2: do not edit by hand
 
+export(add_effect_classification)
 export(bootstrap_cube)
 export(calculate_bootstrap_ci)
 import(assertthat)
 import(boot)
 import(dplyr)
+importFrom(effectclass,classification)
+importFrom(effectclass,coarse_classification)
 importFrom(modelr,bootstrap)
 importFrom(purrr,map)
 importFrom(rlang,.data)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,8 @@
+# dubicube 0.2.0
+
+* Add effect classification function
+* Add `...` argument to `bootstrap_cube()` and `calculate_bootstrap_ci()`
+
 # dubicube 0.1.0
 
 * Add bootstrapping functions

diff --git a/R/add_classification_as_factor.R b/R/add_classification_as_factor.R
@@ -0,0 +1,193 @@
+# nolint start: line_length_linter.
+#' Add effect classifications to a dataframe by comparing the confidence
+#' intervals with a reference and thresholds
+#'
+#' This function adds classified effects to a dataframe as ordered factor
+#' variables by comparing the confidence intervals with a reference and
+#' thresholds.
+#'
+#' @param df A dataframe containing summary data of confidence limits. Two
+#' columns are required containing lower and upper limits indicated by the
+#' `cl_columns` argument. Any other columns are optional.
+#' @param cl_columns A vector of 2 column names in `df` indicating respectively
+#' the lower and upper confidence limits (e.g. `c("lcl", "ucl")`).
+#' @param threshold A vector of either 1 or 2 thresholds. A single threshold
+#' will be transformed into `reference + c(-abs(threshold), abs(threshold))`.
+#' @param reference The null hypothesis value to compare confidence intervals
+#' against. Defaults to 0.
+#' @param coarse Logical, defaults to `TRUE`. If `TRUE`, add a coarse
+#' classification to the dataframe.
+#'
+#' @returns The returned value is a modified version of the original input
+#' dataframe `df` with additional columns `effect_code` and `effect` containing
+#' respectively the effect symbols and descriptions as ordered factor variables.
+#' In case of `coarse = TRUE` (by default) also `effect_code_coarse` and
+#' `effect_coarse` containing the coarse classification effects.
+#'
+#' @details
+#' This function is a wrapper around `effectclass::classify()` and
+#' `effectclass::coarse_classification()` from the \pkg{effectclass} package
+#' (Onkelinx, 2023). They classify effects in a stable and transparent manner.
+#'
+#' | Symbol | Fine effect / trend | Coarse effect / trend | Rule |
+#' | :---: | --- | --- | --- |
+#' | `++` | strong positive effect / strong increase | positive effect / increase | confidence interval above the upper threshold |
+#' | `+`  | positive effect / increase | positive effect / increase | confidence interval above reference and contains the upper threshold |
+#' | `+~` | moderate positive effect / moderate increase | positive effect / increase | confidence interval between reference and the upper threshold |
+#' | `~`  | no effect / stable | no effect / stable | confidence interval between thresholds and contains reference |
+#' | `-~` | moderate negative effect / moderate decrease | negative effect / decrease | confidence interval between reference and the lower threshold |
+#' | `-`  | negative effect / decrease | negative effect / decrease | confidence interval below reference and contains the lower threshold |
+#' | `--` | strong negative effect / strong decrease | negative effect / decrease | confidence interval below the lower threshold |
+#' | `?+` | potential positive effect / potential increase | unknown effect / unknown | confidence interval contains reference and the upper threshold |
+#' | `?-` | potential negative effect / potential decrease | unknown effect / unknown | confidence interval contains reference and the lower threshold |
+#' | `?`  | unknown effect / unknown | unknown effect / unknown | confidence interval contains the lower and upper threshold |
+#'
+#' @references
+#' Onkelinx, T. (2023). effectclass: Classification and visualisation of effects
+#' \[Computer software\]. \url{https://inbo.github.io/effectclass/}
+#'
+#' @export
+#'
+#' @family uncertainty
+#'
+#' @import dplyr
+#' @importFrom rlang .data
+#' @importFrom effectclass coarse_classification classification
+#'
+#' @examples
+#' # Example dataset
+#' ds <- data.frame(
+#'   mean = c(0, 0.5, -0.5, 1, -1, 1.5, -1.5, 0.5, -0.5, 0),
+#'   sd = c(1, 0.5, 0.5, 0.5, 0.5, 0.25, 0.25, 0.25, 0.25, 0.5)
+#' )
+#' ds$lcl <- qnorm(0.05, ds$mean, ds$sd)
+#' ds$ucl <- qnorm(0.95, ds$mean, ds$sd)
+#'
+#' add_effect_classification(
+#'  df = ds,
+#'  cl_columns = c("lcl", "ucl"),
+#'  threshold = 1,
+#'  reference = 0,
+#'  coarse = TRUE)
+# nolint end
+
+add_effect_classification <- function(
+    df,
+    cl_columns,
+    threshold,
+    reference = 0,
+    coarse = TRUE) {
+  ### Start checks
+  # Check dataframe input
+  stopifnot("`df` must be a dataframe." =
+              inherits(df, "data.frame"))
+
+  # Check if cl_columns is a character vector
+  stopifnot("`cl_columns` must be a character vector of length 2." =
+              is.character(cl_columns) & length(cl_columns) == 2)
+
+  # Check if cl_columns columns are present in dataframe
+  stopifnot("`cl_columns` columns are not present in `df`." =
+              all(cl_columns %in% names(df)))
+
+  # Check if reference is a numeric vector
+  stopifnot("`threshold` must be a numeric vector of length 1 or 2." =
+              is.numeric(threshold) &
+              (length(threshold) == 1 | length(threshold) == 2))
+
+  # Check if reference is a number
+  stopifnot("`reference` must be a numeric vector of length 1." =
+              assertthat::is.number(reference))
+
+  # Check if coarse is a logical vector of length 1
+  stopifnot("`coarse` must be a logical vector of length 1." =
+              assertthat::is.flag(coarse))
+  ### End checks
+
+  # Classify effects with effectclass
+  classified_df <- df %>%
+    dplyr::mutate(effect_code = effectclass::classification(
+      lcl = !!dplyr::sym(cl_columns[1]),
+      ucl = !!dplyr::sym(cl_columns[2]),
+      threshold = threshold,
+      reference = reference)
+    )
+
+  # Add coarse classification if specified
+  if (coarse) {
+    classified_df$effect_code_coarse <- effectclass::coarse_classification(
+      classified_df$effect_code)
+  }
+
+  # Create ordered factors of effects
+  out_df <- classified_df %>%
+    dplyr::mutate(
+      effect_code = factor(.data$effect_code,
+                           levels = c(
+                             "++",
+                             "+",
+                             "+~",
+                             "~",
+                             "-~",
+                             "-",
+                             "--",
+                             "?+",
+                             "?-",
+                             "?"),
+                           ordered = TRUE),
+      effect = dplyr::case_when(
+        effect_code == "++" ~ "strong increase",
+        effect_code == "+"  ~ "increase",
+        effect_code == "+~" ~ "moderate increase",
+        effect_code == "~"  ~ "stable",
+        effect_code == "-~" ~ "moderate decrease",
+        effect_code == "-"  ~ "decrease",
+        effect_code == "--" ~ "strong decrease",
+        effect_code == "?+" ~ "potential increase",
+        effect_code == "?-" ~ "potential decrease",
+        effect_code == "?"  ~ "unknown"
+      ),
+      effect = factor(.data$effect,
+                      levels = c(
+                        "strong increase",
+                        "increase",
+                        "moderate increase",
+                        "stable",
+                        "moderate decrease",
+                        "decrease",
+                        "strong decrease",
+                        "potential increase",
+                        "potential decrease",
+                        "unknown"),
+                      ordered = TRUE)
+    )
+
+  # Create ordered factors of effects if coarse is specified
+  if (coarse) {
+    out_df <- out_df %>%
+      dplyr::mutate(
+        effect_code_coarse = factor(.data$effect_code_coarse,
+                                    levels = c(
+                                      "+",
+                                      "~",
+                                      "-",
+                                      "?"),
+                                    ordered = TRUE),
+        effect_coarse = dplyr::case_when(
+          effect_code_coarse == "+" ~ "increase",
+          effect_code_coarse == "-" ~ "decrease",
+          effect_code_coarse == "~" ~ "stable",
+          effect_code_coarse == "?" ~ "unknown"
+        ),
+        effect_coarse = factor(.data$effect_coarse,
+                               levels = c(
+                                 "increase",
+                                 "stable",
+                                 "decrease",
+                                 "unknown"),
+                               ordered = TRUE)
+      )
+  }
+
+  return(out_df)
+}
diff --git a/R/bootstrap_cube.R b/R/bootstrap_cube.R
@@ -11,6 +11,7 @@
 #' @param fun A function which, when applied to `data_cube` returns the
 #' statistic(s) of interest. This function must return a dataframe with a column
 #' `diversity_val` containing the statistic of interest.
+#' @param ... Additional arguments passed on to `fun`.
 #' @param grouping_var A string specifying the grouping variable(s) for the
 #' bootstrap analysis. The output of `fun(data_cube)` returns a row per group.
 #' @param samples The number of bootstrap replicates. A single positive integer.
@@ -100,6 +101,8 @@
 #'
 #' @export
 #'
+#' @family uncertainty
+#'
 #' @import dplyr
 #' @import assertthat
 #' @importFrom rlang .data inherits_any
@@ -144,6 +147,7 @@
 bootstrap_cube <- function(
     data_cube,
     fun,
+    ...,
     grouping_var,
     samples = 1000,
     ref_group = NA,
@@ -199,7 +203,7 @@ bootstrap_cube <- function(
 
   if (rlang::inherits_any(data_cube, c("processed_cube", "sim_cube"))) {
     # Check if grouping_var column is present in data cube
-    stopifnot("`data_cube` should contain column `grouping_var`" =
+    stopifnot("`data_cube` should contain column `grouping_var`." =
                 grouping_var %in% names(data_cube$data))
 
     # Check if ref_group is present in grouping_var
@@ -214,15 +218,15 @@ bootstrap_cube <- function(
     resample_df <- modelr::bootstrap(data_cube$data, samples, id = "id")
 
     # Function for bootstrapping
-    bootstrap_resample <- function(x, fun) {
+    bootstrap_resample <- function(x, fun, ...) {
       resample_obj <- x$strap[[1]]
       indices <- as.integer(resample_obj)
       data <- resample_obj$data[indices, ]
 
       data_cube_copy <- data_cube
       data_cube_copy$data <- data
 
-      fun(data_cube_copy)$data %>%
+      fun(data_cube_copy, ...)$data %>%
         dplyr::mutate(sample = as.integer(x$id))
     }
   } else {
@@ -242,12 +246,12 @@ bootstrap_cube <- function(
     resample_df <- modelr::bootstrap(data_cube, samples, id = "id")
 
     # Function for bootstrapping
-    bootstrap_resample <- function(x, fun) {
+    bootstrap_resample <- function(x, fun, ...) {
       resample_obj <- x$strap[[1]]
       indices <- as.integer(resample_obj)
       data <- resample_obj$data[indices, ]
 
-      fun(data) %>%
+      fun(data, ...) %>%
         dplyr::mutate(sample = as.integer(x$id))
     }
   }
@@ -258,6 +262,7 @@ bootstrap_cube <- function(
     purrr::map(
       bootstrap_resample,
       fun = fun,
+      ...,
       .progress = ifelse(progress, "Bootstrapping", progress))
 
   if (!is.na(ref_group)) {
@@ -289,9 +294,9 @@ bootstrap_cube <- function(
   } else {
     # Calculate true statistic
     if (rlang::inherits_any(data_cube, c("processed_cube", "sim_cube"))) {
-      t0 <- fun(data_cube)$data
+      t0 <- fun(data_cube, ...)$data
     } else {
-      t0 <- fun(data_cube)
+      t0 <- fun(data_cube, ...)
     }
 
     # Get bootstrap samples as a list