Merge pull request #13 from weberse2/issue-rel-1-7-1

Release 1.7-1
Novartis · Aug 11, 2023 · 20399f1 · 20399f1
2 parents 7350f3f + f7da4eb
commit 20399f1
Show file tree

Hide file tree

Showing 37 changed files with 890 additions and 204 deletions.
diff --git a/.Rbuildignore b/.Rbuildignore
@@ -19,6 +19,8 @@ src/stan_files/.*\.hpp$
 ^inst/sbc/sbc_report_full.html$
 ^.Rd*$
 ^.Rd*/*$
+^.Rprofile$
+^.brms-cache
 ^.gitignore$
 ^.gitattributes$
 ^_pkgdown\.yml$

diff --git a/.Rprofile b/.Rprofile
@@ -0,0 +1,13 @@
+# ensure that an exisitng user .Rprofile is respected
+if(exists("~/.Rprofile")) source("~/.Rprofile")
+
+# sets up brms caching and use of cmdstanr if available
+local({
+    if(requireNamespace("cmdstanr", quietly=TRUE)) {
+        # instruct brms to use cmdstanr as backend and cache all Stan binaries
+        brms_cache_dir <- Sys.getenv("BRMS_CACHE_DIR", here::here(".brms_cache"))
+        # create cache directory if not yet available
+        dir.create(brms_cache_dir, FALSE)
+        options(brms.backend="cmdstanr", cmdstanr_write_stan_file_dir=brms_cache_dir)
+    }
+})
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -8,8 +8,8 @@ Description: Tool-set to support Bayesian evidence synthesis.  This
     for details on applying this package while Neuenschwander et al. (2010)
     <doi:10.1177/1740774509356002> and Schmidli et al. (2014)
     <doi:10.1111/biom.12242> explain details on the methodology.
-Version: 1.7-0
-Date: 2023-07-19
+Version: 1.7-1
+Date: 2023-08-08
 Authors@R: c(person("Novartis", "Pharma AG", role = "cph")
             ,person("Sebastian", "Weber", email="[email protected]", role=c("aut", "cre"))
             ,person("Beat", "Neuenschwander", email="[email protected]", role="ctb")
@@ -37,7 +37,8 @@ Imports:
     stats,
     utils,
     matrixStats,
-    abind
+    abind,
+    rlang
 LinkingTo:
     BH (>= 1.72.0),
     Rcpp (>= 0.12.0),
@@ -63,7 +64,10 @@ Suggests:
     tools,
     broom,
     tidyr,
-    parallel
+    parallel,
+    brms,
+    glue,
+    ragg
 VignetteBuilder: knitr
 SystemRequirements: GNU make, pandoc (>= 1.12.3), pandoc-citeproc, C++17
 Encoding: UTF-8

diff --git a/Makefile b/Makefile
@@ -21,6 +21,7 @@ SRCS = $(R_PKG_SRCS) $(R_SRCS) $(RMD_SRCS) $(STAN_SRCS)
 NODIR_SRC = $(notdir $(SRCS))
 BIN_OBJS = src/package-binary R/sysdata.rda
 DOC_OBJS = man/package-doc inst/doc/$(RPKG).pdf
+RCMD ?= R_PROFILE_USER="$(PROJROOT_ABS)/.Rprofile" "${R_HOME}/bin/R" -q
 
 R_HOME ?= $(shell R RHOME)
 PKG_VERSION ?= $(patsubst ‘%’, %, $(word 2, $(shell grep ^Version DESCRIPTION)))
@@ -32,17 +33,17 @@ all : $(TARGET)
 
 # tell makefile how to turn a Rmd into an md file
 %.md : %.Rmd
-	cd $(@D); echo running R -q -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"
-	cd $(@D); R -q -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"
+	cd $(@D); echo running $(RCMD) -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"
+	cd $(@D); $(RCMD) -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"
 
 %.md : %.R
-	cd $(@D); echo running R -q -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"
-	cd $(@D); R -q -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"
+	cd $(@D); echo running $(RCMD) -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"
+	cd $(@D); $(RCMD) -q -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"
 
 # render an html via the respective md file
 %.html : %.md
-	cd $(@D); echo running R -q -e "rmarkdown::render('$(<F)', output_format=rmarkdown::html_document(self_contained=TRUE))"
-	cd $(@D); R -q -e "rmarkdown::render('$(<F)', output_format=rmarkdown::html_document(self_contained=TRUE))"
+	cd $(@D); echo running $(RCMD) -e "rmarkdown::render('$(<F)', output_format=rmarkdown::html_document(self_contained=TRUE))"
+	cd $(@D); $(RCMD) -e "rmarkdown::render('$(<F)', output_format=rmarkdown::html_document(self_contained=TRUE))"
 
 R/stanmodels.R:
 	## ensure that NAMESPACE contains load directive
@@ -88,7 +89,7 @@ $(TARGET): build/r-source
 
 build/r-source : $(BIN_OBJS) $(DOC_OBJS) $(SRCS)
 	install -d build
-	cd build; "${R_HOME}/bin/R" CMD build .. --no-build-vignettes --no-manual
+	cd build; $(RCMD) CMD build .. --no-build-vignettes --no-manual
 	cd build; mv $(RPKG)_$(PKG_VERSION).tar.gz $(RPKG)-source.tar.gz
 	touch build/r-source-fast
 
@@ -105,7 +106,7 @@ build/r-source-release : $(BIN_OBJS) $(DOC_OBJS) $(SRCS) inst/sbc/sbc_report.htm
 	install -d build/$(RPKG)-$(GIT_TAG)/man
 	cp -v man/*.Rd build/$(RPKG)-$(GIT_TAG)/man
 	# set NOT_CRAN=true to get vignettes render with full sampling
-	cd build; NOT_CRAN=true "${R_HOME}/bin/R" CMD build $(RPKG)-$(GIT_TAG)
+	cd build; NOT_CRAN=true $(RCMD) CMD build --compact-vignettes=both $(RPKG)-$(GIT_TAG)
 	#cd build; NOT_CRAN=false "${R_HOME}/bin/R" CMD build $(RPKG)-$(GIT_TAG) --no-build-vignettes --no-manual
 	rm -rf build/$(RPKG)-$(GIT_TAG)
 	cd build; $(MD5) $(RPKG)-$(GIT_TAG).tar.gz > $(RPKG)-$(GIT_TAG).md5
@@ -121,6 +122,11 @@ binary : NAMESPACE src/package-binary
 PHONY += derived
 derived : NAMESPACE $(BIN_OBJS) $(DOC_OBJS)
 
+PHONY += r-source-check
+r-source-check : r-source
+	cd build; tar xvzf RBesT-source.tar.gz
+	cd build; NOT_CRAN=true $(RCMD) CMD check RBesT
+
 #$(DIR_OBJ)/%.o: %.c $(INCS)
 #    mkdir -p $(@D)
 #    $(CC) -o $@ $(CFLAGS) -c $< $(INC_DIRS)

diff --git a/NAMESPACE b/NAMESPACE
@@ -140,6 +140,7 @@ export(mixfit)
 export(mixgamma)
 export(mixmvnorm)
 export(mixnorm)
+export(mixstanvar)
 export(mn2beta)
 export(mn2gamma)
 export(mn2norm)
@@ -179,6 +180,7 @@ importFrom(matrixStats,logSumExp)
 importFrom(matrixStats,rowLogSumExps)
 importFrom(matrixStats,rowMins)
 importFrom(matrixStats,rowRanks)
+importFrom(rlang,.data)
 importFrom(rstan,extract)
 importFrom(rstan,get_sampler_params)
 importFrom(rstan,sampling)

diff --git a/NEWS.md b/NEWS.md
@@ -1,3 +1,22 @@
+# RBesT 1.7-1 - August 8th, 2023
+
+## Enhancements
+
+* Allow use of mixture priors in `brms` models with the new adapter
+  function `mixstanvar`.
+* Allow use of named dimensions for multivariate normal mixtures.
+* Add *experimental* diagnostic plots for mixture multivariate normal
+  EM fits. These are subject to changes in future release and user
+  feedback is very welcome.
+* Compress png plots of vignettes saving ~40% in file size of the
+  package sources.
+
+## Bugfixes
+
+* Fix issue when plotting EM diagnostic debugging plots for normal
+  mixtures.
+* Comply with newer ggplot2 standards to not use `aes_string`.
+
 # RBesT 1.7-0 - July 19th, 2023
 
 ## Enhancements

diff --git a/R/EM_bmm_ab.R b/R/EM_bmm_ab.R
@@ -267,6 +267,7 @@ EM_bmm_ab <- function(x, Nc, mix_init, Ninit=50, verbose=FALSE, Niter.max=500, t
 
     mixEst <- mixEst[,order(mixEst[1,], decreasing=TRUE),drop=FALSE]
     colnames(mixEst) <- paste("comp", seq(Nc), sep="")
+    likelihood(mixEst) <- "binomial"
     dlink(mixEst) <- identity_dlink
     class(mixEst) <- c("EM", "EMbmm", "betaMix", "mix")
 

diff --git a/R/EM_mnmm.R b/R/EM_mnmm.R
@@ -175,8 +175,8 @@ EM_mnmm <- function(X, Nc, mix_init, Ninit=50, verbose=FALSE, Niter.max=500, tol
     ##}
 
     ##mixEst <- list(p=pEst, mean=muEst, sigma=covEst)
-
-    mixEst <- do.call(mixmvnorm, lapply(1:Nc, function(i) c(pEst[i], muEst[i,,drop=FALSE], matrix(covEst[i,,], Nd, Nd))))
+    
+    mixEst <- do.call(mixmvnorm, lapply(1:Nc, function(i) c(pEst[i], muEst[i,,drop=TRUE], matrix(covEst[i,,], Nd, Nd))))
 
     ## give further details
     attr(mixEst, "df") <- df
@@ -185,9 +185,11 @@ EM_mnmm <- function(X, Nc, mix_init, Ninit=50, verbose=FALSE, Niter.max=500, tol
 
     attr(mixEst, "Nc") <- Nc
 
+    convert <- function(est) suppressWarnings(do.call(mixmvnorm, lapply(1:Nc, function(i) c(est$p[i], est$mean[i,,drop=FALSE], matrix(est$sigma[i,,], Nd, Nd)))))
+
     attr(mixEst, "tol") <- tol
     attr(mixEst, "traceLli") <- traceLli
-    attr(mixEst, "traceMix") <- traceMix
+    attr(mixEst, "traceMix") <- lapply(traceMix, convert)
     attr(mixEst, "x") <- X
 
     class(mixEst) <- c("EM", "EMmvnmm", "mvnormMix", "mix")
@@ -203,6 +205,12 @@ EM_nmm <- function(X, Nc, mix_init, verbose=FALSE, Niter.max=500, tol, Neps, eps
     mixEst <- EM_mnmm(X=X, Nc=Nc, mix_init=mix_init, verbose=verbose, Niter.max=Niter.max, tol=tol, Neps=Neps, eps=eps)
     rownames(mixEst) <- c("w", "m", "s")
     class(mixEst) <- c("EM", "EMnmm", "normMix", "mix")
+    attr(mixEst, "traceMix") <- lapply(attr(mixEst, "traceMix"),
+                                       function(x) {
+                                           class(x) <- class(mixEst)
+                                           rownames(x) <- rownames(mixEst)
+                                           x
+                                       })
     likelihood(mixEst) <- "normal"
     mixEst
 }

diff --git a/R/EM_msmm.R b/R/EM_msmm.R
@@ -8,8 +8,7 @@
 ## modelling using the t distribution". D. Peel and G.J. McLachlan,
 ## Statistics and Computing (2000), 10, 339-348,
 
-EM_msmm <- function(X, Nc, init, Ninit=50, verbose=TRUE, Niter.max=500, tol=1e-1)
-{
+EM_msmm <- function(X, Nc, init, Ninit=50, verbose=TRUE, Niter.max=500, tol=1e-1) {
     ## in case X is no matrix, interpret it as uni-variate case
     if(!is.matrix(X))
         X <- matrix(X,ncol=1)
@@ -117,7 +116,22 @@ EM_msmm <- function(X, Nc, init, Ninit=50, verbose=TRUE, Niter.max=500, tol=1e-1
         ## calculate additional weights of the U matrix aka latent
         ## tail mass of a point
         for(i in seq(Nc)) {
-            lU[,i] <- log(nuEst[i] + Nd) - log(nuEst[i] + mahalanobis(X, muEst[i,], as.matrix(covEst[i,,])) ) ## Eq. 20
+            Xc_i <- sweep(X, 2L, muEst[i,])
+            Sigma_i <- as.matrix(covEst[i,,])
+            ## in rare cases the covariance matrix becomes (almost)
+            ## singular in which case the alternative cholesky
+            ## factorization gives more stable results
+            maha_dist <- tryCatch(
+                mahalanobis(Xc_i, FALSE, Sigma_i),
+                error = function(e) {
+                    ## see https://stats.stackexchange.com/questions/147210/efficient-fast-mahalanobis-distance-computation
+                    ## also adding eps to the diagonal to further stabilize the computation
+                    L_i <- t(chol(Sigma_i + diag(5 * .Machine$double.eps, Nd, Nd)))
+                    y_i <- forwardsolve(L_i, t(Xc_i))
+                    colSums(y_i^2)
+                }
+            )
+            lU[,i] <- log(nuEst[i] + Nd) - log(nuEst[i] + maha_dist) ## Eq. 20
         }
 
         ## mean probability to be in a specific mixture component ->

diff --git a/R/EM_plot.R b/R/EM_plot.R
@@ -52,8 +52,8 @@
 #' @export
 plot.EM <- function(x, size=1.25, link=c("identity", "logit", "log"), ...) {
     pl <- list()
-    if(inherits(x, "mvnormMix")) stop("Multivariate normal mixtures plotting not supported.")
-    pl$mixdist <- plot.mix(x, size=size, ...)
+    if(!is_mixmv(x))
+        pl$mixdist <- plot.mix(x, size=size, ...)
     ## in verbose mode we output EM fit diagnostics
     if(getOption("RBesT.verbose", FALSE)) {
         ## these NULL assignments make R check happy
@@ -86,58 +86,112 @@ plot.EM <- function(x, size=1.25, link=c("identity", "logit", "log"), ...) {
                 Mw <- within(Mw, { Lw=logit(w) })
         }
         if("EMgmm" %in% class(x)) {
-            Mw <- within(Mw, { la <- log(a)
-                               lb <- log(b) })
+            Mw <- within(Mw, {
+                la <- log(a)
+                lb <- log(b)
+            })
             if(Nc != 1)
                 Mw <- within(Mw, { Lw=logit(w) })
         }
         pars <- names(Mw)[-c(1,2)]
         Mw <- within(Mw, { Comp=factor(comp) } )
         LL <- data.frame(iteration=0:max(Mw$iteration), lli=attr(x, "traceLli"))
-        basePl <- ggplot(Mw, aes_string(x="iteration", colour="Comp")) + geom_line(size=size)
+        basePl <- ggplot(Mw, aes(x=.data$iteration, colour=.data$Comp)) + geom_line(size=size)
         for(p in pars) {
-            pl[[p]] <- basePl + aes_string(y=p)
+            pl[[p]] <- basePl + aes(y=.data$p)
         }
-        pl$lli <- ggplot(subset(LL, iteration>0), aes_string(x="iteration", y="lli")) + geom_line(size=size) + ylab("log-likelihood")
+        pl$lli <- ggplot(subset(LL, iteration>0), aes(x=.data$iteration, y=.data$lli)) + geom_line(size=size) + ylab("log-likelihood")
     }
     ##pl$mix <- plot.mix(x, comp=TRUE, samp=attr(x, "x"), ...)
     link <- match.arg(link)
     dlink(x) <- link_map[[link]]
 
-    samp <- data.frame(Sample=mixlink(x, as.vector(attr(x, "x"))))
-    ## workaround a weird bug in ggplot which enlarges the interval
-    interval <- quantile(samp$Sample, c(0.025, 0.975))
-    n_fun <- 501
-    max_span <- diff(range(samp))
-    interval_span <- diff(interval)
-    n_fun <- min(5E3, round(n_fun * max_span/interval_span))
-
-    if(!is.dlink_identity(dlink(x)))
-        subtitle <- paste("Link:", dlink(x)$name)
-    else
-        subtitle <- NULL
-
-    pl$mixdens <- bayesplot::mcmc_dens(samp) + bayesplot::facet_text(FALSE) +
-        stat_function(inherit.aes=FALSE, fun=dmix, args=list(mix=x), size=size, n=n_fun) +
-        ggtitle("Parametric Mixture (black line) and Kernel Estimate of Sample Density", subtitle=subtitle) +
-        bayesplot::xaxis_title(FALSE)
-
     cols <- bayesplot::color_scheme_get(i=1:6)
 
-    pl$mixecdf <- ggplot(samp, aes_string(x="Sample")) +
-        stat_ecdf(geom="area", size=0, fill=cols$light) +
-        stat_ecdf(geom="step", size=size, colour=cols$mid) +
-        stat_function(fun=pmix, args=list(mix=x), size=size, n=n_fun) +
-        ggtitle("Estimated Cumulative Density from Parametric Mixture (black line) and Sample", subtitle=subtitle) +
-        bayesplot::bayesplot_theme_get() +
-        bayesplot::yaxis_title(FALSE) +
-        bayesplot::xaxis_title(FALSE) +
-        bayesplot::facet_text(FALSE)
-
-    pl$mix <- bayesplot::mcmc_hist(samp, binwidth=diff(interval)/50, freq=FALSE) + bayesplot::facet_text(FALSE) +
-        stat_function(inherit.aes=FALSE, fun=dmix, args=list(mix=x), size=size, n=n_fun) +
-        ggtitle("Parametric Mixture Density (black line) and Histogram of Sample", subtitle=subtitle) +
-        bayesplot::xaxis_title(FALSE)
+    if(!is_mixmv(x)) {
+        ## univariate case
+        samp <- data.frame(Sample=mixlink(x, as.vector(attr(x, "x"))))
+        ## workaround a weird bug in ggplot which enlarges the interval
+        interval <- quantile(samp$Sample, c(0.025, 0.975))
+        n_fun <- 501
+        max_span <- diff(range(samp))
+        interval_span <- diff(interval)
+        n_fun <- min(5E3, round(n_fun * max_span/interval_span))
+
+        if(!is.dlink_identity(dlink(x)))
+            subtitle <- paste("Link:", dlink(x)$name)
+        else
+            subtitle <- NULL
+
+        pl$mixdens <- bayesplot::mcmc_dens(samp) + bayesplot::facet_text(FALSE) +
+            stat_function(inherit.aes=FALSE, fun=dmix, args=list(mix=x), size=size, n=n_fun) +
+            ggtitle("Parametric Mixture (black line) and Kernel Estimate of Sample Density", subtitle=subtitle) +
+            bayesplot::xaxis_title(FALSE)
+
+        pl$mixecdf <- ggplot(samp, aes(x=.data$Sample)) +
+            stat_ecdf(geom="area", size=0, fill=cols$light) +
+            stat_ecdf(geom="step", size=size, colour=cols$mid) +
+            stat_function(fun=pmix, args=list(mix=x), size=size, n=n_fun) +
+            ggtitle("Estimated Cumulative Density from Parametric Mixture (black line) and Sample", subtitle=subtitle) +
+            bayesplot::bayesplot_theme_get() +
+            bayesplot::yaxis_title(FALSE) +
+            bayesplot::xaxis_title(FALSE) +
+            bayesplot::facet_text(FALSE)
+
+        pl$mix <- bayesplot::mcmc_hist(samp, binwidth=diff(interval)/50, freq=FALSE) + bayesplot::facet_text(FALSE) +
+            stat_function(inherit.aes=FALSE, fun=dmix, args=list(mix=x), size=size, n=n_fun) +
+            ggtitle("Parametric Mixture Density (black line) and Histogram of Sample", subtitle=subtitle) +
+            bayesplot::xaxis_title(FALSE)
+    } else if(inherits(x, "mvnormMix")) {
+        var1 <- var2 <- NULL
+        ## multivariate case: only support mvnorm for now (the only
+        ## one supported as of Aug 2023). Plot the pair-wise marginal
+        ## densities, which are the pair-wise marginal mixtures.
+        message("Diagnostic plots for mixture multivariate normal densities are experimental.\nPlease note that these are subject to changes in future releases.")
+        samp <- attr(x, "x")
+        p <- ncol(samp)
+        dim_labels <- mvnorm_dim_labels(x[-1,1])
+        if(is.null(dim_labels))
+            dim_labels <- paste0("Dimension ", 1:p)
+        breaks <- 60
+        nbins <- 20
+        pairs <- subset(expand.grid(var1=1:p, var2=1:p), var1 >= var2)
+        layout <- matrix(NA, nrow=p, ncol=p)
+        pl_pairs <- list()
+        pl_pairs_compact <- list()
+        for(i in seq_len(nrow(pairs))) {
+            v1 <- pairs$var1[i]
+            v2 <- pairs$var2[i]
+            label <- paste0("mixpair[", v2, ",", v1, "]")
+            mix_sub <- mvnorm_extract_dim(x, unique(c(v2, v1)))
+            layout[v1,v2] <- i
+            if (v1 == v2) {
+                interval <- quantile(samp[,v1], c(0.025, 0.975))
+                pl_pairs[[label]] <- bayesplot::mcmc_hist(samp[,v1, drop=FALSE], binwidth=diff(interval)/50, freq=FALSE) +
+                    bayesplot::facet_text(FALSE) +
+                    stat_function(inherit.aes=FALSE, fun=function(mix, x) dmix(mix, matrix(x, nrow=length(x))), args=list(mix=mix_sub)) +
+                    ylab(dim_labels[v2]) +
+                    xlab(dim_labels[v2])
+            } else {
+                data_ranges <- apply(samp[,c(v2, v1), drop=FALSE], 2, range)
+                colnames(data_ranges) <- c("x", "y")
+                data_grid <- expand.grid(apply(data_ranges, 2, function(r) seq(r[1], r[2], length=breaks), simplify=FALSE))
+                data_grid$z <- dmix(mix_sub, as.matrix(data_grid), log=TRUE)
+                pl_pairs[[label]] <- bayesplot::mcmc_scatter(samp[,c(v2, v1), drop=FALSE], alpha=0.1) +
+                    bayesplot::facet_text(FALSE) +
+                    geom_contour(aes(z=.data$z), data=data_grid, bins=nbins, colour="black") +
+                    xlab(dim_labels[v2]) +
+                    ylab(dim_labels[v1])
+            }
+            pl_pairs_compact[[label]] <- pl_pairs[[label]]
+            if(v1 != p)
+                pl_pairs_compact[[label]] <- pl_pairs_compact[[label]] + bayesplot::xaxis_title(FALSE) + bayesplot::xaxis_ticks(FALSE) + bayesplot::xaxis_text(FALSE)
+            if(v2 != 1)
+                pl_pairs_compact[[label]] <- pl_pairs_compact[[label]] + bayesplot::yaxis_title(FALSE) + bayesplot::yaxis_ticks(FALSE) + bayesplot::yaxis_text(FALSE)
+        }
+        pl$mixpairs <- bayesplot::bayesplot_grid(plots=pl_pairs_compact, grid_args=list(nrow=p, ncol=p, layout_matrix=layout))
+        pl$mixpairs$bayesplots <- pl_pairs
+    }
 
     pl
 }