Skip to content

Commit

Permalink
Merge pull request #13 from weberse2/issue-rel-1-7-1
Browse files Browse the repository at this point in the history
Release 1.7-1
  • Loading branch information
weberse2 authored Aug 11, 2023
2 parents 7350f3f + f7da4eb commit 20399f1
Show file tree
Hide file tree
Showing 37 changed files with 890 additions and 204 deletions.
2 changes: 2 additions & 0 deletions .Rbuildignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ src/stan_files/.*\.hpp$
^inst/sbc/sbc_report_full.html$
^.Rd*$
^.Rd*/*$
^.Rprofile$
^.brms-cache
^.gitignore$
^.gitattributes$
^_pkgdown\.yml$
Expand Down
13 changes: 13 additions & 0 deletions .Rprofile
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# ensure that an exisitng user .Rprofile is respected
if(exists("~/.Rprofile")) source("~/.Rprofile")

# sets up brms caching and use of cmdstanr if available
local({
if(requireNamespace("cmdstanr", quietly=TRUE)) {
# instruct brms to use cmdstanr as backend and cache all Stan binaries
brms_cache_dir <- Sys.getenv("BRMS_CACHE_DIR", here::here(".brms_cache"))
# create cache directory if not yet available
dir.create(brms_cache_dir, FALSE)
options(brms.backend="cmdstanr", cmdstanr_write_stan_file_dir=brms_cache_dir)
}
})
12 changes: 8 additions & 4 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ Description: Tool-set to support Bayesian evidence synthesis. This
for details on applying this package while Neuenschwander et al. (2010)
<doi:10.1177/1740774509356002> and Schmidli et al. (2014)
<doi:10.1111/biom.12242> explain details on the methodology.
Version: 1.7-0
Date: 2023-07-19
Version: 1.7-1
Date: 2023-08-08
Authors@R: c(person("Novartis", "Pharma AG", role = "cph")
,person("Sebastian", "Weber", email="[email protected]", role=c("aut", "cre"))
,person("Beat", "Neuenschwander", email="[email protected]", role="ctb")
Expand Down Expand Up @@ -37,7 +37,8 @@ Imports:
stats,
utils,
matrixStats,
abind
abind,
rlang
LinkingTo:
BH (>= 1.72.0),
Rcpp (>= 0.12.0),
Expand All @@ -63,7 +64,10 @@ Suggests:
tools,
broom,
tidyr,
parallel
parallel,
brms,
glue,
ragg
VignetteBuilder: knitr
SystemRequirements: GNU make, pandoc (>= 1.12.3), pandoc-citeproc, C++17
Encoding: UTF-8
Expand Down
22 changes: 14 additions & 8 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ SRCS = $(R_PKG_SRCS) $(R_SRCS) $(RMD_SRCS) $(STAN_SRCS)
NODIR_SRC = $(notdir $(SRCS))
BIN_OBJS = src/package-binary R/sysdata.rda
DOC_OBJS = man/package-doc inst/doc/$(RPKG).pdf
RCMD ?= R_PROFILE_USER="$(PROJROOT_ABS)/.Rprofile" "${R_HOME}/bin/R" -q

R_HOME ?= $(shell R RHOME)
PKG_VERSION ?= $(patsubst%’, %, $(word 2, $(shell grep ^Version DESCRIPTION)))
Expand All @@ -32,17 +33,17 @@ all : $(TARGET)

# tell makefile how to turn a Rmd into an md file
%.md : %.Rmd
cd $(@D); echo running R -q -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"
cd $(@D); R -q -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"
cd $(@D); echo running $(RCMD) -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"
cd $(@D); $(RCMD) -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"

%.md : %.R
cd $(@D); echo running R -q -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"
cd $(@D); R -q -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"
cd $(@D); echo running $(RCMD) -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"
cd $(@D); $(RCMD) -q -e "rmarkdown::render('$(<F)', output_format=rmarkdown::md_document(variant='markdown'))"

# render an html via the respective md file
%.html : %.md
cd $(@D); echo running R -q -e "rmarkdown::render('$(<F)', output_format=rmarkdown::html_document(self_contained=TRUE))"
cd $(@D); R -q -e "rmarkdown::render('$(<F)', output_format=rmarkdown::html_document(self_contained=TRUE))"
cd $(@D); echo running $(RCMD) -e "rmarkdown::render('$(<F)', output_format=rmarkdown::html_document(self_contained=TRUE))"
cd $(@D); $(RCMD) -e "rmarkdown::render('$(<F)', output_format=rmarkdown::html_document(self_contained=TRUE))"

R/stanmodels.R:
## ensure that NAMESPACE contains load directive
Expand Down Expand Up @@ -88,7 +89,7 @@ $(TARGET): build/r-source

build/r-source : $(BIN_OBJS) $(DOC_OBJS) $(SRCS)
install -d build
cd build; "${R_HOME}/bin/R" CMD build .. --no-build-vignettes --no-manual
cd build; $(RCMD) CMD build .. --no-build-vignettes --no-manual
cd build; mv $(RPKG)_$(PKG_VERSION).tar.gz $(RPKG)-source.tar.gz
touch build/r-source-fast

Expand All @@ -105,7 +106,7 @@ build/r-source-release : $(BIN_OBJS) $(DOC_OBJS) $(SRCS) inst/sbc/sbc_report.htm
install -d build/$(RPKG)-$(GIT_TAG)/man
cp -v man/*.Rd build/$(RPKG)-$(GIT_TAG)/man
# set NOT_CRAN=true to get vignettes render with full sampling
cd build; NOT_CRAN=true "${R_HOME}/bin/R" CMD build $(RPKG)-$(GIT_TAG)
cd build; NOT_CRAN=true $(RCMD) CMD build --compact-vignettes=both $(RPKG)-$(GIT_TAG)
#cd build; NOT_CRAN=false "${R_HOME}/bin/R" CMD build $(RPKG)-$(GIT_TAG) --no-build-vignettes --no-manual
rm -rf build/$(RPKG)-$(GIT_TAG)
cd build; $(MD5) $(RPKG)-$(GIT_TAG).tar.gz > $(RPKG)-$(GIT_TAG).md5
Expand All @@ -121,6 +122,11 @@ binary : NAMESPACE src/package-binary
PHONY += derived
derived : NAMESPACE $(BIN_OBJS) $(DOC_OBJS)

PHONY += r-source-check
r-source-check : r-source
cd build; tar xvzf RBesT-source.tar.gz
cd build; NOT_CRAN=true $(RCMD) CMD check RBesT

#$(DIR_OBJ)/%.o: %.c $(INCS)
# mkdir -p $(@D)
# $(CC) -o $@ $(CFLAGS) -c $< $(INC_DIRS)
Expand Down
2 changes: 2 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -140,6 +140,7 @@ export(mixfit)
export(mixgamma)
export(mixmvnorm)
export(mixnorm)
export(mixstanvar)
export(mn2beta)
export(mn2gamma)
export(mn2norm)
Expand Down Expand Up @@ -179,6 +180,7 @@ importFrom(matrixStats,logSumExp)
importFrom(matrixStats,rowLogSumExps)
importFrom(matrixStats,rowMins)
importFrom(matrixStats,rowRanks)
importFrom(rlang,.data)
importFrom(rstan,extract)
importFrom(rstan,get_sampler_params)
importFrom(rstan,sampling)
Expand Down
19 changes: 19 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,22 @@
# RBesT 1.7-1 - August 8th, 2023

## Enhancements

* Allow use of mixture priors in `brms` models with the new adapter
function `mixstanvar`.
* Allow use of named dimensions for multivariate normal mixtures.
* Add *experimental* diagnostic plots for mixture multivariate normal
EM fits. These are subject to changes in future release and user
feedback is very welcome.
* Compress png plots of vignettes saving ~40% in file size of the
package sources.

## Bugfixes

* Fix issue when plotting EM diagnostic debugging plots for normal
mixtures.
* Comply with newer ggplot2 standards to not use `aes_string`.

# RBesT 1.7-0 - July 19th, 2023

## Enhancements
Expand Down
1 change: 1 addition & 0 deletions R/EM_bmm_ab.R
Original file line number Diff line number Diff line change
Expand Up @@ -267,6 +267,7 @@ EM_bmm_ab <- function(x, Nc, mix_init, Ninit=50, verbose=FALSE, Niter.max=500, t

mixEst <- mixEst[,order(mixEst[1,], decreasing=TRUE),drop=FALSE]
colnames(mixEst) <- paste("comp", seq(Nc), sep="")
likelihood(mixEst) <- "binomial"
dlink(mixEst) <- identity_dlink
class(mixEst) <- c("EM", "EMbmm", "betaMix", "mix")

Expand Down
14 changes: 11 additions & 3 deletions R/EM_mnmm.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ EM_mnmm <- function(X, Nc, mix_init, Ninit=50, verbose=FALSE, Niter.max=500, tol
##}

##mixEst <- list(p=pEst, mean=muEst, sigma=covEst)

mixEst <- do.call(mixmvnorm, lapply(1:Nc, function(i) c(pEst[i], muEst[i,,drop=FALSE], matrix(covEst[i,,], Nd, Nd))))
mixEst <- do.call(mixmvnorm, lapply(1:Nc, function(i) c(pEst[i], muEst[i,,drop=TRUE], matrix(covEst[i,,], Nd, Nd))))

## give further details
attr(mixEst, "df") <- df
Expand All @@ -185,9 +185,11 @@ EM_mnmm <- function(X, Nc, mix_init, Ninit=50, verbose=FALSE, Niter.max=500, tol

attr(mixEst, "Nc") <- Nc

convert <- function(est) suppressWarnings(do.call(mixmvnorm, lapply(1:Nc, function(i) c(est$p[i], est$mean[i,,drop=FALSE], matrix(est$sigma[i,,], Nd, Nd)))))

attr(mixEst, "tol") <- tol
attr(mixEst, "traceLli") <- traceLli
attr(mixEst, "traceMix") <- traceMix
attr(mixEst, "traceMix") <- lapply(traceMix, convert)
attr(mixEst, "x") <- X

class(mixEst) <- c("EM", "EMmvnmm", "mvnormMix", "mix")
Expand All @@ -203,6 +205,12 @@ EM_nmm <- function(X, Nc, mix_init, verbose=FALSE, Niter.max=500, tol, Neps, eps
mixEst <- EM_mnmm(X=X, Nc=Nc, mix_init=mix_init, verbose=verbose, Niter.max=Niter.max, tol=tol, Neps=Neps, eps=eps)
rownames(mixEst) <- c("w", "m", "s")
class(mixEst) <- c("EM", "EMnmm", "normMix", "mix")
attr(mixEst, "traceMix") <- lapply(attr(mixEst, "traceMix"),
function(x) {
class(x) <- class(mixEst)
rownames(x) <- rownames(mixEst)
x
})
likelihood(mixEst) <- "normal"
mixEst
}
Expand Down
20 changes: 17 additions & 3 deletions R/EM_msmm.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,7 @@
## modelling using the t distribution". D. Peel and G.J. McLachlan,
## Statistics and Computing (2000), 10, 339-348,

EM_msmm <- function(X, Nc, init, Ninit=50, verbose=TRUE, Niter.max=500, tol=1e-1)
{
EM_msmm <- function(X, Nc, init, Ninit=50, verbose=TRUE, Niter.max=500, tol=1e-1) {
## in case X is no matrix, interpret it as uni-variate case
if(!is.matrix(X))
X <- matrix(X,ncol=1)
Expand Down Expand Up @@ -117,7 +116,22 @@ EM_msmm <- function(X, Nc, init, Ninit=50, verbose=TRUE, Niter.max=500, tol=1e-1
## calculate additional weights of the U matrix aka latent
## tail mass of a point
for(i in seq(Nc)) {
lU[,i] <- log(nuEst[i] + Nd) - log(nuEst[i] + mahalanobis(X, muEst[i,], as.matrix(covEst[i,,])) ) ## Eq. 20
Xc_i <- sweep(X, 2L, muEst[i,])
Sigma_i <- as.matrix(covEst[i,,])
## in rare cases the covariance matrix becomes (almost)
## singular in which case the alternative cholesky
## factorization gives more stable results
maha_dist <- tryCatch(
mahalanobis(Xc_i, FALSE, Sigma_i),
error = function(e) {
## see https://stats.stackexchange.com/questions/147210/efficient-fast-mahalanobis-distance-computation
## also adding eps to the diagonal to further stabilize the computation
L_i <- t(chol(Sigma_i + diag(5 * .Machine$double.eps, Nd, Nd)))
y_i <- forwardsolve(L_i, t(Xc_i))
colSums(y_i^2)
}
)
lU[,i] <- log(nuEst[i] + Nd) - log(nuEst[i] + maha_dist) ## Eq. 20
}

## mean probability to be in a specific mixture component ->
Expand Down
132 changes: 93 additions & 39 deletions R/EM_plot.R
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@
#' @export
plot.EM <- function(x, size=1.25, link=c("identity", "logit", "log"), ...) {
pl <- list()
if(inherits(x, "mvnormMix")) stop("Multivariate normal mixtures plotting not supported.")
pl$mixdist <- plot.mix(x, size=size, ...)
if(!is_mixmv(x))
pl$mixdist <- plot.mix(x, size=size, ...)
## in verbose mode we output EM fit diagnostics
if(getOption("RBesT.verbose", FALSE)) {
## these NULL assignments make R check happy
Expand Down Expand Up @@ -86,58 +86,112 @@ plot.EM <- function(x, size=1.25, link=c("identity", "logit", "log"), ...) {
Mw <- within(Mw, { Lw=logit(w) })
}
if("EMgmm" %in% class(x)) {
Mw <- within(Mw, { la <- log(a)
lb <- log(b) })
Mw <- within(Mw, {
la <- log(a)
lb <- log(b)
})
if(Nc != 1)
Mw <- within(Mw, { Lw=logit(w) })
}
pars <- names(Mw)[-c(1,2)]
Mw <- within(Mw, { Comp=factor(comp) } )
LL <- data.frame(iteration=0:max(Mw$iteration), lli=attr(x, "traceLli"))
basePl <- ggplot(Mw, aes_string(x="iteration", colour="Comp")) + geom_line(size=size)
basePl <- ggplot(Mw, aes(x=.data$iteration, colour=.data$Comp)) + geom_line(size=size)
for(p in pars) {
pl[[p]] <- basePl + aes_string(y=p)
pl[[p]] <- basePl + aes(y=.data$p)
}
pl$lli <- ggplot(subset(LL, iteration>0), aes_string(x="iteration", y="lli")) + geom_line(size=size) + ylab("log-likelihood")
pl$lli <- ggplot(subset(LL, iteration>0), aes(x=.data$iteration, y=.data$lli)) + geom_line(size=size) + ylab("log-likelihood")
}
##pl$mix <- plot.mix(x, comp=TRUE, samp=attr(x, "x"), ...)
link <- match.arg(link)
dlink(x) <- link_map[[link]]

samp <- data.frame(Sample=mixlink(x, as.vector(attr(x, "x"))))
## workaround a weird bug in ggplot which enlarges the interval
interval <- quantile(samp$Sample, c(0.025, 0.975))
n_fun <- 501
max_span <- diff(range(samp))
interval_span <- diff(interval)
n_fun <- min(5E3, round(n_fun * max_span/interval_span))

if(!is.dlink_identity(dlink(x)))
subtitle <- paste("Link:", dlink(x)$name)
else
subtitle <- NULL

pl$mixdens <- bayesplot::mcmc_dens(samp) + bayesplot::facet_text(FALSE) +
stat_function(inherit.aes=FALSE, fun=dmix, args=list(mix=x), size=size, n=n_fun) +
ggtitle("Parametric Mixture (black line) and Kernel Estimate of Sample Density", subtitle=subtitle) +
bayesplot::xaxis_title(FALSE)

cols <- bayesplot::color_scheme_get(i=1:6)

pl$mixecdf <- ggplot(samp, aes_string(x="Sample")) +
stat_ecdf(geom="area", size=0, fill=cols$light) +
stat_ecdf(geom="step", size=size, colour=cols$mid) +
stat_function(fun=pmix, args=list(mix=x), size=size, n=n_fun) +
ggtitle("Estimated Cumulative Density from Parametric Mixture (black line) and Sample", subtitle=subtitle) +
bayesplot::bayesplot_theme_get() +
bayesplot::yaxis_title(FALSE) +
bayesplot::xaxis_title(FALSE) +
bayesplot::facet_text(FALSE)

pl$mix <- bayesplot::mcmc_hist(samp, binwidth=diff(interval)/50, freq=FALSE) + bayesplot::facet_text(FALSE) +
stat_function(inherit.aes=FALSE, fun=dmix, args=list(mix=x), size=size, n=n_fun) +
ggtitle("Parametric Mixture Density (black line) and Histogram of Sample", subtitle=subtitle) +
bayesplot::xaxis_title(FALSE)
if(!is_mixmv(x)) {
## univariate case
samp <- data.frame(Sample=mixlink(x, as.vector(attr(x, "x"))))
## workaround a weird bug in ggplot which enlarges the interval
interval <- quantile(samp$Sample, c(0.025, 0.975))
n_fun <- 501
max_span <- diff(range(samp))
interval_span <- diff(interval)
n_fun <- min(5E3, round(n_fun * max_span/interval_span))

if(!is.dlink_identity(dlink(x)))
subtitle <- paste("Link:", dlink(x)$name)
else
subtitle <- NULL

pl$mixdens <- bayesplot::mcmc_dens(samp) + bayesplot::facet_text(FALSE) +
stat_function(inherit.aes=FALSE, fun=dmix, args=list(mix=x), size=size, n=n_fun) +
ggtitle("Parametric Mixture (black line) and Kernel Estimate of Sample Density", subtitle=subtitle) +
bayesplot::xaxis_title(FALSE)

pl$mixecdf <- ggplot(samp, aes(x=.data$Sample)) +
stat_ecdf(geom="area", size=0, fill=cols$light) +
stat_ecdf(geom="step", size=size, colour=cols$mid) +
stat_function(fun=pmix, args=list(mix=x), size=size, n=n_fun) +
ggtitle("Estimated Cumulative Density from Parametric Mixture (black line) and Sample", subtitle=subtitle) +
bayesplot::bayesplot_theme_get() +
bayesplot::yaxis_title(FALSE) +
bayesplot::xaxis_title(FALSE) +
bayesplot::facet_text(FALSE)

pl$mix <- bayesplot::mcmc_hist(samp, binwidth=diff(interval)/50, freq=FALSE) + bayesplot::facet_text(FALSE) +
stat_function(inherit.aes=FALSE, fun=dmix, args=list(mix=x), size=size, n=n_fun) +
ggtitle("Parametric Mixture Density (black line) and Histogram of Sample", subtitle=subtitle) +
bayesplot::xaxis_title(FALSE)
} else if(inherits(x, "mvnormMix")) {
var1 <- var2 <- NULL
## multivariate case: only support mvnorm for now (the only
## one supported as of Aug 2023). Plot the pair-wise marginal
## densities, which are the pair-wise marginal mixtures.
message("Diagnostic plots for mixture multivariate normal densities are experimental.\nPlease note that these are subject to changes in future releases.")
samp <- attr(x, "x")
p <- ncol(samp)
dim_labels <- mvnorm_dim_labels(x[-1,1])
if(is.null(dim_labels))
dim_labels <- paste0("Dimension ", 1:p)
breaks <- 60
nbins <- 20
pairs <- subset(expand.grid(var1=1:p, var2=1:p), var1 >= var2)
layout <- matrix(NA, nrow=p, ncol=p)
pl_pairs <- list()
pl_pairs_compact <- list()
for(i in seq_len(nrow(pairs))) {
v1 <- pairs$var1[i]
v2 <- pairs$var2[i]
label <- paste0("mixpair[", v2, ",", v1, "]")
mix_sub <- mvnorm_extract_dim(x, unique(c(v2, v1)))
layout[v1,v2] <- i
if (v1 == v2) {
interval <- quantile(samp[,v1], c(0.025, 0.975))
pl_pairs[[label]] <- bayesplot::mcmc_hist(samp[,v1, drop=FALSE], binwidth=diff(interval)/50, freq=FALSE) +
bayesplot::facet_text(FALSE) +
stat_function(inherit.aes=FALSE, fun=function(mix, x) dmix(mix, matrix(x, nrow=length(x))), args=list(mix=mix_sub)) +
ylab(dim_labels[v2]) +
xlab(dim_labels[v2])
} else {
data_ranges <- apply(samp[,c(v2, v1), drop=FALSE], 2, range)
colnames(data_ranges) <- c("x", "y")
data_grid <- expand.grid(apply(data_ranges, 2, function(r) seq(r[1], r[2], length=breaks), simplify=FALSE))
data_grid$z <- dmix(mix_sub, as.matrix(data_grid), log=TRUE)
pl_pairs[[label]] <- bayesplot::mcmc_scatter(samp[,c(v2, v1), drop=FALSE], alpha=0.1) +
bayesplot::facet_text(FALSE) +
geom_contour(aes(z=.data$z), data=data_grid, bins=nbins, colour="black") +
xlab(dim_labels[v2]) +
ylab(dim_labels[v1])
}
pl_pairs_compact[[label]] <- pl_pairs[[label]]
if(v1 != p)
pl_pairs_compact[[label]] <- pl_pairs_compact[[label]] + bayesplot::xaxis_title(FALSE) + bayesplot::xaxis_ticks(FALSE) + bayesplot::xaxis_text(FALSE)
if(v2 != 1)
pl_pairs_compact[[label]] <- pl_pairs_compact[[label]] + bayesplot::yaxis_title(FALSE) + bayesplot::yaxis_ticks(FALSE) + bayesplot::yaxis_text(FALSE)
}
pl$mixpairs <- bayesplot::bayesplot_grid(plots=pl_pairs_compact, grid_args=list(nrow=p, ncol=p, layout_matrix=layout))
pl$mixpairs$bayesplots <- pl_pairs
}

pl
}
Expand Down
Loading

0 comments on commit 20399f1

Please sign in to comment.