### ###########################################################
###
### Computing of reference distribution; possibly in parallel
###
### ###########################################################

#' @title Calculate reference distribution using parametric bootstrap
#' 
#' @description Calculate reference distribution of likelihood ratio statistic
#'     in mixed effects models using parametric bootstrap
#'
#' @name pb-refdist
#' 
#' @details The model \code{object} must be fitted with maximum likelihood
#'     (i.e. with \code{REML=FALSE}). If the object is fitted with restricted
#'     maximum likelihood (i.e. with \code{REML=TRUE}) then the model is
#'     refitted with \code{REML=FALSE} before the p-values are calculated. Put
#'     differently, the user needs not worry about this issue.
#' 
#' @aliases PBrefdist PBrefdist.mer PBrefdist.merMod PBrefdist.lm
#' @param largeModel A linear mixed effects model as fitted with the
#'     \code{lmer()} function in the \pkg{lme4} package. This model muse be
#'     larger than \code{smallModel} (see below).
#' @param smallModel A linear mixed effects model as fitted with the
#'     \code{lmer()} function in the \pkg{lme4} package. This model muse be
#'     smaller than \code{largeModel} (see above).
#' @param nsim The number of simulations to form the reference distribution.
#' @param seed Seed for the random number generation.
#' @param cl A vector identifying a cluster; used for calculating the reference
#'     distribution using several cores. See examples below.
#' @param details The amount of output produced. Mainly relevant for debugging
#'     purposes.
#' @return A numeric vector
#' @author Søren Højsgaard \email{sorenh@@math.aau.dk}
#' @seealso \code{\link{PBmodcomp}}, \code{\link{KRmodcomp}}
#' @references Ulrich Halekoh, Søren Højsgaard (2014)., A Kenward-Roger
#'     Approximation and Parametric Bootstrap Methods for Tests in Linear Mixed
#'     Models - The R Package pbkrtest., Journal of Statistical Software,
#'     58(10), 1-30., \url{http://www.jstatsoft.org/v59/i09/}
#' @keywords models inference
#' @examples
#' 
#' data(beets)
#' head(beets)
#' beet0 <- lmer(sugpct ~ block + sow + harvest + (1|block : harvest), data=beets, REML=FALSE)
#' beet_no.harv <- update(beet0, .~.-harvest)
#' rr <- PBrefdist(beet0, beet_no.harv, nsim=20)
#' rr
#' 
#' ## Note: Many more simulations must be made in practice.
#' 
#' ## Computations can be made in parallel using several processors:
#' \dontrun{
#' cl <- makeSOCKcluster(rep("localhost", 4))
#' clusterEvalQ(cl, library(lme4))
#' clusterSetupSPRNG(cl)
#' rr <- PBrefdist(beet0, beet_no.harv, nsim=20)
#' stopCluster(cl)
#' }
#' ## Above, 4 cpu's are used and 5 simulations are made on each cpu.
#' 

#' @rdname pb-refdist
PBrefdist <- function(largeModel, smallModel, nsim=1000, seed=NULL, cl=NULL, details=0){
    UseMethod("PBrefdist")
}

#' @rdname pb-refdist
PBrefdist.lm <- function(largeModel, smallModel, nsim=1000, seed=NULL, cl=NULL, details=0){

  ##cat(".....PBrefdist.lm\n")
  t0 <- proc.time()
  .is.cluster <- !is.null(cl) && inherits(cl, "cluster")

  if (!.is.cluster){
    ref <- .lm_refDist(largeModel, smallModel, nsim, seed=seed)
  } else {
    nsim2 <- round(nsim/length(cl))
    if (details>=1)
      cat(sprintf("* Using %i clusters and %i samples per cluster\n", length(cl), nsim2))
    clusterExport(cl, ls(envir=.GlobalEnv), envir = .GlobalEnv)
    clusterSetRNGStream(cl)
    ref <- unlist(clusterCall(cl, .lm_refDist, largeModel, smallModel, nsim2))
  }

  ref <- ref[ref>0]
  ctime <- (proc.time()-t0)[3]
  attr(ref,"ctime") <- ctime
  if (details>0)
    cat(sprintf("Reference distribution with %i samples; computing time: %5.2f secs. \n",
                length(ref), ctime))

  ref
}

.lm_refDist <- function(lg, sm, nsim=20, seed=NULL, simdata=simulate(sm, nsim=nsim, seed=seed)){
    ##simdata <- simulate(sm, nsim, seed=seed)
    ee  <- new.env()
    ee$simdata <- simdata

    ff.lg <- update.formula(formula(lg),simdata[,ii]~.)
    ff.sm <- update.formula(formula(sm),simdata[,ii]~.)
    environment(ff.lg) <- environment(ff.sm) <- ee

    cl.lg <- getCall(lg)
    cl.sm <- getCall(sm)

    cl.lg$formula <- ff.lg
    cl.sm$formula <- ff.sm

    ref <- rep.int(NA, nsim)
    for (ii in 1:nsim){
        ref[ii] <- 2*(logLik(eval(cl.lg))-logLik(eval(cl.sm)))
    }
    ref
}

.merMod_refDist <- function(lg, sm, nsim=20, seed=NULL, simdata=simulate(sm, nsim=nsim, seed=seed)){
  #simdata <- simulate(sm, nsim=nsim, seed=seed)
  unname(unlist(lapply(simdata, function(yyy){
    sm2     <- refit(sm, newresp=yyy)
    lg2     <- refit(lg, newresp=yyy)
    2*(logLik( lg2, REML=FALSE ) - logLik( sm2, REML=FALSE ))
  })))
}

#' @rdname pb-refdist
PBrefdist.merMod <- function(largeModel, smallModel, nsim=1000, seed=NULL, cl=NULL, details=0){

    t0 <- proc.time()
    if (getME(smallModel, "is_REML"))
        smallModel <- update( smallModel, REML=FALSE )
    if (getME(largeModel, "is_REML"))
        largeModel <- update( largeModel, REML=FALSE )
    
    .is.cluster <- !is.null(cl) && inherits(cl, "cluster")
    if (!.is.cluster){
        ref <- .merMod_refDist(largeModel, smallModel, nsim=nsim, seed=seed)
    } else {
        nsim.cl <- nsim %/% length(cl)
        clusterSetRNGStream(cl)
        ref <- unlist( clusterCall(cl, fun=.merMod_refDist, largeModel, smallModel, nsim=nsim.cl) )
    }
    
    ctime <- (proc.time()-t0)[3]
    attr(ref,"ctime")   <- ctime
    attr(ref,"samples") <- c(nsim=nsim, npos=sum(ref>0))
    if (details>0)
        cat(sprintf("Reference distribution with %5i samples; computing time: %5.2f secs. \n",
                    length(ref), ctime))
    
    ref
}

#' @rdname pb-refdist
PBrefdist.mer <- PBrefdist.merMod















