########################################################
## Copyright (c) 2005
## University of Washington
## Licensed under the terms set forth by University of
## Washington. If you did not sign such a license, you
## are using this software/code illegally and you do not
## have permission to use, modify, or redistribute
## this or any files in this software package.
##
## File: odp.r
## Contains functions for computing the ODP algorithm.
##
## Notes on architecture:
##
## ODP may be calculated in a typical R function from
## the command line, or it may be computed in a
## background thread.
##
## To use the typical R command line, call the statex()
## function, which may take A VERY LONG TIME, giving NO
## FEEDBACK. Calling statex() results in the following
## sequence of function calls:
##
## statex() --> odp.func(background=FALSE)
##    --> odp.start(background=FALSE)
##
## For the threaded version, call odp.func() with
## background=TRUE
##
## odp.func(background=TRUE)
##    --> odp.start(background=TRUE)
##
## In the background thread scenario, odp.func() returns
## immediately, although the calculation continues to run
## "in the background". The function uses a combination
## of Tcl/Tk features and POSIX threads (available ONLY
## within the C library, non-accessible from R) to
## periodically call an update function provided as an
## argument to odp.func(). You may want to check out the
## EDGE GUI code, also in this file, for an example
## of how updates are used to inform the user of the
## calculation's progress.
##
## When the thread finishes, odp.return.func() is called
## once to verify and compile the results. This function
## also calls a user-provided function (provided as an
## argument to odp.func) that can be used (e.g.) to save
## results in a variable or file.
##
#########################################################

require(tcltk, quietly = TRUE, keep.source = FALSE)  || stop("TCLTK support is absent.")
print("includeui.r"); source("includeui.r")
print("include.r"); source("include.r")
print("timecourse.r"); source("timecourse.r")
print("util.r"); source("util.r")
print("thread.r"); source("thread.r")
print("odp.r")
edgedir <- getwd()

## Globals
## Error message for non-finite statistics
gNonFiniteStats = paste("The analysis returned one or more non-finite",
                   "numbers. This probably means there is a systematic",
                   "error in your data resulting in a zero variance",
                   "for a gene.")

#########################################################################
##       This function finds p-values for the static k-sample           
##         fold-change and/or directional differential                  
##         expression problem using the estimated                       
##         ODP rule under a Normal dist'n assumption                    
##       Author(s): John Storey, Alan Dabney, James Dai, Eva Monsen     
##       Language: R                                                    
##       References:                                                    
##       Arguments:                                                     
##         dat - data matrix, rows genes, columns arrays                
##         grp - vector of group labels (2 or more).                    
##         B - number of null permutations.                             
##         match - vector denoting the matched design, if one exists    
##         printmsg - function that prints messages to the user (may be
##           NULL
#########################################################################
statex <- function(dat, grp, match=NULL, B=100, seed=NULL, printmsg=NULL)
{
  result <- odp.func(dat, grp, match, B, seed, printmsg=printmsg,
                     background=FALSE)
  return(result)
}


#########################################################################
## This function runs an ODP score calculation, or starts
## one in a background thread.
## Arguments:
##   dat - the data to work with
##   match -
##   B - the number of iterations
##   seed - seed for the random number generator (NULL indicates that 
##     a random seed should be used)
##   updatefunc - a function to call periodically (approx. every 500 ms)
##     allowing a GUI to check up on the thread's status
##   finalfunc - the function to call when the thread completes
##   printmsg - a function that prints its arguments (to console, gui, 
##     whatever
##   background - if true, starts the ODP calculation in a background
##     thread and returns the thread ID. If false, runs the ODP
##     calculation and returns the results as a list with the following
##     members:
##        lr : the observed statistics
##        lr0 : the null permutation statistics
#########################################################################
odp.func <- function(dat, grp=NULL, match=NULL, B=100, seed=NULL,
                     updatefunc=NULL, finalfunc=NULL,
                     printmsg=NULL,
                     background=TRUE) 
{
  ## Check input parameters
  
  if(!is.matrix(dat))
    stop("dat is not a matrix")
 
  if(is.null(grp))
    grp <- rep(1,ncol(dat))

  if(!is.null(grp) && length(grp) != ncol(dat))
    stop("length of grp must equal number of columns in dat")
  
 

  if(!is.valid.number(B))
    stop("B must be a number")
  
  debug.params()

  if(!is.null(seed)) {
    set.seed(seed)
  }

# gah - alist still preserve NULL list elements
  datalist <- list()
# gah - adding NULL to a list doesn't add anything, so add 0 instead!
  datalist$B <- as.integer(B)
#  if(!is.null(updatefunc)) datalist$updatefunc <- updatefunc
#  else datalist$updatefunc <- 0
#  if(!is.null(finalfunc)) datalist$finalfunc <- finalfunc
#  else datalist$finalfunc <- 0
#  if(!is.null(printmsg)) datalist$printmsg <- printmsg
#  else datalist$printmsg <- 0
  datalist["updatefunc"] <- list(updatefunc)
  datalist["finalfunc"] <- list(finalfunc)
  datalist["printmsg"] <- list(printmsg)

  datalist$grp <- grp
# gah - this has been NULL, and not added
#   datalist$match <- match
  datalist$dat <- dat

  datalist$startTime <- proc.time()[3]
  
  m <- nrow(dat)
  n <- ncol(dat)

  if(!is.null(grp)) {
	grp <- make.consecutive.int(grp)
    k <- length(unique(grp))
  }

  if(is.null(grp) || k==1) {
    grp <- rep(1,n)
    n.grp <- table(grp)
    n.grp <- as.vector(n.grp)
    ID <- matrix(grp,ncol=1)
	k <- 1
  }

  
  if(k>1) {
    ID <- model.matrix(~factor(grp) - 1)
    debug.object(ID)
    n.grp <- table(grp)
    n.grp <- as.vector(n.grp)
  }  

  debug.object(grp)
  debug.object(ID)

  if(!is.null(match)) {
    match <- make.consecutive.int(match)
    if(k!=2) { 
      stop("Cannot do a matched analysis with other than 2 classes.")
    }
    if((n%%2) !=0 ) {
      stop("Number of arrays is not even and you have designated a matched design.")
    }
    if(abs(1-min(sort(match[grp==1])==sort(match[grp==2])))) {
      stop("The matching variable is not correct.")
    }
    xx <- 0*dat[,1:(n/2)]
    for(i in 1:max(match)) {
      xx[,i] <- dat[,grp==2][,match[grp==2]==i] - dat[,grp==1][,match[grp==1]==i]
    }
    dat <- xx
    rm(xx)
    n <- n/2
    grp <- rep(1,n)
    n.grp <- table(grp)
    n.grp <- as.vector(n.grp)
    ID <- matrix(grp,ncol=1)
    k <- 1
  }

  ## If not matching, and if more than one group, center each gene
  if(k>1 && is.null(match)) {
    mu0 <- drop(dat %*% rep(1 / n, n)) 
    dat <- dat - mu0
    ## sigma0: m-vector of "null" sd's
    sigma0 <- drop(sqrt(((dat) ^ 2) %*% rep(1/(n-1), n))) 
  }
  if(k==1) {
    ## sigma0: m-vector of "null" sd's
    sigma0 <- drop(sqrt(((dat) ^ 2) %*% rep(1/n, n)))
  }

  ## xx: m-vector of sums-of-squares 
  xx <- drop((dat ^ 2) %*% rep(1, n))
  ## xk: (m x k) matrix of group-specific sums 
  xk <- dat %*% ID
  ## mu: (m x k) matrix of group-specific means
  mu <- t(t(xk) / n.grp)
  ## sigma: m-vector of pooled sd's 
  sigma <- drop(sqrt(((dat - mu %*% t(ID)) ^ 2) %*% rep(1 / (n - k), n)))

  ## res: (m x n) matrix of residuals
  res <- sqrt(n/(n-k))*(dat - mu %*% t(ID))
  ## vv: (B x n) matrix of indices to use for each bootstrap sample
  vv <- matrix(rep(0,n*B), ncol=n)
  for(i in 1:B) {
    vv[i,] <- sample(1:n, replace=TRUE)
  }
  
  ## nii: m0-vector of indices showing which densities to use in ODP denominator
  nii <- 1:m
  nii <- use.genes(dat=dat, grp=grp)

  ##These should already be defined from above
  ## m: number of genes
  ## n: number of samples
  ## k: number of groups
  ## B: number of bootstrap samples
  
  ## m0: number of densities to use in ODP denominator
  m0 <- length(nii)
  ## set up start data
  datalist$xx <- xx
  datalist$xk <- xk
  datalist$mu <- mu
  datalist$sigma <- sigma
  datalist$sigma0 <- sigma0
  datalist$muBS <- mu[nii, ]
  datalist$sigmaBS <- sigma[nii]
  datalist$sigma0BS <- sigma0[nii]
# add as.integer as appropriate so R will do the conversion
# before calling the C code.
  datalist$ID <- as.integer(ID)
  datalist$n.grp <- as.integer(n.grp)
  datalist$res <- res[nii, ]
# gah - moved up so they get into datalist
#  if(k > 1)
    vv = vv - 1
  nii = nii - 1

  datalist$vv <- as.integer(vv)
  datalist$nii <- as.integer(nii)
# gah - I don't think as.integer was necessary for m, n, k, m0,
# but it doesn't hurt.
  datalist$m <- as.integer(m)
  datalist$n <- as.integer(n)
  datalist$k <- as.integer(k)
#  datalist$B <- B  again?
  datalist$m0 <- as.integer(m0)
# reset startTime here, hopefully more accurate than above.
# this should keep its place in the list.
  datalist$startTime <- proc.time()[3]

# gah - debug datalist
if(debugOn==2) {
   print("print datalist")
   print(datalist)
   }
if(debugOn) print(length(datalist))

  if (background)
    {
      updatefunc <- check.not.nullna(datalist$updatefunc)
      finalfunc <- check.not.nullna(datalist$finalfunc)
      
      ## Start a background thread using odp.start()
      ## Note that endfn=odp.return.func means that odp.return.func()
      ## will be called when the thread finishes.
      ##
      ## Check the thread's progress every 500 ms (1/2 second),
      ## calling updatefunc at each check.
      tid = thread(odp.start, startdata=datalist,
        updatefn=datalist$updatefunc, updatedata=datalist,
        endfn=odp.return.func, enddata=datalist,
        updateInterval=500)
      ## Stop here.
      return(tid)
    }
  else
    {
      ## Call the ODP C function directly.
      result <- odp.start(datalist, background=FALSE)

      ## NaN or NA results invalidate the whole analysis
      if(any(!is.finite(result$lr0)) || any(!is.finite(result$lr))) {
# gah - print lr0 and lr to help find bugs
# this is only for the non-background case, though.
        if(debugOn && any(!is.finite(result$lr0))) {
           print("lr0:")
           print(result$lr0)
        }
        if(debugOn && any(!is.finite(result$lr ))) {
           print("lr:")
           print(result$lr)
        }
        stop(gNonFiniteStats)
      }
  
      result$pvals <- get.pvalues(result$lr, result$lr0)
      
      odpresult <<- result
      return(result)
    }
}

#########################################################################
## This function calls the low-level C function StartODP.
## Arguments:
##   datalist - a list with data needed by the C function.
#########################################################################
odp.start <- function(datalist, background=TRUE)
{
  if(!is.list(datalist))
    stop("datalist argument [passed to odp.start()] is not a list")

  xx <- check.not.nullna(datalist$xx)
  xk <- check.not.nullna(datalist$xk)
  mu <- check.not.nullna(datalist$mu)
  sigma <- check.not.nullna(datalist$sigma)
  sigma0 <- check.not.nullna(datalist$sigma0)
  muBS <- check.not.nullna(datalist$muBS)
  sigmaBS <- check.not.nullna(datalist$sigmaBS)
  sigma0BS <- check.not.nullna(datalist$sigma0BS)
  ID <- check.not.nullna(datalist$ID)
  n.grp <- check.not.nullna(datalist$n.grp)
  res <- check.not.nullna(datalist$res)
  vv <- check.not.nullna(datalist$vv)
  nii <- check.not.nullna(datalist$nii)
  B <- check.not.nullna(datalist$B)
  m0 <- check.not.nullna(datalist$m0)
  
  m = length(xx)
  n = nrow(ID)

# gah - move up so it gets into datalist
#  if(k > 1)
#    vv = vv - 1
#  nii = nii - 1

# gah - debugging data
if(debugOn==3) {
  print(datalist)
  for(i in 1:length(datalist))
    print(paste(i,datalist[i],names(datalist[i]),length(datalist[i])))
  }
#  .Call("showArgs1",datalist);

if(debugOn) savedata <<- datalist;

  if(!is.loaded("RunODP"))
    try.load(paste(edgedir, "/edge", .Platform$dynlib.ext, sep = ""))

##  datalist is passed to RunODP and then to the COdpThread constructor.
##  elements are extracted by position, so one must be careful that
##  datalist elements are added in the same order.  
  result <- .Call("RunODP",
      datalist,
      startThread = as.integer(background)
      )

  return(result)
}

#########################################################################
## This function is used internally. It is called when the background
## ODP thread finishes.
#########################################################################
odp.return.func <- function(tid, progress, datalist)
{
  ## Check whether we were called because someone canceled the thread
  if(as.logical(progress$canceled))
    return("canceled")

  if(!is.list(datalist))
    stop("datalist passed to odp.return.func is not a list")

  ## Unwrap datalist.
  finalfunc <- check.not.nullna(datalist$finalfunc)
  B <- check.not.nullna(datalist$B)
  k <- check.not.nullna(datalist$k)
  mu <- check.not.nullna(datalist$mu)
  printmsg <- check.not.nullna(datalist$printmsg)
 
  result <- check.not.nullna(odp.getresult(tid))

# gah - debug result
if(debugOn) {
   print(cat("runif random number=",runif(1)));
   print(paste("length of lr",length(result$lr)));
   print("result lr[1:100]:")
   print(result$lr[1:100])
   print(paste("sum(lr)",sum(result$lr)))
   print(paste("length of lr0",length(result$lr0)));
   print("result lr0[1:100]:")
   print(result$lr0[1:100])
   print(paste("sum(lr0)",sum(result$lr0)))
   hist(log(result$lr0))
   postscript("/tmp/plot%03d.ps")
   hist(log(result$lr0))
   dev.off()
   }

  datalist$lr <- c(datalist$lr, result$lr)

  ## NaN or NA results invalidate the whole analysis
  if(any(!is.finite(result$lr0)) || any(!is.finite(result$lr))) {
    progress$succeeded <- FALSE
    progress$reason <- gNonFiniteStats
  }
  
  ## lr0 is an m0-row, B-column matrix (where m0 is the
  ## current chunk's number of genes)
  
  ## Concatenate this chunk's null statistics to stats for other
  ## chunks so far
  if(!is.null(datalist$lr0))
    datalist$lr0 <- concat.matrices(datalist$lr0, matrix(result$lr0, ncol=B))
  else
    datalist$lr0 <- matrix(result$lr0, ncol=B)
	
   
  ## Compute p-values for this chunk and concatenate them to the
  ## p-values for other chunks computed so far
  datalist$pvals <- c(datalist$pvals, get.pvalues(result$lr, result$lr0))
  
  ## If no one has set progress$succeeded, assume
  ## successful.
  if(is.null(progress$succeeded))
    progress$succeeded <- TRUE
  
  ## pvals: 1:nrow(dat)
  ## lr: nrow(dat)
  ## lr0: B * nrow(res)
  
  finalfunc(p=datalist$pvals, lr=datalist$lr, lr0=datalist$lr0,
            status=progress)
}

## Calls the C function "GetODPResult"
## to get the result of a background ODP computation.
odp.getresult <- function(tid)
{
  lis <- .Call("GetODPResult", tid=tid)
  return(lis)
}

## Selects which genes to use in the denominator
## of the ODP statistic

use.genes <- function(dat,grp){
  m <- dim(dat)[1]
  if(length(unique(grp)) == 1){
    p <- apply(dat,1,function(x){t.test(x)$p.value})
  }
  else{
    p <- apply(dat,1,function(y){anova(lm(y ~ grp))[1,5]})
  }
  q <- qvalue(p)
  m0 <- round(m*q$pi0)
#  print(m0/m)
  use <- rank(1-p) <= m0
  use <- (1:m)[use]
  return(use)
}
