biasestimate.R

#####################################################################################################################################################################################################################################################################################################################################################

#' Simulation study for evaluating the accuracy of the bias estimate using the mean bias estimate across the simulated scenarios, the mean squared error and the mean half length of the corresponding confidence interval of the estimate

#####################################################################################################################################################################################################################################################################################################################################################

#Libraries
library(MASS)
library(matlib)


#' @param N total sample size
#' @param K number of strata
#' @param m number of endpoints
#' @param balancing the balancing of the total sample across the strata, i.e. in the case of balanced strata balancing=rep(1,K)
#' @param mu_E expected value of the experimental group
#' @param mu_C expected value of the control group
#' @param Sigma covariance matrix of the patient responses regarding the m endpoints
#' @param eta Kxm matrix containing the endpoint and strata specific biasing factors
#' @param randomization one of the randomization procedures "RAR", "PBR",  "BSD", "MP", "EBC" (default parameter for this randomization procedures: PBR(4), BSD(3), EBC(0.67), MP(3))
#' @param seed the seed of the sample
#' @return A list that consists a sample of biased patient responses, the chosen randomization list, the signs of the biasing factor corresponding to the chosen randomization list and the seed for this simulation


stratsamples<-function(N, K, m, balancing,mu_E,mu_C,Sigma,eta, randomization,BSD_par=3,EBC_par=0.67,MP_par=3,PBR_par=4,seed){
  
  #Check input variables
  if(N<=0 | K<=0 ){
    stop('Invalid number for N, m')
  }
  if(length(balancing)!=K)
  {
    stop('Invalid length of balancing vector')
  }
  
  
  samplerand<-list()
  biasseq<-list()
  sample<-list()
  bias_neutral<-list()
  
  #sample size per center
  sizepercenter=N/sum(balancing)*balancing
  
  
  #simulating patient responses under bias
  if(K==1){  
    
    testcondition=1
    
    while(testcondition==1){
      
      seed=seed+1
      set.seed(seed)
      
      #simulating a stratified randomization list
      if(randomization=='BSD'){
        Rand<- bsdPar(sizepercenter, BSD_par, groups = LETTERS[1:2])
        myPar<- genSeq(Rand,1)
        Rand_list<- getRandList(myPar)
      }else if(randomization=='EBC'){
        Rand<-ebcPar(sizepercenter, EBC_par, groups = LETTERS[1:2])
        myPar<- genSeq(Rand,1)
        Rand_list<- getRandList(myPar)
      }else if(randomization=='MP'){
        Rand<-mpPar(sizepercenter ,mti=MP_par, ratio = rep(1, 2), groups = LETTERS[1:2])
        myPar<- genSeq(Rand,1)
        Rand_list<- getRandList(myPar)
      }else if(randomization=='PBR'){
        if(sizepercenter %% PBR_par==0){
          Rand<-pbrPar(rep(PBR_par, times= sizepercenter/PBR_par), K = 2, groups = LETTERS[1:2])
          myPar<- genSeq(Rand,1)
          Rand_list<- getRandList(myPar)
        }else{
          stop("Size of one center cannot be divided by PBR_par")
        }
      }else if(randomization=='RAR'){
        Rand<-rarPar(sizepercenter,2,groups = LETTERS[1:2])
        myPar<- genSeq(Rand,1)
        Rand_list<- getRandList(myPar)
      }else{
        stop("Randomization procedure is not defined")
      }
      
      
      #exclude the randomisation lists for which the OLS estimator does not exist
      if (sum(Rand_list[1,]=="A")==length(Rand_list[1,])){
        testcondition=1
      }
      else if(sum(Rand_list[1,]=="B")==length(Rand_list[1,])){
        testcondition=1
      }
      else if(sum(Rand_list[1, seq(2, length(Rand_list[1,]), by = 2)]=="A")==sizepercenter/2){
        testcondition=1
      }
      else if(sum(Rand_list[1, seq(2, length(Rand_list[1,]), by = 2)]=="B")==sizepercenter/2){
        testcondition=1
      }
      else{
        testcondition=0}
    }
    
    #determine the biasing factor to the corresponding randomization list
    bias_neutral<-append(bias_neutral,list(getExpectation(myPar, selBias("CS", 1, "exact"))))
    bias<-eta[1,]%*%getExpectation(myPar, selBias("CS", 1, "exact"))
    samplerand<-append(samplerand,list(Rand_list))
    
    # determine biased patient responses
    sample<-append(sample,list(sapply(1:sizepercenter,function(x){if(Rand_list[1,x]=="A")
    {a=mvrnorm(1,mu_E+bias[,x],Sigma)}
      else{a=mvrnorm(1,mu_C+bias[,x],Sigma)}
      a})))
    
    
    }else if(K>1){
      for (i in 1:K){
      
        testcondition=1
      
        while(testcondition==1){
        
          seed=seed+1
          set.seed(seed)
        
          #simulating a stratified randomization list
          if(randomization=='BSD'){
            Rand<- bsdPar(sizepercenter[i], BSD_par, groups = LETTERS[1:2])
            myPar<- genSeq(Rand,1)
            Rand_list<- getRandList(myPar)
          }else if(randomization=='EBC'){
            Rand<-ebcPar(sizepercenter[i], EBC_par, groups = LETTERS[1:2])
            myPar<- genSeq(Rand,1)
            Rand_list<- getRandList(myPar)
          }else if(randomization=='MP'){
            Rand<-mpPar(sizepercenter[i] ,mti=MP_par, ratio = rep(1, 2), groups = LETTERS[1:2])
            myPar<- genSeq(Rand,1)
            Rand_list<- getRandList(myPar)
          }else if(randomization=='PBR'){
            if(sizepercenter[i] %% PBR_par==0){
              Rand<-pbrPar(rep(PBR_par, times= sizepercenter[i]/PBR_par), K = 2, groups = LETTERS[1:2])
              myPar<- genSeq(Rand,1)
              Rand_list<- getRandList(myPar)
            }else{
              stop("Size of one center cannot be divided by PBR_par")
            }
          }else if(randomization=='RAR'){
            Rand<-rarPar(sizepercenter[i],2,groups = LETTERS[1:2])
            myPar<- genSeq(Rand,1)
            Rand_list<- getRandList(myPar)
          }else{
            stop("Randomization procedure is not defined")
          }
          
          
          #exclude the randomisation lists for which the OLS estimator does not exist
          if (sum(Rand_list[1,]=="A")==length(Rand_list[1,])){
            testcondition=1
          }
          else if(sum(Rand_list[1,]=="B")==length(Rand_list[1,])){
            testcondition=1
          }
          else if(sum(Rand_list[1, seq(2, length(Rand_list[1,]), by = 2)]=="A")==sizepercenter[i]/2){
            testcondition=1
          }
          else if(sum(Rand_list[1, seq(2, length(Rand_list[1,]), by = 2)]=="B")==sizepercenter[i]/2){
            testcondition=1
          }
          else{
            testcondition=0}
        }
      
      
      #determine the biasing factor to the corresponding randomization list
      bias_neutral<-append(bias_neutral,list(getExpectation(myPar, selBias("CS", 1, "exact"))))
      bias<-eta%*%getExpectation(myPar, selBias("CS", 1, "exact"))
      samplerand<-append(samplerand,list(Rand_list))
      
      # determine a sample of biased patient responses
      sample<-append(sample,list(sapply(1:sizepercenter[i],function(x){if(Rand_list[1,x]=="A")
      {a=mvrnorm(1,mu_E+bias[,x],Sigma)}
        else{a=mvrnorm(1,mu_C+bias[,x],Sigma)}
        a})))
      
    }
  }
  
  
  samplebias<-list(sample,samplerand,bias_neutral,seed)
  
  #return a sample of biased patient responses, the chosen randomization list, the signs of the biasing factor corresponding to the chosen randomization list and the seed for this simulation
  return(samplebias)
}


#' @param N total sample size
#' @param K number of strata
#' @param m number of endpoints
#' @param balancing the balancing of the total sample across the strata, i.e. in the case of balanced strata balancing=rep(1,K)
#' @param mu_E expected value of the experimental group
#' @param mu_C expected value of the control group
#' @param Sigma covariance matrix of the patient responses regarding the m endpoints
#' @param eta Kxm matrix containing the endpoint and strata specific biasing factors
#' @param randomization one of the randomization procedures "RAR", "PBR",  "BSD", "MP", "EBC" (default parameter for this randomization procedures: PBR(4), BSD(3), EBC(0.67), MP(3))
#' @param r_number number of randomization lists for that the bias estimates computed
#' @return a dataframe with variables @param N, @param K, @param m, @param eta, @param randomization, mean squared error, mean half length and mean of the bias estimates across the different considered randomization lists


biasestimation<-function(N, K, balancing,m,Sigma, mu_E,mu_C,eta, randomization,r_number,BSD_par=3,EBC_par=0.67,MP_par=3,PBR_par=4){
  
  
  est<-matrix( ,nrow=K,ncol=r_number)
  lengthCI<-matrix( ,nrow=K,ncol=r_number)
  seed=0
  
  for(j in 1:r_number){
    
    
    #simulating samples of biased patient responses
    samplebias<-stratsamples(N, K,m, balancing,mu_E,mu_C,Sigma,eta, randomization,BSD_par=3,EBC_par=0.67,MP_par=3,PBR_par=4,seed=seed)
    
    
    sample<-samplebias[[1]]
    samplerand<-samplebias[[2]]
    biasseq<-samplebias[[3]]
    seed<-samplebias[[4]]
    
    
    for( i in 1:K){
      
      #computing the OLS estimator of the aggregated bias effects across endpoints per strata
      Yj=colSums(sample[[i]])
      
      
      t_E=numeric(0)
      t_C=numeric(0)
      for (k in samplerand[[i]]){
        if(k=="A"){
          t_E=c(t_E,1)
          t_C=c(t_C,0)
        }
        else{
          t_C=c(t_C,1)
          t_E=c(t_E,0)
        }
      }
      
      B<-matrix(c(t_E,t_C,biasseq[[i]][1,]),nrow=N,ncol=3)
      
      par=inv(t(B)%*%B)%*%t(B)%*%Yj
      
      # bias estimator
      est[i,j]= par[3,1]
      
      #half length of the confidence interval of the bias estimate
      lengthCI[i,j]=qt(1-0.05/2,length(samplerand[[i]])-3)*sqrt((t(Yj-B%*%par)%*%(Yj-B%*%par)*inv(t(B)%*%B)[3,3])/(length(samplerand[[i]])-3))
    }
  }
  
  # mean estimate across all considered randomization lists
  meanest=rowMeans(est)
  
  eta_correct=matrix(rep(rowSums(eta),r_number),nrow=K,ncol=r_number)
  
  # mean squared error
  MSE=rowMeans((est-eta_correct)^2)
  
  #mean half length of the confidence interval of the bias estimates
  CI=rowMeans(lengthCI)
  
  
  out<-data.frame(samplesize=N,number_of_strata=K,number_of_endpoints=m,biasfactor=I(list(eta)), rand=randomization, MSE=MSE, lengthCI=CI,meanest=meanest)
  return(out)
  
}

biasestimation(64,  1, c(1),2,diag(2),rep(0,2),rep(0,2), matrix(rep(0*0.445,2),nrow=1,ncol=2),"PBR",10)

####################################################################################################################################################################################################################################

#' Simulations of the simulation study outlined in the manuscript "A bias-adjusted analysis of stratified clinical trials with multi-component endpoints using the Wei-Lachin test" regarding the accuracy of the bias estimation

######################################################################################################################################################################################################################################

#set the file path to save the simulation output
setwd("/file pathway...")
file.create("biasestimation.csv")


#' Note ∆_{N,K,m} denotes the effect size that corresponds to an 80% power of the stratified Wei-Lachin test in trials with N patients, m endpoints and K strata (∆_{32,2,2} = 0.64, ∆_{32,2,4} = 0.46, ∆_{32,4,2} = 0.66 and ∆_{32,4,4} = 0.47)
effectsizes=c(0.64,0.45,0.44,0.32)
effectsizes=matrix(effectsizes,ncol=2,nrow=2, byrow=T)


#' number of endpoint components
m=c(2,4)


#' number of strata
K=1

#'sample size
N=c(32,64)

#' the common biasing factor for each strata and endpoint component is chosen as proportion (5%,10%) of the effect size that corresponds to a nominal power of 80% of the stratified Wei-Lachin
p=c(0,0.1,1)

#' analyzed stratified randomization procedures
rand=c('RAR','BSD','MP','EBC','PBR')


for(e in 1:length(m)){  
  
  for (a in rand){
    
    for(b in 1:length(N)){
      
      for (z in p) {
        
        write.table(biasestimation(N[b],  K, rep(1,K),m[e],diag(m[e]),rep(effectsizes[b,e],m[e]),rep(0,m[e]), matrix(rep(z*effectsizes[b,e],K*m[e]),nrow=K,ncol=m[e]),a,100000),"biasestimation.csv", append=TRUE ,row.names=FALSE,col.names = FALSE))
        
        
      }
    }
  }
}