######################################################################################### # R code to simulate survival data and plot maximum likelihood estimates for lamba # # 25 October 2005 # # Cancer Clinical Trials Working Group # ######################################################################################### ###################################### # Function to simulate survival data # # n = sample size # # lambda = hazard rate # # atime = accrual time (months) # # nsim = number of data sets # ###################################### simarray <- function(n, lambda, atime, nsim) { simdat <- array(NA, dim=c(n, 5, nsim)) for (i in 1:nsim) { s.time <- round(rexp(n,lambda)) accrual.time <- round(runif(n,0,atime)) fu.time <- 60-accrual.time event <- ifelse(fu.time < s.time,0,1) s.time <- ifelse(fu.time < s.time, fu.time, s.time) id <- as.integer(rank(accrual.time, ties.method = "first")) simdat[,,i] <- cbind(id, s.time, accrual.time, fu.time, event) } assign("simdat", simdat, env=.GlobalEnv) } ########################################### # Function to calculate the MLE of lambda # # lmba: sequence of values for lambda # ########################################### getlik <- function(lmba, simdat) { likvals <- matrix(NA, ncol=dim(simdat)[3], nrow=length(lmba)) likmat <- matrix(NA, ncol=dim(simdat)[3], nrow = length(lmba)) mle <- likatmle <- rep(NA, dim(simdat)[3]) for (i in 1:dim(simdat)[3]) { mle[i] <- 1/(sum(simdat[,5,i]*(simdat[,2,i]) + (1 - simdat[,5,i])*simdat[,4,i]))*sum(simdat[,5,i]) likatmle[i] <- round(sum(-1*(simdat[,5,i]*(log(1/mle[i]) + (simdat[,2,i])*mle[i]) + (1 - simdat[,5,i])*mle[i]*simdat[,4,i])), 2) for (j in 1:length(lmba)) { likmat[j,i] <- round(sum(-1*(simdat[,5,i]*(log(1/lmba[j]) + (simdat[,2,i])*lmba[j]) + (1 - simdat[,5,i])*lmba[j]*simdat[,4,i])), 2) } likvals[,i] <- likmat[,i] likmat[,i] <- likmat[,i] - likatmle[i] } assign("likmat", likmat, env=.GlobalEnv) assign("mle", mle, env=.GlobalEnv) return(likvals) } ################################################################ # Get 1/8 support intervals - point where lik function = 0.125 # ################################################################ getints <- function(likmat, mle, lmba) { low <- upp <- rep(NA, length(mle)) for (i in 1:length(mle)) { low[i] <- min(lmba[exp(likmat[,i]) > 0.12 & exp(likmat[,i]) < 0.13]) upp[i] <- max(lmba[exp(likmat[,i]) > 0.12 & exp(likmat[,i]) < 0.13]) } likpars <- cbind(mle, low, upp) likpars <- exp(-likpars*6) assign("likpars", likpars, env=.GlobalEnv) } ################################################### # Format data for the number of people to look at # # Uses data through end of follow-up # # num: Number of patients to evaluate # ################################################### getlookdat <- function(simdat, num, cuttime) { lookdat <- array(NA, dim=c(num, dim(simdat)[2], dim(simdat)[3])) for (i in 1:dim(simdat)[3]) { timdiff <- simdat[,4,i][simdat[,1,i]<=num] - min(simdat[,4,i][simdat[,1,i]<=num]) fu.timen <- timdiff + cuttime eventn <- ifelse(fu.timen < simdat[,2,i][simdat[,1,i]<=num],0,1) s.timen <- ifelse(fu.timen < simdat[,2,i][simdat[,1,i]<=num], fu.timen, simdat[,2,i][simdat[,1,i]<=num]) lookdat[,,i] <- cbind(simdat[,1,i][simdat[,1,i]<=num], s.timen, simdat[,3,i][simdat[,1,i]<=num], fu.timen, eventn) } return(lookdat) } ############################################################## # Plot mle and support intervals for each simulated data set # # The first function (plotmle) plots data at the end of trial# # The 2nd function (plotbounds) plots data at each look # # all.looks: array of the data for every 5 pts. - see below # ############################################################## plotmle <- function(likpars, color, lsim) { plot(range(likpars), range(0:dim(likpars)[1]), type="n", xlab="Survival Probability at 6 Months", ylab="Simulated Data Set") points(likpars[,1], 1:dim(likpars)[1], pch = 19) segments(likpars[,2], 1:dim(likpars)[1], likpars[,3], 1:dim(likpars)[1], col=color) abline(v = lsim, lty=2, col="grey") } plotbounds <- function(all.looks, color, lsim) { par(bg="black", fg="white", col.axis = "white", col.lab="white") plot(range(all.looks[,2:3,]), range(0:dim(all.looks)[1]), type="n", xlab="Survival Probability at 6 Months", ylab="Simulated Data Set") for (i in 1:dim(all.looks)[3]) { segments(all.looks[,2,i], 1:dim(all.looks)[1], all.looks[,3,i], 1:dim(all.looks)[1], col=color[i]) } abline(v = lsim, lty=2, col="grey") } ##################################################################################################################################### # Try an example # 50 data sets simarray(40, 0.0594, 36, 50) lmba <- seq(0.00001, 0.3, by = 0.00001) getlik(lmba, simdat) getints(likmat, mle, lmba) likpars[,2][likpars[,1]==0] <- min(lmba) # Replaces interval to cover whole line in case of no events # likpars[,3][likpars[,1]==0] <- max(lmba) plotmle(likpars, "black", 0.70) # 50 data sets - longer accrual time simarray(40, 0.0594, 48, 50) lmba <- seq(0.00001, 0.3, by = 0.00001) getlik(lmba, simdat) getints(likmat, mle, lmba) plotmle(likpars, "black", 1/100) # 50 data sets - shorter accrual time simarray(40, 0.0594, 24, 50) lmba <- seq(0.00001, 0.3, by = 0.00001) getlik(lmba, simdat) getints(likmat, mle, lmba) plotmle(likpars, "black", 1/100) # 50 data sets - N=20 simarray(20, 1/100, 36, 50) lmba <- seq(0.0001, 0.08, by = 0.00001) getlik(lmba, simdat) getints(likmat, mle, lmba) plotmle(likpars, "black", 1/100) # Plot likelihood functions for the data sets # plot(range(c(0.3, 1)), range(0:1), type="n", xlab="Survival Probability at 6 Months", ylab="Likelihood Function") choices <- colors() color = sample(choices,25, replace = F) for (i in 1:50) { lines(exp(-1*lmba*6), exp(likmat40[,i]), col=color[i]) } # For each of 50 data sets, start looking at intervals of 5 people at 6 months simdat5 <- getlookdat(simdat, 5, 6) simdat10 <- getlookdat(simdat, 10, 6) simdat15 <- getlookdat(simdat, 15, 6) simdat20 <- getlookdat(simdat, 20, 6) simdat25 <- getlookdat(simdat, 25, 6) simdat30 <- getlookdat(simdat, 30, 6) simdat35 <- getlookdat(simdat, 35, 6) simdat40 <- getlookdat(simdat, 40, 6) likvals5 <- getlik(lmba, simdat5) likmat5 <- likmat likpars5 <- getints(likmat5, mle, lmba) likpars5[,2][likpars5[,1]==1] <- 0 # Replaces interval to cover whole line in case of no events # likpars5[,3][likpars5[,1]==1] <- 1 likvals10 <- getlik(lmba, simdat10) likmat10 <- likmat likpars10 <- getints(likmat10, mle, lmba) likpars10[,2][likpars10[,1]==1] <- 0 likpars10[,3][likpars10[,1]==1] <- 1 likvals15 <- getlik(lmba, simdat15) likmat15 <- likmat likpars15 <- getints(likmat15, mle, lmba) likpars15[,2][likpars15[,1]==1] <- 0 likpars15[,3][likpars15[,1]==1] <- 1 likvals20 <- getlik(lmba, simdat20) likmat20 <- likmat likpars20 <- getints(likmat20, mle, lmba) likvals25 <- getlik(lmba, simdat25) likmat25 <- likmat likpars25 <- getints(likmat25, mle, lmba) likvals30 <- getlik(lmba, simdat30) likmat30 <- likmat likpars30 <- getints(likmat30, mle, lmba) likvals35 <- getlik(lmba, simdat35) likmat35 <- likmat likpars35 <- getints(likmat35, mle, lmba) likvals40 <- getlik(lmba, simdat40) likmat40 <- likmat likpars40 <- getints(likmat40, mle, lmba) # Plot one data set first # plot(range(c(likpars5[1,], likpars10[1,], likpars15[1,])), range(0:9), yaxt="n", type="n", xlab="Survival Probability at 6 Months", ylab="Number of patients used to estimate MLE") points(likpars5[1,1], 1, pch=19) points(likpars10[1,1], 2, pch=19) points(likpars15[1,1], 3, pch=19) points(likpars20[1,1], 4, pch=19) points(likpars25[1,1], 5, pch=19) points(likpars30[1,1], 6, pch=19) points(likpars35[1,1], 7, pch=19) points(likpars40[1,1], 8, pch=19) segments(likpars5[1,2], 1, likpars5[1,3], 1) segments(likpars10[1,2], 2, likpars10[1,3], 2) segments(likpars15[1,2], 3, likpars15[1,3], 3) segments(likpars20[1,2], 4, likpars20[1,3], 4) segments(likpars25[1,2], 5, likpars25[1,3], 5) segments(likpars30[1,2], 6, likpars30[1,3], 6) segments(likpars35[1,2], 7, likpars35[1,3], 7) segments(likpars[1,2], 8, likpars[1,3], 8) axis(2, at=c(1:8), c(5,10,15,20,25,30,35,40)) abline(v=exp(-0.0594*6), lty=2, col="grey") # Use plotbounds function to overlay interval estimates at every 5 patients all.looks <- array(c(likpars5, likpars10, likpars15, likpars20, likpars40), dim=c(50,3,5)) pdf("Support bounds plot 50 sims.pdf", height = 8, width=11) plotbounds(all.looks, c("white", "red", "blue", "yellow", "green"), exp(-0.0594*6)) legend(-0.05, 12, col=c("black", "white", "red", "blue", "yellow", "green"), lty=rep(1, 6), c("Estimate MLE after:", "5 patients", "10 patients", "15 patients", "20 patients", "40 patients"), bty="n", cex=.85, fill="black") dev.off() plotbounds(all.looks, c("white", "red", "aquamarine", "yellow"), exp(-0.0594*6)) legend(0.06, 35, col=c("black", "white", "red", "aquamarine", "yellow"), lty=rep(1, 5), c("Estimate MLE after:", "5 patients", "10 patients", "15 patients", "20 patients"), bty="n", cex=.85) # Calculate likelihood ratios for survival probabilities = 0.70 and 0.45 # # N = 40 has 90% power to detect 70 % v 45 % 6 month survival # # 0.70 => lambda = 0.0594 => lmba value 5940 # 0.45 => lambda = 0.13308 => lmba value 13308 lr5 <- lr10 <- lr15 <- lr20 <- lr25 <- lr30 <- lr35 <- lr40 <- rep(NA, 50) for (i in 1:50) { lr5[i] <- exp(likvals5[5940,i])/exp(likvals5[13308,i]) lr10[i] <- exp(likvals10[5940,i])/exp(likvals10[13308,i]) lr15[i] <- exp(likvals15[5940,i])/exp(likvals15[13308,i]) lr20[i] <- exp(likvals20[5940,i])/exp(likvals20[13308,i]) lr25[i] <- exp(likvals25[5940,i])/exp(likvals25[13308,i]) lr30[i] <- exp(likvals30[5940,i])/exp(likvals30[13308,i]) lr35[i] <- exp(likvals35[5940,i])/exp(likvals35[13308,i]) lr40[i] <- exp(likvals40[5940,i])/exp(likvals40[13308,i]) } # Use Freq. approach and calculate CI for each data set, using data once last patient reaches 6 months # bound <- rep(NA, 50) #freqdat <- simdat40 freqvals <- getlik(lmba, freqdat) freqmat <- likmat freqpars <- getints(freqmat, mle, lmba) for (i in 1:50) { if(is.element(6, summary(survfit(Surv(freqdat[,2,i], freqdat[,5,i])))$time) == "TRUE") { bound[i] <- summary(survfit(Surv(freqdat[,2,i], freqdat[,5,i])))$lower[summary(survfit(Surv(freqdat[,2,i], freqdat[,5,i])))$time == 6] } if(is.element(5, summary(survfit(Surv(freqdat[,2,i], freqdat[,5,i])))$time) == "TRUE") { bound[i] <- summary(survfit(Surv(freqdat[,2,i], freqdat[,5,i])))$lower[summary(survfit(Surv(freqdat[,2,i], freqdat[,5,i])))$time == 5] } if(bound[i]=="NA") { bound[i] <- summary(survfit(Surv(freqdat[,2,i], freqdat[,5,i])))$lower[summary(survfit(Surv(freqdat[,2,i], freqdat[,5,i])))$time == 4] } } b <- round(bound, digits=2) plotmle(freqpars, "black", 0.7) dataset <- 1:50 r45 <- dataset[1*(bound < 0.45) == 1] r50 <- dataset[1*(bound < 0.50) == 1] segments(freqpars[r50,2], r50, freqpars[r50,3], r50, col="red") segments(freqpars[r45,2], r45, freqpars[r45,3], r45, col="blue") par(bg="black", fg="white", col.axis = "white", col.lab="white") plot(range(1:50), range(lr25), xlab="Simulated Data Set", ylab="Likelihood Ratio", ylim=c(0,50)) points(1:50, lr5, pch=19, col="white") abline(h = 8, lty=2, col="white") points(1:50, lr10, pch=19, col="red") points(1:50, lr15, pch=19, col="blue") points(1:50, lr20, pch=19, col="yellow") points(1:50, lr40, pch=19, col="green") legend(0,38,c("5 patients", "10 patients", "15 patients", "20 patients", "40 patients"), pch=rep(19,5), col=c("white", "red", "blue", "yellow", "green"), cex=.7, adj=0, x.intersp=.5, text.width=2.75)