## Set the design parameters for the study

The first stage is to set the number of disabled and non-disabled children that will be recruited overall, and the number of clusters.

library(openxlsx)
library(dplyr)
# Load the number of people enumerated in each cluster
N[,3] <- round(N[,2]*(2000/sum(N[,2])),0) # back-calc for the proportion of CWD in each cluster
N[,4] <- round(N[,2]*(2000/(sum(N[,2]*0.05))),0) # back-calc for the total children in each cluster
colnames(N) <- c('cluster','N','N_cwd','N_c')
head(N)
##   cluster    N N_cwd  N_c
## 1       1 2567    42  843
## 2       2 3471    57 1140
## 3       3 2165    36  711
## 4       4 2405    39  790
## 5       5 2395    39  786
## 6       6 3032    50  996
# n <- N[,3] # total number of disabled children
#
# d <- matrix(,nrow=n*2, ncol=4) # Create data
# d[,1] <- rep(c(0,1),times=c(n,n)) # Code disabled, and non-disabled (1,0)
# c <- 40 # total number of clusters
# d[,2] <- rep(rep(seq(1,c,1), times=rep(n/c, times=c)),times=2)

## Set assummed population-level parameters

Next, we set the population-level parameters, e.g.Â the â€˜trueâ€™ parameter values, for each cluster. In this case, that means the true proportion attending school for disabled and non-disabled children.

# Enrolment
e0 <- c(.7) # enrolment of all children
e1 <- c(.5) # enrolment of disabled children
k <- c(.01) # Set intercluster coefficient of variation in enrollment levels
var_b0 <- k/e0 # Calculate the variance of the cluster proportions enrolled
var_b1 <- k/e1 # Calculate the variance of the cluster proportions enrolled
# Fix true cluster-level means of school enrollment
rnorm_0_1 <- function(p, v){
r <- 1
while (r>=1 | r<=0){
r <- round(rnorm(1, p, v^.5),2)
}
return(r)
}

N$e_0 <- apply(N, 1, function(x) rnorm_0_1(e0, var_b0)) N$e_1 <- apply(N, 1, function(x) rnorm_0_1(e1, var_b1))

# True relationship between enrollment and treatment in school-treatment arm
a0 <- c(.9) # 90% of enrolled children are in school and treated
a1 <- c(.8) # 80% of enrolled children with disabilities are in school and treated
# True proportions treated by the intervention
k <- c(.01) # Set intercluster coefficient of variation in attendance if enrolled
var_b0 <- k/a0 # Calculate the variance of the cluster attendance if enrolled
var_b1 <- k/a1 # Calculate the variance of the cluster attendance if enrolled
# Fix the true cluster-level means of the attendance at school if enrolled
N$a_0 <- apply(N, 1, function(x) rnorm_0_1(a0, var_b0)) N$a_1 <- apply(N, 1, function(x) rnorm_0_1(a1, var_b1))

# Allocate half clusters to treatment and half to control
N$allocation <- sample(c(rep(0,length.out=(nrow(N)/2)),rep(1,length.out=nrow(N)/2))) # Proportion treated in each cluster == # School-only arm: proportion enrolled * proportion in school if enrolled N$t_0 <- N$e_0 * N$a_0 # all children
N$t_1 <- N$e_1 * N$a_1 # children with disabilities # Community arm: set at 98% with small se t_comm <- .98 var_t_comm <- k/t_comm N$t_1[N$allocation == 1] <- apply(N[N$allocation == 1,], 1, function(x) rnorm_0_1(t_comm, var_t_comm))
N$t_0[N$allocation == 1] <- apply(N[N$allocation == 1,], 1, function(x) rnorm_0_1(t_comm, var_t_comm)) ## Generate enrolment outcome Finally, we take 1000 samples from the population, essentially simulating doing the planned study 1000 times. # Number of treated children overall, and number of treated disabled children i <- 1 t_0 <- matrix(, ncol=0, nrow=nrow(N)) t_1 <- matrix(, ncol=0, nrow=nrow(N)) while (i<=1000){ t_0 <- cbind(t_0, apply(N, 1, function(x) sum(sample(c(0,1),size=x[2],prob=c(1-x['t_0'],x['t_0']),replace=T)))) t_1 <- cbind(t_1, apply(N, 1, function(x) sum(sample(c(0,1),size=x[3],prob=c(1-x['t_1'],x['t_1']),replace=T)))) i <- i+1 } ## Conduct analysis on simulated data Using the 1000 simulations, we can conduct whatever analysis we want and see how the estiamtes vary from one simulated sample to the next. For example, we can estimate the proportion attending and the difference between the proportions for disabled and non disabled children. # Produce proportions treated in CWD and in children without disabilities # First need to subtract the number od disabled children treated from the number of non-disabled children treated t_0 <- t_0 - t_1 # And the denominators N_0 <- N[,2] - N[,3] # Calculate the proportions t_0_p <- t_0 / N_0 t_1_p <- t_1 / N[,3] # Append the allocation t_0_p <- cbind(t_0_p, N$allocation)
t_1_p <- cbind(t_1_p, N$allocation) # Disparity between all children and CWD diff_p <- t_0_p[,1:(ncol(t_0_p)-1)] - t_1_p[,1:(ncol(t_1_p)-1)] diff_p <- cbind(diff_p, N$allocation)

# Calculate sample enrolment figures
p0_sc   <- as.data.frame(t_0_p) %>% filter(V1001==0) %>% select(-V1001) %>% apply(2, mean)
p1_sc   <- as.data.frame(t_1_p) %>% filter(V1001==0) %>% select(-V1001) %>% apply(2, mean)
p0_comm <- as.data.frame(t_0_p) %>% filter(V1001==1) %>% select(-V1001) %>% apply(2, mean)
p1_comm <- as.data.frame(t_1_p) %>% filter(V1001==1) %>% select(-V1001) %>% apply(2, mean)

# Median disparity in each arm
diff_sc <- as.data.frame(diff_p) %>% filter(V1001==0) %>% select(-V1001) %>% apply(2, mean)
diff_comm <- as.data.frame(diff_p) %>% filter(V1001==1) %>% select(-V1001) %>% apply(2, mean)

# Difference in the disparity between the arms
diff_in_disp <- diff_sc - diff_comm

We can visualise the level of precision by plotting the results from all 1000 samples. Here we have the proportion in school for children with disabilities, children without disabilities (controls), and the difference between these for each run of the simulation.

## Proportions treated in children without disability

hist(p0_sc, xlim=c(0,1),ylim=c(0,100), main='', xlab='R or RD', freq=F)
hist(p0_comm, add =T, freq=F)

## Proportions treated in children with disability

hist(p1_sc, xlim=c(0,1),ylim=c(0,70), main='',
xlab='R or RD', freq=F)
y=c(50),labels=c('Diff-in-disp.', 'p CWD in SC','p CWD in Comm'))