See slide 18 for the simulation designs.
A. Set the population
set.seed(123321)
nrep <- 10000
dat <- matrix(c(0:10, 0:10 + 1, 0:10 + 2, 0:10 + 3, 0:10 + 4), nrow=5, byrow=TRUE)
n_all <- length(dat)
n_group <- ncol(dat)
n_each <- nrow(dat)
B. Set the sampling design
usedgroup <- 2
usedeach <- 2
usedall <- usedgroup * usedeach
C. Set the function to calculate population standard errors
sdp <- function(x) sqrt(mean((x-mean(x))^2))
D. Set the matrices to save the means and estimated standard errors from each replication.
resultmean <- matrix(NA, nrep, 2)
resultse <- matrix(NA, nrep, 2)
E. Draw two samples: Simple random sampling and multistage sampling. Then calcuate means and estimated standard errors from each sampling method.
for(i in 1:nrep) {
# Simple Random Sampling
index <- sample(1:n_all, usedall)
tempdat1 <- dat[index]
resultmean[i, 1] <- mean(tempdat1)
resultse[i, 1] <- sd(tempdat1)/sqrt(usedall)
# Multistage Random Sampling
index_group <- sample(1:n_group, usedgroup)
tempdat2g1 <- dat[sample(1:n_each, usedeach), index_group[1]]
tempdat2g2 <- dat[sample(1:n_each, usedeach), index_group[2]]
tempdat2 <- c(tempdat2g1, tempdat2g2)
resultmean[i, 2] <- mean(tempdat2)
resultse[i, 2] <- sd(tempdat2)/sqrt(usedall)
}
F. Find the averaged estimates, actual standard errors of the estimates, and the averaged estimated standard errors.
apply(resultmean, 2, mean)
## [1] 7.009575 7.018175
apply(resultmean, 2, sdp)
## [1] 1.671994 2.191452
apply(resultse, 2, mean)
## [1] 1.655730 1.400379
G. Plot the sampling distributions.
hist(resultmean[,1], xlim=c(0, 14), main="Samp Dist of M (Simple)", xlab="M")
abline(v=7, col="red")
hist(resultmean[,2], xlim=c(0, 14), main="Samp Dist of M (Multistage)", xlab="M")
abline(v=7, col="red")
H. Calculate biases.
apply(resultmean, 2, mean) - 7 # Bias
## [1] 0.009575 0.018175
(apply(resultmean, 2, mean) - 7)/7 # Relative Bias
## [1] 0.001367857 0.002596429
apply(resultse, 2, mean) - apply(resultmean, 2, sdp) # Bias in SE
## [1] -0.01626427 -0.79107295
(apply(resultse, 2, mean) - apply(resultmean, 2, sdp))/apply(resultmean, 2, sdp) # Relative Bias in SE
## [1] -0.009727469 -0.360981131
See slide 24 for the simulation designs.
A. Set the population
set.seed(123321)
nrep <- 10000
dat <- matrix(c(0:10, 0:10 + 3, 0:10 + 4, 0:10 + 5, 0:10 + 8), nrow=5, byrow=TRUE)
n_all <- length(dat)
n_group <- ncol(dat)
n_each <- nrow(dat)
B. Set the sampling design
usedgroup <- 2
usedeach <- 2
C. Set the function to calculate population standard errors
sdp <- function(x) sqrt(mean((x-mean(x))^2))
D. Set the matrices to save the means and estimated standard errors from each replication.
resultmean <- matrix(NA, nrep, 2)
resultse <- matrix(NA, nrep, 2)
E. Draw two samples:
for(i in 1:nrep) {
# Single step
index <- sample(1:n_group, usedgroup)
tempdat1 <- dat[3, index]
resultmean[i, 1] <- mean(tempdat1)
resultse[i, 1] <- sd(tempdat1)/sqrt(usedgroup)
# Double Step
tempdat2m1 <- mean(dat[sample(1:n_each, usedeach), index[1]])
tempdat2m2 <- mean(dat[sample(1:n_each, usedeach), index[2]])
tempdat2 <- c(tempdat2m1, tempdat2m2)
resultmean[i, 2] <- mean(tempdat2)
resultse[i, 2] <- sd(tempdat2)/sqrt(usedgroup)
}
F. Find the averaged estimates, actual standard errors of the estimates, and the averaged estimated standard errors.
apply(resultmean, 2, mean)
## [1] 9.016600 9.020725
apply(resultmean, 2, sdp)
## [1] 2.117776 2.384508
apply(resultse, 2, mean)
## [1] 1.997800 2.136925
G. Plot the sampling distributions.
hist(resultmean[,1], xlim=c(2, 16), main="Samp Dist of M (Single)", xlab="M", breaks=11)
abline(v=9, col="red")
hist(resultmean[,2], xlim=c(2, 16), main="Samp Dist of M (Double)", xlab="M", breaks=11)
abline(v=9, col="red")
H. Calculate biases.
apply(resultmean, 2, mean) - 9 # Bias
## [1] 0.016600 0.020725
(apply(resultmean, 2, mean) - 9)/9 # Relative Bias
## [1] 0.001844444 0.002302778
apply(resultse, 2, mean) - apply(resultmean, 2, sdp) # Bias in SE
## [1] -0.1199758 -0.2475826
(apply(resultse, 2, mean) - apply(resultmean, 2, sdp))/apply(resultmean, 2, sdp) # Relative Bias in SE
## [1] -0.05665181 -0.10382967