# R for simulation study # Simple Regression example lm(complaints ~ advance, data = attitude) out <- lm(complaints ~ advance, data = attitude) names(out) out[["coefficients"]] summary(out) summaryout <- summary(out) names(summaryout) summary(out)[["coefficients"]] pvalue <- summary(out)[["coefficients"]][2, 4] est <- summary(out)[["coefficients"]][2, 1] se <- summary(out)[["coefficients"]][2, 2] # n <- 200 b <- 0.5 x <- rnorm(n, 0, 1) e <- rnorm(n, 0, sqrt(1 - b^2)) y <- b*x + e dat <- data.frame(x = x, y = y) out <- lm(y ~ x, data = dat) summary(out)[["coefficients"]][2, c(1, 2, 4)] # Run multiple times set.seed(123321) nrep <- 1000 n <- 200 b <- 0.5 result <- matrix(NA, nrep, 3) for(i in 1:nrep) { x <- rnorm(n, 0, 1) e <- rnorm(n, 0, sqrt(1 - b^2)) y <- b*x + e dat <- data.frame(x = x, y = y) out <- lm(y ~ x, data = dat) result[i,] <- summary(out)[["coefficients"]][2, c(1, 2, 4)] } sig <- result[,3] < 0.05 result <- cbind(result, sig) colnames(result) <- c("est", "se", "p", "sig") colMeans(result) mean(result[,1]) - b # bias in parameter estimates (mean(result[,1]) - b) / b # relative bias in parameter estimates mean(result[,2]) - sd(result[,1]) # bias in standard errors (mean(result[,2]) - sd(result[,1])) / sd(result[,1]) # relative bias in standard errors mean(result[,3] < 0.05) # let's try to manipulate n: seq(50, 200, 50) and b: seq(0, 0.5, 0.1) set.seed(123321) nrep <- 1000 bs <- seq(0, 0.5, 0.1) ns <- seq(50, 200, 50) ovresult <- NULL for(k in 1:length(bs)){ b <- bs[k] for(j in 1:length(ns)) { n <- ns[j] result <- matrix(NA, nrep, 3) for(i in 1:nrep) { x <- rnorm(n, 0, 1) e <- rnorm(n, 0, sqrt(1 - b^2)) y <- b*x + e dat <- data.frame(x = x, y = y) out <- lm(y ~ x, data = dat) result[i,] <- summary(out)[["coefficients"]][2, c(1, 2, 4)] } result <- cbind(b, n, result) ovresult <- rbind(ovresult, result) } } sig <- ovresult[,5] < 0.05 ovresult <- cbind(ovresult, sig) colnames(ovresult) <- c("b", "n", "est", "se", "p", "sig") aggresult <- aggregate(cbind(est, se, sig) ~ n + b, data = ovresult, FUN = mean) # average aggsdresult <- aggregate(est ~ n + b, data = ovresult, FUN = sd) # sd biasest <- aggresult[,"est"] - aggresult[,"b"] relbiasest <- biasest / aggresult[,"b"] biasse <- aggresult[,"se"] - aggsdresult[,"est"] relbiasse <- biasse / aggsdresult[,"est"] power <- aggresult[,"sig"] tableresult <- data.frame(aggresult[,c(1, 2)], biasest, relbiasest, biasse, relbiasse, power) tableresult # Let's plot a graph coord <- tableresult[tableresult[,"b"] == 0.2, c("n", "power")] plot(coord, type="l", xlab="Sample Size", ylab="Power", main = "My Simulation") plot(c(0,0), type = "n", xlim=c(30, 200), ylim=c(0, 1), xlab="Sample Size", ylab="Power", main = "My Simulation", axes = FALSE) axis(1, at=c(50, 100, 150, 200)) axis(2, at=seq(0, 1, 0.1)) box() points(coord, pch = 20) lines(coord, lty = 1) plot(c(0,0), type = "n", xlim=c(40, 200), ylim=c(0, 1), xlab="Sample Size", ylab="Power", main = "My Simulation", axes = FALSE) axis(1, at=c(50, 100, 150, 200)) axis(2, at=seq(0, 1, 0.1)) box() points(tableresult[tableresult[,"b"] == 0, c("n", "power")], pch = 20, col = "black") lines(tableresult[tableresult[,"b"] == 0, c("n", "power")], lty = 1, col = "black") points(tableresult[tableresult[,"b"] == 0.1, c("n", "power")], pch = 19, col = "red") lines(tableresult[tableresult[,"b"] == 0.1, c("n", "power")], lty = 2, col = "red") points(tableresult[tableresult[,"b"] == 0.2, c("n", "power")], pch = 18, col = "darkgreen") lines(tableresult[tableresult[,"b"] == 0.2, c("n", "power")], lty = 3, col = "darkgreen") points(tableresult[tableresult[,"b"] == 0.3, c("n", "power")], pch = 17, col = "orange") lines(tableresult[tableresult[,"b"] == 0.3, c("n", "power")], lty = 4, col = "orange") points(tableresult[tableresult[,"b"] == 0.4, c("n", "power")], pch = 16, col = "purple") lines(tableresult[tableresult[,"b"] == 0.4, c("n", "power")], lty = 5, col = "purple") points(tableresult[tableresult[,"b"] == 0.5, c("n", "power")], pch = 15, col = "grey80") lines(tableresult[tableresult[,"b"] == 0.5, c("n", "power")], lty = 6, col = "grey80") abline(h = 0.8, lty = 2) legend(x = 150, y = 0.65, legend = c("b = 0", "b = 0.1", "b = 0.2", "b = 0.3", "b = 0.4", "b = 0.5"), pch = c(20, 19, 18, 17, 16, 15), lty = 1:6, col = c("black", "red", "darkgreen", "orange", "purple", "grey80")) # Google lty R, pch R, color R # Create function convertCtoF <- function(temp) { f <- temp * (9/5) + 32 return(f) } convertCtoF(25) relbias <- function(v1, v2) { result <- (v1 - v2) / v2 return(result) } relbias(102, 100) relbias(aggresult[,3], aggresult[,2]) p <- 0.3 q <- 0.3 - 0.1 + 0.1 p == q a <- 0.5 - 0.3 b <- 0.3 - 0.1 a == b x <- seq(0, 0.5, 0.1)[4] y <- 0.3 x == y equal <- function(a, b) { abs(a - b) < 0.000001 } equal(p, q) equal(a, b) equal(x, y) # Let's do some function here on points and lines putLine <- function(mat, lty, pch, col) { points(mat, pch = pch, col = col) lines(mat, lty = lty, col = col) } legendtxt <- c("b = 0", "b = 0.1", "b = 0.2", "b = 0.3", "b = 0.4", "b = 0.5") lty <- 1:6 pch <- c(20, 19, 18, 17, 16, 15) col <- c("black", "red", "darkgreen", "orange", "purple", "grey80") plot(c(0,0), type = "n", xlim=c(40, 200), ylim=c(0, 1), xlab="Sample Size", ylab="Power", main = "My Simulation", axes = FALSE) axis(1, at=c(50, 100, 150, 200)) axis(2, at=seq(0, 1, 0.1)) box() putLine(tableresult[equal(tableresult[,"b"], 0), c("n", "power")], lty = lty[1], pch = pch[1], col = col[1]) putLine(tableresult[equal(tableresult[,"b"], 0.1), c("n", "power")], lty = lty[2], pch = pch[2], col = col[2]) putLine(tableresult[equal(tableresult[,"b"], 0.2), c("n", "power")], lty = lty[3], pch = pch[3], col = col[3]) putLine(tableresult[equal(tableresult[,"b"], 0.3), c("n", "power")], lty = lty[4], pch = pch[4], col = col[4]) putLine(tableresult[equal(tableresult[,"b"], 0.4), c("n", "power")], lty = lty[5], pch = pch[5], col = col[5]) putLine(tableresult[equal(tableresult[,"b"], 0.5), c("n", "power")], lty = lty[6], pch = pch[6], col = col[6]) abline(h = 0.8, lty = 2) legend(x = 150, y = 0.65, legend = legendtxt, pch = pch, lty = lty, col = col) plotResult <- function(mat, targetcol, aty, ylab, poslegend, hline) { lty <- 1:6 pch <- c(20, 19, 18, 17, 16, 15) col <- c("black", "red", "darkgreen", "orange", "purple", "grey80") legendtxt <- c("b = 0", "b = 0.1", "b = 0.2", "b = 0.3", "b = 0.4", "b = 0.5") plot(c(0,0), type = "n", xlim=c(40, 200), ylim=range(aty), xlab="Sample Size", ylab=ylab, main = "My Simulation", axes = FALSE) axis(1, at=c(50, 100, 150, 200)) axis(2, at=aty) box() putLine(mat[equal(mat[,"b"], 0), c("n", targetcol)], lty = lty[1], pch = pch[1], col = col[1]) putLine(mat[equal(mat[,"b"], 0.1), c("n", targetcol)], lty = lty[2], pch = pch[2], col = col[2]) putLine(mat[equal(mat[,"b"], 0.2), c("n", targetcol)], lty = lty[3], pch = pch[3], col = col[3]) putLine(mat[equal(mat[,"b"], 0.3), c("n", targetcol)], lty = lty[4], pch = pch[4], col = col[4]) putLine(mat[equal(mat[,"b"], 0.4), c("n", targetcol)], lty = lty[5], pch = pch[5], col = col[5]) putLine(mat[equal(mat[,"b"], 0.5), c("n", targetcol)], lty = lty[6], pch = pch[6], col = col[6]) abline(h = hline, lty = 2) legend(x = poslegend[1], y = poslegend[2], legend = legendtxt, pch = pch, lty = lty, col = col) } plotResult(tableresult, "biasest", seq(-0.01, 0.01, 0.002), ylab = "Bias", poslegend = c(150, 0), hline = 0) plotResult(tableresult, "relbiasest", seq(-0.03, 0.03, 0.01), ylab = "Relative Bias", poslegend = c(150, 0), hline = 0) range(setdiff(tableresult[,"relbiasse"], c(Inf, -Inf))) plotResult(tableresult, "biasse", seq(-0.004, 0.004, 0.002), ylab = "SE Bias", poslegend = c(150, 0), hline = 0) plotResult(tableresult, "relbiasse", seq(-0.05, 0.05, 0.02), ylab = "Relative SE Bias", poslegend = c(150, 0), hline = 0) plotResult(tableresult, "power", seq(0, 1, 0.2), ylab = "Power", poslegend = c(150, 0.7), hline = 0.8) par(mfrow=c(3,1)) plotResult(tableresult, "relbiasest", seq(-0.03, 0.03, 0.01), ylab = "Relative Bias", poslegend = c(150, 0), hline = 0) plotResult(tableresult, "relbiasse", seq(-0.05, 0.05, 0.02), ylab = "Relative SE Bias", poslegend = c(150, 0), hline = 0) plotResult(tableresult, "power", seq(0, 1, 0.2), ylab = "Power", poslegend = c(150, 0.7), hline = 0.8) dev.off() # Exercise n <- 100 b0 <- 0 b1 <- 1 x <- rnorm(n, 0, 1) fy <- b0 + b1*x p <- exp(fy)/(1 + exp(fy)) y <- rep(NA, n) for(w in 1:n) y[w] <- rbinom(1, 1, p[w]) dat <- data.frame(x = x, y = y) out <- glm(y ~ x, data = dat, family = binomial(link = "logit")) sumout <- summary(out) sumout[["coefficients"]][2, c(1, 2, 4)]