# R for simulation study

# Simple Regression example

lm(complaints ~ advance, data = attitude)
out <- lm(complaints ~ advance, data = attitude)
names(out)
out[["coefficients"]]
summary(out)
summaryout <- summary(out)
names(summaryout)
summary(out)[["coefficients"]]
pvalue <- summary(out)[["coefficients"]][2, 4]
est <- summary(out)[["coefficients"]][2, 1]
se <- summary(out)[["coefficients"]][2, 2]

# 

n <- 200
b <- 0.5
x <- rnorm(n, 0, 1)
e <- rnorm(n, 0, sqrt(1 - b^2))
y <- b*x + e
dat <- data.frame(x = x, y = y)
out <- lm(y ~ x, data = dat)
summary(out)[["coefficients"]][2, c(1, 2, 4)]

# Run multiple times

set.seed(123321)
nrep <- 1000
n <- 200
b <- 0.5
result <- matrix(NA, nrep, 3)
for(i in 1:nrep) {
	x <- rnorm(n, 0, 1)
	e <- rnorm(n, 0, sqrt(1 - b^2))
	y <- b*x + e
	dat <- data.frame(x = x, y = y)
	out <- lm(y ~ x, data = dat)
	result[i,] <- summary(out)[["coefficients"]][2, c(1, 2, 4)]
}

sig <- result[,3] < 0.05
result <- cbind(result, sig)
colnames(result) <- c("est", "se", "p", "sig")
colMeans(result)

mean(result[,1]) - b # bias in parameter estimates
(mean(result[,1]) - b) / b # relative bias in parameter estimates
mean(result[,2]) - sd(result[,1]) # bias in standard errors
(mean(result[,2]) - sd(result[,1])) / sd(result[,1]) # relative bias in standard errors
mean(result[,3] < 0.05)

# let's try to manipulate n: seq(50, 200, 50) and b: seq(0, 0.5, 0.1)

set.seed(123321)
nrep <- 1000
bs <- seq(0, 0.5, 0.1)
ns <- seq(50, 200, 50)
ovresult <- NULL
for(k in 1:length(bs)){
	b <- bs[k]
	for(j in 1:length(ns)) {
		n <- ns[j]
		result <- matrix(NA, nrep, 3)
		for(i in 1:nrep) {
			x <- rnorm(n, 0, 1)
			e <- rnorm(n, 0, sqrt(1 - b^2))
			y <- b*x + e
			dat <- data.frame(x = x, y = y)
			out <- lm(y ~ x, data = dat)
			result[i,] <- summary(out)[["coefficients"]][2, c(1, 2, 4)]
		}
		result <- cbind(b, n, result)
		ovresult <- rbind(ovresult, result)
	}
}
sig <- ovresult[,5] < 0.05
ovresult <- cbind(ovresult, sig)
colnames(ovresult) <- c("b", "n", "est", "se", "p", "sig")

aggresult <- aggregate(cbind(est, se, sig) ~ n + b, data = ovresult, FUN = mean) # average
aggsdresult <- aggregate(est ~ n + b, data = ovresult, FUN = sd) # sd
biasest <- aggresult[,"est"] - aggresult[,"b"]
relbiasest <- biasest / aggresult[,"b"]
biasse <- aggresult[,"se"] - aggsdresult[,"est"]
relbiasse <- biasse / aggsdresult[,"est"]
power <- aggresult[,"sig"]
tableresult <- data.frame(aggresult[,c(1, 2)], biasest, relbiasest, biasse, relbiasse, power)
tableresult

# Let's plot a graph

coord <- tableresult[tableresult[,"b"] == 0.2, c("n", "power")]
plot(coord, type="l", xlab="Sample Size", ylab="Power", main = "My Simulation")

plot(c(0,0), type = "n", xlim=c(30, 200), ylim=c(0, 1), xlab="Sample Size", ylab="Power", main = "My Simulation", axes = FALSE)
axis(1, at=c(50, 100, 150, 200))
axis(2, at=seq(0, 1, 0.1))
box()
points(coord, pch = 20)
lines(coord, lty = 1)

plot(c(0,0), type = "n", xlim=c(40, 200), ylim=c(0, 1), xlab="Sample Size", ylab="Power", main = "My Simulation", axes = FALSE)
axis(1, at=c(50, 100, 150, 200))
axis(2, at=seq(0, 1, 0.1))
box()
points(tableresult[tableresult[,"b"] == 0, c("n", "power")], pch = 20, col = "black")
lines(tableresult[tableresult[,"b"] == 0, c("n", "power")], lty = 1, col = "black")
points(tableresult[tableresult[,"b"] == 0.1, c("n", "power")], pch = 19, col = "red")
lines(tableresult[tableresult[,"b"] == 0.1, c("n", "power")], lty = 2, col = "red")
points(tableresult[tableresult[,"b"] == 0.2, c("n", "power")], pch = 18, col = "darkgreen")
lines(tableresult[tableresult[,"b"] == 0.2, c("n", "power")], lty = 3, col = "darkgreen")
points(tableresult[tableresult[,"b"] == 0.3, c("n", "power")], pch = 17, col = "orange")
lines(tableresult[tableresult[,"b"] == 0.3, c("n", "power")], lty = 4, col = "orange")
points(tableresult[tableresult[,"b"] == 0.4, c("n", "power")], pch = 16, col = "purple")
lines(tableresult[tableresult[,"b"] == 0.4, c("n", "power")], lty = 5, col = "purple")
points(tableresult[tableresult[,"b"] == 0.5, c("n", "power")], pch = 15, col = "grey80")
lines(tableresult[tableresult[,"b"] == 0.5, c("n", "power")], lty = 6, col = "grey80")
abline(h = 0.8, lty = 2)
legend(x = 150, y = 0.65, legend = c("b = 0", "b = 0.1", "b = 0.2", "b = 0.3", "b = 0.4", "b = 0.5"), pch = c(20, 19, 18, 17, 16, 15), lty = 1:6, col = c("black", "red", "darkgreen", "orange", "purple", "grey80"))

# Google lty R, pch R, color R

# Create function

convertCtoF <- function(temp) {
	f <- temp * (9/5) + 32
	return(f)
}

convertCtoF(25)

relbias <- function(v1, v2) {
	result <- (v1 - v2) / v2
	return(result)
}
relbias(102, 100)
relbias(aggresult[,3], aggresult[,2])

p <- 0.3
q <- 0.3 - 0.1 + 0.1
p == q
a <- 0.5 - 0.3
b <- 0.3 - 0.1
a == b
x <- seq(0, 0.5, 0.1)[4]
y <- 0.3
x == y

equal <- function(a, b) {
	abs(a - b) < 0.000001
}
equal(p, q)
equal(a, b)
equal(x, y)

# Let's do some function here on points and lines

putLine <- function(mat, lty, pch, col) {
	points(mat, pch = pch, col = col)
	lines(mat, lty = lty, col = col)
}
legendtxt <- c("b = 0", "b = 0.1", "b = 0.2", "b = 0.3", "b = 0.4", "b = 0.5")
lty <- 1:6
pch <- c(20, 19, 18, 17, 16, 15)
col <- c("black", "red", "darkgreen", "orange", "purple", "grey80")

plot(c(0,0), type = "n", xlim=c(40, 200), ylim=c(0, 1), xlab="Sample Size", ylab="Power", main = "My Simulation", axes = FALSE)
axis(1, at=c(50, 100, 150, 200))
axis(2, at=seq(0, 1, 0.1))
box()
putLine(tableresult[equal(tableresult[,"b"], 0), c("n", "power")], lty = lty[1], pch = pch[1], col = col[1])
putLine(tableresult[equal(tableresult[,"b"], 0.1), c("n", "power")], lty = lty[2], pch = pch[2], col = col[2])
putLine(tableresult[equal(tableresult[,"b"], 0.2), c("n", "power")], lty = lty[3], pch = pch[3], col = col[3])
putLine(tableresult[equal(tableresult[,"b"], 0.3), c("n", "power")], lty = lty[4], pch = pch[4], col = col[4])
putLine(tableresult[equal(tableresult[,"b"], 0.4), c("n", "power")], lty = lty[5], pch = pch[5], col = col[5])
putLine(tableresult[equal(tableresult[,"b"], 0.5), c("n", "power")], lty = lty[6], pch = pch[6], col = col[6])
abline(h = 0.8, lty = 2)
legend(x = 150, y = 0.65, legend = legendtxt, pch = pch, lty = lty, col = col)

plotResult <- function(mat, targetcol, aty, ylab, poslegend, hline) {
	lty <- 1:6
	pch <- c(20, 19, 18, 17, 16, 15)
	col <- c("black", "red", "darkgreen", "orange", "purple", "grey80")
	legendtxt <- c("b = 0", "b = 0.1", "b = 0.2", "b = 0.3", "b = 0.4", "b = 0.5")
	plot(c(0,0), type = "n", xlim=c(40, 200), ylim=range(aty), xlab="Sample Size", ylab=ylab, 
		main = "My Simulation", axes = FALSE)
	axis(1, at=c(50, 100, 150, 200))
	axis(2, at=aty)
	box()
	putLine(mat[equal(mat[,"b"], 0), c("n", targetcol)], lty = lty[1], pch = pch[1], col = col[1])
	putLine(mat[equal(mat[,"b"], 0.1), c("n", targetcol)], lty = lty[2], pch = pch[2], col = col[2])
	putLine(mat[equal(mat[,"b"], 0.2), c("n", targetcol)], lty = lty[3], pch = pch[3], col = col[3])
	putLine(mat[equal(mat[,"b"], 0.3), c("n", targetcol)], lty = lty[4], pch = pch[4], col = col[4])
	putLine(mat[equal(mat[,"b"], 0.4), c("n", targetcol)], lty = lty[5], pch = pch[5], col = col[5])
	putLine(mat[equal(mat[,"b"], 0.5), c("n", targetcol)], lty = lty[6], pch = pch[6], col = col[6])
	abline(h = hline, lty = 2)
	legend(x = poslegend[1], y = poslegend[2], legend = legendtxt, pch = pch, lty = lty, col = col)
}

plotResult(tableresult, "biasest", seq(-0.01, 0.01, 0.002), ylab = "Bias", poslegend = c(150, 0), hline = 0)
plotResult(tableresult, "relbiasest", seq(-0.03, 0.03, 0.01), ylab = "Relative Bias", poslegend = c(150, 0), hline = 0)
range(setdiff(tableresult[,"relbiasse"], c(Inf, -Inf)))

plotResult(tableresult, "biasse", seq(-0.004, 0.004, 0.002), ylab = "SE Bias", poslegend = c(150, 0), hline = 0)
plotResult(tableresult, "relbiasse", seq(-0.05, 0.05, 0.02), ylab = "Relative SE Bias", poslegend = c(150, 0), hline = 0)
plotResult(tableresult, "power", seq(0, 1, 0.2), ylab = "Power", poslegend = c(150, 0.7), hline = 0.8)

par(mfrow=c(3,1))
plotResult(tableresult, "relbiasest", seq(-0.03, 0.03, 0.01), ylab = "Relative Bias", poslegend = c(150, 0), hline = 0)
plotResult(tableresult, "relbiasse", seq(-0.05, 0.05, 0.02), ylab = "Relative SE Bias", poslegend = c(150, 0), hline = 0)
plotResult(tableresult, "power", seq(0, 1, 0.2), ylab = "Power", poslegend = c(150, 0.7), hline = 0.8)
dev.off()

# Exercise

n <- 100
b0 <- 0
b1 <- 1
x <- rnorm(n, 0, 1)
fy <- b0 + b1*x
p <- exp(fy)/(1 + exp(fy))
y <- rep(NA, n)
for(w in 1:n) y[w] <- rbinom(1, 1, p[w])
dat <- data.frame(x = x, y = y)
out <- glm(y ~ x, data = dat, family = binomial(link = "logit"))
sumout <- summary(out)
sumout[["coefficients"]][2, c(1, 2, 4)]