# Linear Change Plot

T <- 0:4
Y1 <- 20 + 5*T
Y2 <- 10 + 3*T
Y3 <- 40 - 2*T
plot(0, 0, type="n", xlim=c(0,4), ylim=c(0, 50), xlab="T", ylab="Y")
lines(T, Y1, col="red")
lines(T, Y2, col="green")
lines(T, Y3, col="blue")

# Quadratic Change Plot

T <- seq(0, 4, 0.01)
Y1 <- 20 + 5*T - 2*T*T
Y2 <- 10 + 3*T + 1.5*T*T
Y3 <- 40 - 2*T + 0.5*T*T
plot(0, 0, type="n", xlim=c(0,4), ylim=c(0, 50), xlab="T", ylab="Y")
lines(T, Y1, col="red")
lines(T, Y2, col="green")
lines(T, Y3, col="blue")

# Cubic Change Plot

T <- seq(0, 4, 0.01)
Y1 <- 20 + 5*T - 2*T*T + 0.5*T*T*T
Y2 <- 10 + 0*T + 3*T*T - 0.5*T^3
Y3 <- 30 + 2*T + 3*T*T - 1*T^3
plot(0, 0, type="n", xlim=c(0,4), ylim=c(0, 50), xlab="T", ylab="Y")
lines(T, Y1, col="red")
lines(T, Y2, col="green")
lines(T, Y3, col="blue")

# Linear Spline Change Plot

T <- 0:4
T1 <- c(0, 1, 2, 2, 2)
T2 <- c(0, 0, 0, 1, 2)
Y1 <- 20 + 5*T1 - 2*T2
Y2 <- 10 + 3*T1 + 2*T2
Y3 <- 40 - 2*T1 - 2*T2
plot(0, 0, type="n", xlim=c(0,4), ylim=c(0, 50), xlab="T", ylab="Y")
lines(T, Y1, col="red")
lines(T, Y2, col="green")
lines(T, Y3, col="blue")


# Quadratic Spline Change Plot

T <- seq(0, 4, 0.01)
T1 <- T
T1[T1 > 2] <- 2
T2 <- T - T1
T <- 0:4
T1 <- c(0, 1, 2, 2, 2)
T2 <- c(0, 0, 0, 1, 2)
Y1 <- 20 + 5*T1 - 2*T1^2 - 3*T2 -4*T2^2
Y2 <- 10 + 3*T1 + 1.5*T1^2 + 7*T2 - 2*T2^2
Y3 <- 40 - 4*T1 + 0.5*T1^2 + 0.5*T2^2
plot(0, 0, type="n", xlim=c(0,4), ylim=c(0, 50), xlab="T", ylab="Y")
lines(T, Y1, col="red")
lines(T, Y2, col="green")
lines(T, Y3, col="blue")

# Quadratic Spline Change Divided in Three Phases

T <- seq(0, 8, 0.01)
T1 <- T
T1[T1 > 3] <- 3
T2 <- T - T1
T2[T2 > 3] <- 3
T3 <- T - T1 - T2

T <- 0:8
T1 <- c(0, 1, 2, 3, 3, 3, 3, 3, 3)
T2 <- c(0, 0, 0, 0, 1, 2, 3, 3, 3)
T3 <- c(0, 0, 0, 0, 0, 0, 0, 1, 2)
Y1 <- 20 + 5*T1 - 2*T1^2 - 3*T2 + 1.5*T2^2 + 4*T3 + 2*T3^2
Y2 <- 10 + 3*T1 + 1.5*T1^2 + 7*T2 - 2*T2^2 - 3*T3 + 0.5*T3^2
Y3 <- 40 - 4*T1 + 0.5*T1^2 + 0.5*T2^2 - 0.5*T3
plot(0, 0, type="n", xlim=c(0,8), ylim=c(0, 50), xlab="T", ylab="Y")
lines(T, Y1, col="red")
lines(T, Y2, col="green")
lines(T, Y3, col="blue")

#########################################################

# Spline Model Analysis Example

dat1 <-read.table("lecture9ex1.csv", sep=",", header=TRUE)
psych::describe(dat1)

dat1$timec <- dat1$time - 1
dat1$timebase <- dat1$timec
dat1[dat1$timebase > 4, "timebase"] <- 4
dat1$timetreat <- dat1$timec - dat1$timebase
dat1[dat1$timetreat > 6, "timetreat"] <- 6
dat1$timefollow <- dat1$timec - dat1$timebase - dat1$timetreat

library(lme4)
out1a <- lmer(stress ~ 1 + (1|pid), data=dat1, REML=FALSE)
summary(out1a)
out1b <- lmer(stress ~ 1 + timec + (1|pid), data=dat1, REML=FALSE)
summary(out1b)
out1c <- lmer(stress ~ 1 + timebase + timetreat 
              + timefollow + (1|pid), data=dat1, REML=FALSE)
summary(out1c)
anova(out1b, out1c)

datl <- split(dat1, dat1$pid)
stresspred <- lapply(datl, function(x) { predict(lm(stress ~ timebase + timetreat + timefollow, data=x)) })
dat1$stress2 <- do.call(c, stresspred)
dat1_2 <- dat1[dat1$pid%%20 == 0,]
dat1_2$treat <- factor(dat1_2$treat, labels=c("Control", "Treatment"))
library(ggplot2)
ggplot(data=dat1_2, aes(x=time, y=stress, group=pid, colour=treat)) + geom_line()

ggplot(data=dat1_2, aes(x=time, y=stress2, group=pid, colour=treat)) + geom_line()

psi <- coef(summary(out1c))
dat1g1 <- dat1[dat1$pid == 1,]
yhat <- psi[1,1] + psi[2,1]*dat1g1$timebase + psi[3,1]*dat1g1$timetreat + psi[4,1]*dat1g1$timefollow
plot(dat1g1$time, yhat, type="l", xlab="time", ylab="stress")


out1d <- lmer(stress ~ 1 + timebase + timetreat 
              + timefollow + (1 + timebase|pid), 
              data=dat1, REML=FALSE,
              control = lmerControl(optimizer ="Nelder_Mead"))
anova(out1c, out1d)

out1e <- lmer(stress ~ 1 + timebase + timetreat 
              + timefollow + (1 + timetreat|pid), 
              data=dat1, REML=FALSE,
              control = lmerControl(optimizer ="Nelder_Mead"))
anova(out1c, out1e)

out1f <- lmer(stress ~ 1 + timebase + timetreat 
              + timefollow + (1 + timefollow|pid), 
              data=dat1, REML=FALSE,
              control = lmerControl(optimizer ="Nelder_Mead"))
anova(out1c, out1f)

out1g <- lmer(stress ~ 1 + timetreat + timefollow 
              + (1 + timetreat+ timefollow|pid), 
              data=dat1, REML=FALSE,
              control = lmerControl(optimizer ="Nelder_Mead"))
summary(out1g)

tempsd <- c(0.5799, 1.1468, 1.3455)
tempr <- matrix(c(1, 0.2, -0.67,
           0.2, 1, -0.31,
           -0.67, -0.31, 1), 3, 3)
tempcov <- diag(tempsd) %*% tempr %*% diag(tempsd)
temp <- MASS::mvrnorm(1000, c(52.41, -0.88, 0.587), tempcov)

plot(temp[,1], temp[,2], xlab="Baseline Stress", ylab="Stress Change in Treatment Period")
plot(temp[,1], temp[,3], xlab="Baseline Stress", ylab="Stress Change in Follow-up Period")
plot(temp[,2], temp[,3], xlab="Stress Change in Treatment Period", ylab="Stress Change in Follow-up Period")


dat1$sleephrs <- dat1$sleep / 60
dat1$avesleep <- ave(dat1$sleephrs, dat1$pid)
dat1$diffsleep <- dat1$sleephrs - dat1$avesleep

dat1a <- dat1[!duplicated(dat1$pid),]
round(apply(dat1a, 2, mean), 3)

out1h <- lmer(stress ~ 1 + timetreat + timefollow + diffsleep 
              + I(avesleep - 7.075) + I(treat - 0.5) + I(age - 36.638)
              + (1 + timetreat+ timefollow|pid), data=dat1, REML=FALSE,
              control = lmerControl(optimizer ="Nelder_Mead"))
summary(out1h)

out1i <- lmer(stress ~ 1 + timetreat + timefollow + diffsleep 
              + timetreat*I(avesleep - 7.075) + timetreat*I(treat - 0.5) 
              + timetreat*I(age - 36.638) + timefollow*I(avesleep - 7.075) 
              + timefollow*I(treat - 0.5) + timefollow*I(age - 36.638)
              + (1 + timetreat+ timefollow|pid), data=dat1, REML=FALSE,
              control = lmerControl(optimizer ="Nelder_Mead", calc.derivs = FALSE))
summary(out1i)

# Simple slope tests in each phase for treatment and control groups

psi <- coef(summary(out1i))
ytreat <- 52.499 - 1.812*dat1g1$timetreat + 1.185*dat1g1$timefollow
yctrl <- 52.389 + 0.052*dat1g1$timetreat - 0.005*dat1g1$timefollow

plot(dat1g1$time, ytreat, type="n", xlab="time", ylab="stress", ylim=c(40,55))
lines(dat1g1$time, ytreat, col="red")
lines(dat1g1$time, yctrl, col="green")
legend(1, 43, c("Treatment", "Control"), lty=1, col=c("red", "green"))

sumout1i <- summary(out1i)
coef(sumout1i)

psi <- coef(sumout1i)[,1]
vpsi <- vcov(sumout1i)
cvec1 <- c(1, 0, 0, 0, 0, 0.5, 0, 0, 0, 0, 0, 0, 0) 
cvec2 <- c(1, 0, 0, 0, 0, -0.5, 0, 0, 0, 0, 0, 0, 0)   
cvec3 <- c(0, 1, 0, 0, 0, 0, 0, 0, 0.5, 0, 0, 0, 0) 
cvec4 <- c(0, 1, 0, 0, 0, 0, 0, 0, -0.5, 0, 0, 0, 0) 
cvec5 <- c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0.5, 0) 
cvec6 <- c(0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, -0.5, 0)   
cmat <- cbind(cvec1, cvec2, cvec3, cvec4, cvec5, cvec6)
est <- t(cmat) %*% psi
se <- sqrt(diag(t(cmat) %*% vpsi %*% cmat))
zval <- est/se
pval <- round(pnorm(-abs(zval)) * 2, 6)
cbind(est, se, zval, pval)

cvec1 <- c(0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0) 
cvec2 <- c(0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0)   
cvec3 <- c(0, 0, 0, 0, 0, 1, 0, 0, 2, 0, 0, 0, 0)  
cvec4 <- c(0, 0, 0, 0, 0, 1, 0, 0, 3, 0, 0, 0, 0)  
cvec5 <- c(0, 0, 0, 0, 0, 1, 0, 0, 4, 0, 0, 0, 0)  
cvec6 <- c(0, 0, 0, 0, 0, 1, 0, 0, 5, 0, 0, 0, 0) 
cvec7 <- c(0, 0, 0, 0, 0, 1, 0, 0, 6, 0, 0, 0, 0) 
cvec8 <- c(0, 0, 0, 0, 0, 1, 0, 0, 6, 0, 0, 1, 0) 
cvec9 <- c(0, 0, 0, 0, 0, 1, 0, 0, 6, 0, 0, 2, 0) 
cvec10 <- c(0, 0, 0, 0, 0, 1, 0, 0, 6, 0, 0, 3, 0) 
cmat <- cbind(cvec1, cvec2, cvec3, cvec4, cvec5, cvec6, cvec7, cvec8, cvec9, cvec10)
est <- t(cmat) %*% psi
se <- sqrt(diag(t(cmat) %*% vpsi %*% cmat))
zval <- est/se
pval <- round(pnorm(-abs(zval)) * 2, 6)
cbind(est, se, zval, pval)

# Probing interaction between time and tenure0

plot(dat1$time, dat1$jsat, type="n", xlab="Year of Measurement", ylab="Job Satisfaction")
abline(a = 52.06, b = -2.89, col="red")
abline(a = 49.34, b = -0.81, col="green")
abline(a = 46.62, b = 1.27, col="blue")
legend(0, 90, c(0, 2, 4), col=c("red", "green", "blue"), 
       lty=1, title="Tenure at Year 0")

plot(dat1$tenure0, dat1$jsat, type="n", xlab="Tenure at Year 0", ylab="Job Satisfaction")
abline(a = 49.27 - (2.054*-1.36), b = -1.36, col="red")
abline(a = 48.52 - (2.054*-0.32), b = -0.32, col="green")
abline(a = 47.77 - (2.054*0.72), b = 0.72, col="blue")
abline(a = 47.02 - (2.054*1.76), b = 1.76, col="black")
legend(0, 90, c("0", "1", "2", "3"), col=c("red", "green", "blue", "black"), 
       lty=1, title="Year of Measurement")

cvec1 <- c(1, 0, 0, 0, -2.054, 0, 0, 0, 0, 0, 0, 0) 
cvec2 <- c(1, 0, 0, 0, -0.054, 0, 0, 0, 0, 0, 0, 0)  
cvec3 <- c(1, 0, 0, 0, 1.946, 0, 0, 0, 0, 0, 0, 0) 
cvec4 <- c(0, 1, 0, 0, 0, 0, 0, 0, 0, -2.054, 0, 0) 
cvec5 <- c(0, 1, 0, 0, 0, 0, 0, 0, 0, -0.054, 0, 0) 
cvec6 <- c(0, 1, 0, 0, 0, 0, 0, 0, 0, 1.946, 0, 0)  
cmat <- cbind(cvec1, cvec2, cvec3, cvec4, cvec5, cvec6)
est <- t(cmat) %*% psi
se <- sqrt(diag(t(cmat) %*% vpsi %*% cmat))
zval <- est/se
pval <- round(pnorm(-abs(zval)) * 2, 6)
cbind(est, se, zval, pval)

cvec1 <- c(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 
cvec2 <- c(1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)  
cvec3 <- c(1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 
cvec4 <- c(1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0) 
cvec5 <- c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0) 
cvec6 <- c(0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0)  
cvec7 <- c(0, 0, 0, 0, 1, 0, 0, 0, 0, 2, 0, 0)  
cvec8 <- c(0, 0, 0, 0, 1, 0, 0, 0, 0, 3, 0, 0)  
cmat <- cbind(cvec1, cvec2, cvec3, cvec4, cvec5, cvec6, cvec7, cvec8)
est <- t(cmat) %*% psi
se <- sqrt(diag(t(cmat) %*% vpsi %*% cmat))
zval <- est/se
pval <- round(pnorm(-abs(zval)) * 2, 6)
cbind(est, se, zval, pval)


###################################

# Heteroscedastic Models

# Change in error variances

x <- 1:4
ressd <- 1.5
xx <- seq(1, 4, 0.001)
fixed <- rep(1, length(xx))
pow <- ressd * abs(xx)^(0.8)
pown <- ressd * abs(xx)^(-0.8)
powc <- ressd * (2 + abs(xx)^(0.8))
powcn <- ressd * (2 + abs(xx)^(-0.8))
exp <- ressd * exp(0.4 * xx)
expn <- ressd * exp(-0.4 * xx)

plot(xx, pown, type="n", xlim=c(1,4), ylim=c(0,10), xlab="Time", ylab="Residual Standard Deviation", xaxt="n")
axis(side=1, at=1:4)
lines(xx, fixed, col="black", lty=1)
lines(xx, pow, col="blue", lty=1)
lines(xx, pown, col="blue", lty=2)
lines(xx, powc, col="red", lty=1)
lines(xx, powcn, col="red", lty=2)
lines(xx, exp, col="purple", lty=1)
lines(xx, expn, col="purple", lty=2)

legend(1, 10, c("Fixed", "Power: 0.8", "Power: -0.8", "Constant Power: 2, 0.8", "Constant Power: 2, -0.8", "Exponential: 0.4", "Exponential: -0.4"),
       col=c("black", "blue", "blue", "red", "red", "purple", "purple"),
       lty=c(1, 1, 2, 1, 2, 1, 2))

# Power function SD change plot

set.seed(4)
n <- 1000
x <- seq(1,4,length.out = n)
y <- rnorm(n, sd = 1.5*abs(x)^0.8)
sdy <- 1.5 * (1:4)^0.8
plot(x,y, pch=16, cex=0.5, ylab="e")
segments(x0=1, y0=-sdy[1], x1=1, y1=sdy[1], col="red", lwd=2)
segments(x0=2, y0=-sdy[2], x1=2, y1=sdy[2], col="red", lwd=2)
segments(x0=3, y0=-sdy[3], x1=3, y1=sdy[3], col="red", lwd=2)
segments(x0=4, y0=-sdy[4], x1=4, y1=sdy[4], col="red", lwd=2)

### Analysis Examples

# Equal Variance

dat5 <-read.table("lecture8ex2.csv", sep=",", header=TRUE)
dat5$timec <- dat5$time - 1
dat5$avestress <- ave(dat5$stress, dat5$pid)
dat5$diffstress <- dat5$stress - dat5$avestress
library(lme4)
out5 <- lmer(mem ~ 1 + timec + diffstress + avestress + (1 + timec|pid), data=dat5, REML=FALSE)
summary(out5)

library(nlme)
out5a <- lme(mem ~ 1 + timec + diffstress + avestress, 
             random = ~1 + timec|pid, 
             data=dat5, method="ML", na.action=na.omit)
summary(out5a)

# Unequal error variances in each timepoint

out5b <- lme(mem ~ 1 + timec + diffstress + avestress, 
             random = ~1 + timec|pid, 
             data=dat5, method="ML", na.action=na.omit,
             weights=varIdent(form = ~1|timec))
summary(out5b)
anova(out5a, out5b)

out5bsigma <- as.numeric(VarCorr(out5b)[3, 2])
out5bsigmatime <- out5bsigma * coef(out5b$modelStruct$varStruct, uncons = FALSE)
c(out5bsigma, out5bsigmatime)

# Exponential change of error variances

out5c <- lme(mem ~ 1 + timec + diffstress + avestress, 
             random = ~1 + timec|pid, 
             data=dat5, method="ML", na.action=na.omit,
             weights=varExp(form = ~time))
summary(out5c)
anova(out5a, out5c)
out5csigma <- as.numeric(VarCorr(out5c)[3, 2])
expvalue <- coef(out5c$modelStruct$varStruct, uncons = FALSE)
out5csigma * exp(expvalue * (1:4))

# Power function change of error variances

out5d <- lme(mem ~ 1 + timec + diffstress + avestress, 
             random = ~1 + timec|pid, 
             data=dat5, method="ML", na.action=na.omit,
             weights=varPower(form = ~time))
summary(out5d)
anova(out5a, out5d)
out5dsigma <- as.numeric(VarCorr(out5d)[3, 2])
powervalue <- coef(out5d$modelStruct$varStruct, uncons = FALSE)
out5dsigma * (1:4)^powervalue

# Constant power function change of error variances

out5e <- lme(mem ~ 1 + timec + diffstress + avestress, 
             random = ~1 + timec|pid, 
             data=dat5, method="ML", na.action=na.omit,
             weights=varConstPower(form = ~time),
             control = lmeControl(maxIter=200, msMaxIter = 200))
summary(out5e)
anova(out5a, out5e)
out5esigma <- as.numeric(VarCorr(out5e)[3, 2])
paramvar <- coef(out5e$modelStruct$varStruct, uncons = FALSE)
out5esigma * (paramvar[1] + (1:4)^paramvar[2])

anova(out5e, out5b)

# Heteroscedastic model for interview data

dat6 <-read.table("lecture4ex2.csv", sep=",", header=TRUE)
out6a <- lme(score ~ 1 + ersex + eesex + eesex:ersex + I(iq - 100), 
             random = ~1 + eesex|erid, 
             data=dat6, method="ML", na.action=na.omit)
summary(out6a)

out6b <- lme(score ~ 1 + ersex + eesex + eesex:ersex + I(iq - 100), 
             random = ~1 + eesex|erid, 
             data=dat6, method="ML", na.action=na.omit,
             weights=varIdent(form = ~1|eesex))
summary(out6b)
anova(out6a, out6b)

out6c <- lme(score ~ 1 + ersex + eesex + eesex:ersex + I(iq - 100), 
             random = ~1 + eesex|erid, 
             data=dat6, method="ML", na.action=na.omit,
             weights=varIdent(form = ~1|ersex))
summary(out6c)
anova(out6a, out6c)

out6d <- lme(score ~ 1 + ersex + eesex + eesex:ersex + I(iq - 100), 
             random = ~1 + eesex|erid, 
             data=dat6, method="ML", na.action=na.omit,
             weights=varIdent(form = ~1|eesex * ersex))
summary(out6d)
anova(out6a, out6d)

#####################################################################

### Autocorrelation

# AR(1) Graph

set.seed(123321)
n <- 10
time <- 6
start <- rnorm(n, 0, 1)
rho0 <- matrix(NA, n, time)
rho3 <- matrix(NA, n, time)
rho5 <- matrix(NA, n, time)
rho7 <- matrix(NA, n, time)
rho9 <- matrix(NA, n, time)
rho3n <- matrix(NA, n, time)
rho5n <- matrix(NA, n, time)
rho7n <- matrix(NA, n, time)
rho9n <- matrix(NA, n, time)
rho0[,1] <- start
rho3[,1] <- start
rho5[,1] <- start
rho7[,1] <- start
rho9[,1] <- start
rho3n[,1] <- start
rho5n[,1] <- start
rho7n[,1] <- start
rho9n[,1] <- start

for(i in 1:(time-1)) {
        rho0[,i+1] <- 0*rho0[,i] + rnorm(n, 0, sqrt(1 - 0))
        rho3[,i+1] <- 0.3*rho3[,i] + rnorm(n, 0, sqrt(1 - 0.3^2))
        rho5[,i+1] <- 0.5*rho5[,i] + rnorm(n, 0, sqrt(1 - 0.5^2))
        rho7[,i+1] <- 0.7*rho7[,i] + rnorm(n, 0, sqrt(1 - 0.7^2))
        rho9[,i+1] <- 0.9*rho9[,i] + rnorm(n, 0, sqrt(1 - 0.9^2))
        rho3n[,i+1] <- -0.3*rho3n[,i] + rnorm(n, 0, sqrt(1 - 0.3^2))
        rho5n[,i+1] <- -0.5*rho5n[,i] + rnorm(n, 0, sqrt(1 - 0.5^2))
        rho7n[,i+1] <- -0.7*rho7n[,i] + rnorm(n, 0, sqrt(1 - 0.7^2))
        rho9n[,i+1] <- -0.9*rho9n[,i] + rnorm(n, 0, sqrt(1 - 0.9^2))
}

rho0 <- data.frame(id=rep(1:n, each=time), t=rep(1:time, n), y=as.vector(t(rho0)))
rho3 <- data.frame(id=rep(1:n, each=time), t=rep(1:time, n), y=as.vector(t(rho3)))
rho5 <- data.frame(id=rep(1:n, each=time), t=rep(1:time, n), y=as.vector(t(rho5)))
rho7 <- data.frame(id=rep(1:n, each=time), t=rep(1:time, n), y=as.vector(t(rho7)))
rho9 <- data.frame(id=rep(1:n, each=time), t=rep(1:time, n), y=as.vector(t(rho9)))
rho3n <- data.frame(id=rep(1:n, each=time), t=rep(1:time, n), y=as.vector(t(rho3n)))
rho5n <- data.frame(id=rep(1:n, each=time), t=rep(1:time, n), y=as.vector(t(rho5n)))
rho7n <- data.frame(id=rep(1:n, each=time), t=rep(1:time, n), y=as.vector(t(rho7n)))
rho9n <- data.frame(id=rep(1:n, each=time), t=rep(1:time, n), y=as.vector(t(rho9n)))


library(ggplot2)
ggplot(data=rho0, aes(x=t, y=y, group=id)) + geom_line(aes(color=factor(id))) + annotate("text", label = "AR(1)=0", x = 5, y = 2)
ggplot(data=rho3, aes(x=t, y=y, group=id)) + geom_line(aes(color=factor(id))) + annotate("text", label = "AR(1)=.3", x = 5, y = 2)
ggplot(data=rho5, aes(x=t, y=y, group=id)) + geom_line(aes(color=factor(id))) + annotate("text", label = "AR(1)=.5", x = 5, y = 2)
ggplot(data=rho7, aes(x=t, y=y, group=id)) + geom_line(aes(color=factor(id))) + annotate("text", label = "AR(1)=.7", x = 5, y = 2)
ggplot(data=rho9, aes(x=t, y=y, group=id)) + geom_line(aes(color=factor(id))) + annotate("text", label = "AR(1)=.9", x = 5, y = 2)
ggplot(data=rho3n, aes(x=t, y=y, group=id)) + geom_line(aes(color=factor(id))) + annotate("text", label = "AR(1)=-.3", x = 5, y = 2)
ggplot(data=rho5n, aes(x=t, y=y, group=id)) + geom_line(aes(color=factor(id))) + annotate("text", label = "AR(1)=-.5", x = 5, y = 2)
ggplot(data=rho7n, aes(x=t, y=y, group=id)) + geom_line(aes(color=factor(id))) + annotate("text", label = "AR(1)=-.7", x = 5, y = 2)
ggplot(data=rho9n, aes(x=t, y=y, group=id)) + geom_line(aes(color=factor(id))) + annotate("text", label = "AR(1)=-.9", x = 5, y = 2)

# AR(2) Graph

set.seed(123321)
x <- rep(NA, 100)
x[1] <- 1
x[2] <- -0.5
time <- 100
for(i in 3:time) {
        x[i] <- 0.4*x[i-1] - 0.1*x[i-2] + rnorm(1, 0, sqrt(0.5))
}
plot(x, type="l", xlab="Time", ylab="e")


#### AR Example

dat5 <-read.table("lecture8ex2.csv", sep=",", header=TRUE)
dat5$timec <- dat5$time - 1
dat5$avestress <- ave(dat5$stress, dat5$pid)
dat5$diffstress <- dat5$stress - dat5$avestress
library(nlme)

out5a <- lme(mem ~ 1 + timec + diffstress + avestress, 
             random = ~1 + timec|pid, 
             data=dat5, method="ML", na.action=na.omit)
summary(out5a)

out5e <- lme(mem ~ 1 + timec + diffstress + avestress, 
             random = ~1 + timec|pid, 
             data=dat5, method="ML", na.action=na.omit,
             correlation=corARMA(form = ~ 1 + timec | pid, p = 1))
summary(out5e)
anova(out5a, out5e)

set.seed(123321)
ar1 <- coef(out5e$modelStruct$corStruct, uncons = FALSE)
out5esigma <- as.numeric(VarCorr(out5e)["Residual", "StdDev"])
e1 <- rnorm(20, 0, out5esigma/sqrt(1 - ar1^2)) 
e2 <- ar1*e1 + rnorm(20, 0, out5esigma)
e3 <- ar1*e2 + rnorm(20, 0, out5esigma)
e4 <- ar1*e3 + rnorm(20, 0, out5esigma)

mye <- c(e1, e2, e3, e4)
myid <- rep(1:20, 4)
myt <- rep(1:4, each=20)
daterror5 <- data.frame(t=myt, y=mye, id=myid)
library(ggplot2)
g <- ggplot(data=daterror5, aes(x=t, y=y, group=id)) 
g + geom_line(aes(color=factor(id)), size=1)



out5f <- lme(mem ~ 1 + timec + diffstress + avestress, 
             random = ~1 + timec|pid, 
             data=dat5, method="ML", na.action=na.omit,
             correlation=corARMA(form = ~ 1 + timec | pid, p = 2))
summary(out5f)
anova(out5e, out5f)

set.seed(123321)
ar <- coef(out5f$modelStruct$corStruct, uncons = FALSE)
out5fsigma <- as.numeric(VarCorr(out5f)["Residual", "StdDev"])
out5fsigma <- out5fsigma/sqrt(1 - ar[1]^2 - ar[2]^2 - ((ar[1]^2)*ar[2]/(1 - ar[2])))
e1 <- rnorm(20, 0, out5fsigma)
e2 <- ar[1]*e1 + rnorm(20, 0, out5fsigma)
e3 <- ar[1]*e2 + ar[2]*e1 + rnorm(20, 0, out5fsigma)
e4 <- ar[1]*e3 + ar[2]*e2 + rnorm(20, 0, out5fsigma)

mye <- c(e1, e2, e3, e4)
myid <- rep(1:20, 4)
myt <- rep(1:4, each=20)
daterror6 <- data.frame(t=myt, y=mye, id=myid)
library(ggplot2)
g <- ggplot(data=daterror6, aes(x=t, y=y, group=id)) 
g + geom_line(aes(color=factor(id)), size=1)

out5g <- lme(mem ~ 1 + timec + diffstress + avestress, 
             random = ~1 + timec|pid, 
             data=dat5, method="ML", na.action=na.omit,
             weights=varIdent(form = ~1|timec),
             correlation=corARMA(form = ~ 1 + timec | pid, p = 1))
summary(out5g)
anova(out5e, out5g)

set.seed(123321)
ar1 <- coef(out5g$modelStruct$corStruct, uncons = FALSE)
sdratio <- coef(out5g$modelStruct$varStruct, uncons = FALSE)
out5gsigma <- as.numeric(VarCorr(out5g)["Residual", "StdDev"])
e1 <- rnorm(20, 0, out5gsigma/sqrt(1 - ar1^2))
e2 <- ar1*e1 + rnorm(20, 0, out5gsigma * sdratio[1])
e3 <- ar1*e2 + rnorm(20, 0, out5gsigma * sdratio[2])
e4 <- ar1*e3 + rnorm(20, 0, out5gsigma * sdratio[3])

mye <- c(e1, e2, e3, e4)
myid <- rep(1:20, 4)
myt <- rep(1:4, each=20)
daterror5 <- data.frame(t=myt, y=mye, id=myid)
library(ggplot2)
g <- ggplot(data=daterror5, aes(x=t, y=y, group=id)) 
g + geom_line(aes(color=factor(id)), size=1)

###################################################

### R-squared

# Homoscedastic and no autocorrelation

dat5 <-read.table("lecture8ex2.csv", sep=",", header=TRUE)
dat5$timec <- dat5$time - 1
dat5$avestress <- ave(dat5$stress, dat5$pid)
dat5$diffstress <- dat5$stress - dat5$avestress
library(nlme)
out5a <- lme(mem ~ 1 + timec + diffstress + avestress, 
             random = ~1 + timec|pid, 
             data=dat5, method="ML", na.action=na.omit)
sumout5a <- summary(out5a)

r2mlm:::r2mlm_long_manual(data=dat5,
                          covs=c("timec", "diffstress", "avestress"),
                          random_covs=c("timec"),
                          gammas=coef(sumout5a)[-1, "Value"],
                          clusterID="pid",
                          Tau=getVarCov(out5a),
                          sigma2=out5a$sigma^2)

# Homoscedastic and AR(1)

out5e <- lme(mem ~ 1 + timec + diffstress + avestress, 
             random = ~1 + timec|pid, 
             data=dat5, method="ML", na.action=na.omit,
             correlation=corARMA(form = ~ 1 + timec | pid, p = 1))
sumout5e <- summary(out5e)

ar1 <- coef(out5e$modelStruct$corStruct, uncons = FALSE)

r2mlm:::r2mlm_long_manual(data=dat5,
                          covs=c("timec", "diffstress", "avestress"),
                          random_covs=c("timec"),
                          gammas=coef(sumout5e)[-1, "Value"],
                          clusterID="pid",
                          Tau=getVarCov(out5e),
                          sigma2=out5e$sigma^2/(1 - ar1^2))

# Heteroscedastic and no autocorrelation

out5b <- lme(mem ~ 1 + timec + diffstress + avestress, 
             random = ~1 + timec|pid, 
             data=dat5, method="ML", na.action=na.omit,
             weights=varIdent(form = ~1|timec))
sumout5b <- summary(out5b)

out5bsigma <- out5b$sigma
out5bsigmatime <- out5bsigma * coef(out5b$modelStruct$varStruct, uncons = FALSE)
out5berrvar <- c(out5bsigma, out5bsigmatime)^2
out5berrvar <- cbind(1:4, out5berrvar)

matchindex5 <- match(dat5$time, out5berrvar[,1])
dat5$vare <- out5berrvar[matchindex5, 2]
aveerrvar5b <- mean(dat5$vare)

r2mlm:::r2mlm_long_manual(data=dat5,
                          covs=c("timec", "diffstress", "avestress"),
                          random_covs=c("timec"),
                          gammas=coef(sumout5b)[-1, "Value"],
                          clusterID="pid",
                          Tau=getVarCov(out5b),
                          sigma2=aveerrvar5b)

# Heteroscedastic and AR(1)

out5g <- lme(mem ~ 1 + timec + diffstress + avestress, 
             random = ~1 + timec|pid, 
             data=dat5, method="ML", na.action=na.omit,
             weights=varIdent(form = ~1|timec),
             correlation=corARMA(form = ~ 1 + timec | pid, p = 1))
sumout5g <- summary(out5g)

ar1 <- coef(out5g$modelStruct$corStruct, uncons = FALSE)
sdratio <- coef(out5g$modelStruct$varStruct, uncons = FALSE)
out5gsigmatime <- out5g$sigma * sdratio
out5gerrvar <- c(out5gsigma, out5gsigmatime)^2

errvar1g <- out5gerrvar[1] / (1 - ar1^2)
errvar2g <- (ar1^2)*errvar1g + out5gerrvar[2]
errvar3g <- (ar1^2)*errvar2g + out5gerrvar[3]
errvar4g <- (ar1^2)*errvar3g + out5gerrvar[4]

out5gerrvar <- cbind(1:4, c(errvar1g, errvar2g, errvar3g, errvar4g))

matchindex5 <- match(dat5$time, out5gerrvar[,1])
dat5$varg <- out5gerrvar[matchindex5, 2]
aveerrvar5g <- mean(dat5$varg)

r2mlm:::r2mlm_long_manual(data=dat5,
                          covs=c("timec", "diffstress", "avestress"),
                          random_covs=c("timec"),
                          gammas=coef(sumout5g)[-1, "Value"],
                          clusterID="pid",
                          Tau=getVarCov(out5g),
                          sigma2=aveerrvar5g)