#################################################################### ### This code is accompanied with the R for multilevel models document ### Sunthud Pornprasertmanit ### 2/13/13 #################################################################### ### R Basics # Packages #install.packages(c("lme4", "nlme")) library(lme4) dat <- read.table("mlbook2_r.dat", header = TRUE) # write.table(dat, file = "mlbook2_r.csv", sep = ",", row.names=FALSE) # dat2 <- read.csv("mlbook2_r.csv", header=TRUE) # dat <- read.table(file.choose(), header = TRUE) dat head(dat) tail(dat) # Descriptive Statistics dat$ses dat[ , "ses"] mean(dat$ses) sd(dat$ses) min(dat$ses) max(dat$ses) var(dat$ses) apply(dat, 2, mean) apply(dat, 2, sd) # Multiple Regression out <- lm(langPOST ~ ses + IQ_verb, data = dat) m6 <- airquality$Month == 6 m7 <- airquality$Month == 7 m8 <- airquality$Month == 8 m9 <- airquality$Month == 9 airquality2 <- data.frame(airquality, m6, m7, m8, m9) tail(airquality2) out2 <- lm(Ozone ~ Temp + m6 + m7 + m8 + m9, data = airquality2) summary(out2) airquality$Month <- factor(airquality$Month, labels=c("May", "Jun", "Jul", "Aug", "Sep")) out3 <- lm(Ozone ~ Temp + Month, data = airquality) summary(out3) #################################################################### ### Multilevel Models # Model 0: Null Model library(lme4) m0 <- lmer(langPOST ~ 1 + (1|schoolnr), data = dat, REML=FALSE) summary(m0) out0 <- summary(m0) coef0 <- coef(out0) tvalue0 <- coef0[,"t value"] pnorm(abs(tvalue0), lower.tail=FALSE) * 2 ranef0 <- out0@REmat ranef0 tau00 <- as.numeric(ranef0[1, 3]) sigma2 <- as.numeric(ranef0[2, 3]) icc <- tau00/(tau00 + sigma2) # Model 1: ANCOVA Model m1 <- lmer(langPOST ~ 1 + IQ_verb + (1|schoolnr), data = dat, REML=FALSE) summary(m1) out1 <- summary(m1) coef1 <- coef(out1) tvalue1 <- coef1[,"t value"] pnorm(abs(tvalue1), lower.tail=FALSE) * 2 ranef1 <- out1@REmat tau00_1 <- as.numeric(ranef1[1, 3]) sigma2_1 <- as.numeric(ranef1[2, 3]) icc_1 <- tau00_1 / (tau00_1 + sigma2_1) icc_1 # Model 2: Means-as-Outcome Model dat$denomina <- factor(dat$denomina) table(dat$denomina) m2 <- lmer(langPOST ~ 1 + denomina + (1|schoolnr), data = dat, REML=FALSE) summary(m2) out2 <- summary(m2) coef2 <- coef(out2) tvalue2 <- coef2[,"t value"] pnorm(abs(tvalue2), lower.tail=FALSE) * 2 ranef2 <- out2@REmat tau00_2 <- as.numeric(ranef2[1, 3]) sigma2_2 <- as.numeric(ranef2[2, 3]) icc_2 <- tau00_2 / (tau00_2 + sigma2_2) icc_2 # Model 3: Adjusted-Means-as-Outcome Model m3 <- lmer(langPOST ~ 1 + IQ_verb + denomina + (1|schoolnr), data = dat, REML=FALSE) summary(m3) out3 <- summary(m3) coef3 <- coef(out3) tvalue3 <- coef3[,"t value"] pnorm(abs(tvalue3), lower.tail=FALSE) * 2 ranef3 <- out3@REmat tau00_3 <- as.numeric(ranef3[1, 3]) sigma2_3 <- as.numeric(ranef3[2, 3]) icc_3 <- tau00_3 / (tau00_3 + sigma2_3) icc_3 # Model 4: Random-coefficients model m4 <- lmer(langPOST ~ 1 + IQ_verb + (1 + IQ_verb|schoolnr), data = dat, REML=FALSE) summary(m4) out4 <- summary(m4) coef4 <- coef(out4) tvalue4 <- coef4[,"t value"] pnorm(abs(tvalue4), lower.tail=FALSE) * 2 ranef4 <- out4@REmat tau00_4 <- as.numeric(ranef4[1, 3]) sigma2_4 <- as.numeric(ranef4[3, 3]) icc_4 <- tau00_4 / (tau00_4 + sigma2_4) icc_4 # Model 5: Random-coefficients model with fixed intercepts m5 <- lmer(langPOST ~ 1 + IQ_verb + (0 + IQ_verb|schoolnr), data = dat, REML=FALSE) summary(m5) out5 <- summary(m5) coef5 <- coef(out5) tvalue5 <- coef5[,"t value"] pnorm(abs(tvalue5), lower.tail=FALSE) * 2 # Model 6: Full multilevel model m6 <- lmer(langPOST ~ 1 + IQ_verb + denomina + IQ_verb*denomina + (1 + IQ_verb|schoolnr), data = dat, REML=FALSE) summary(m6) out6 <- summary(m6) coef6 <- coef(out6) tvalue6 <- coef6[,"t value"] pnorm(abs(tvalue6), lower.tail=FALSE) * 2 (pnorm(abs(tvalue6), lower.tail=FALSE) * 2) < .05 ranef6 <- out6@REmat tau00_6 <- as.numeric(ranef6[1, 3]) sigma2_6 <- as.numeric(ranef6[3, 3]) icc_6 <- tau00_6 / (tau00_6 + sigma2_6) icc_6 # Model 7: Random-coefficients model without Covariances between Random Effects m7 <- lmer(langPOST ~ 1 + IQ_verb + (1|schoolnr) + (0 + IQ_verb|schoolnr), data = dat, REML=FALSE) summary(m7) # Model 8: Intercepts- and Slopes-as-Outcomes Model without Cross-level Interaction m8 <- lmer(langPOST ~ 1 + IQ_verb + denomina + (1 + IQ_verb|schoolnr), data = dat, REML=FALSE) summary(m8) # Model 9: Intercepts- and Slopes-as-Outcomes Model without Random Slopes m9 <- lmer(langPOST ~ 1 + IQ_verb + denomina + IQ_verb*denomina + (1|schoolnr), data = dat, REML=FALSE) summary(m9) # Model 10: Intercepts- and Slopes-as-Outcomes Model without Residual Covariances between Random Effects m10 <- lmer(langPOST ~ 1 + IQ_verb + denomina + IQ_verb*denomina + (1|schoolnr) + (0 + IQ_verb|schoolnr), data = dat, REML=FALSE) summary(m10) #################################################################### ### Comparisons between Models # Computing AIC and BIC models <- list(m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10) models.summary <- lapply(models, summary) modelfit <- sapply(models.summary, slot, "AICtab") colnames(modelfit) <- c("m0", "m1", "m2", "m3", "m4", "m5", "m6", "m7", "m8", "m9", "m10") modelfit AIC <- unlist(modelfit[1,]) BIC <- unlist(modelfit[2,]) which(AIC == min(AIC)) which(BIC == min(BIC)) m0sum <- summary(m0) slot(m0sum, "AICtab") # Proportion of variance explained out0 <- summary(m0) ranef0 <- out0@REmat tau00 <- as.numeric(ranef0[1, 3]) sigma2 <- as.numeric(ranef0[2, 3]) out3 <- summary(m3) ranef3 <- out3@REmat tau00_3 <- as.numeric(ranef3[1, 3]) sigma2_3 <- as.numeric(ranef3[2, 3]) groupsize <- table(dat$schoolnr) J <- length(groupsize) denominator <- sum(1/groupsize) harmonic.n <- J/denominator harmonic.n (sigma2 - sigma2_3) / sigma2 (tau00 - tau00_3) / tau00 (sigma2 + tau00 - sigma2_3 - tau00_3) / (sigma2 + tau00) (tau00 + (sigma2/harmonic.n) - tau00_3 - (sigma2_3/harmonic.n)) / (tau00 + (sigma2/harmonic.n)) out6 <- summary(m6) ranef6 <- out6@REmat tau11r <- as.numeric(ranef6[2, 3]) out8 <- summary(m8) ranef8 <- out8@REmat tau11 <- as.numeric(ranef8[2, 3]) (tau11 - tau11r) / tau11 #################################################################### ### Centering # Grand mean centering dat$IQ_verb.grandMC <- dat$IQ_verb - mean(dat$IQ_verb) m1a <- lmer(langPOST ~ 1 + IQ_verb.grandMC + (1|schoolnr), data = dat, REML=FALSE) summary(m1a) m4a <- lmer(langPOST ~ 1 + IQ_verb.grandMC + (1 + IQ_verb.grandMC|schoolnr), data = dat, REML=FALSE) summary(m4a) # Group mean centering dat$IQ_verb.groupMean <- ave(dat$IQ_verb, dat$schoolnr) dat$IQ_verb.groupMC <- dat$IQ_verb - dat$IQ_verb.groupMean m1b <- lmer(langPOST ~ 1 + IQ_verb.groupMC + IQ_verb.groupMean + (1|schoolnr), data = dat, REML=FALSE) summary(m1b) # Centering at L2 dat$IQ_verb.groupMeanC <- dat$IQ_verb.groupMean - mean(dat$IQ_verb.groupMean) dat$sch_min.grandMC <- dat$sch_min - mean(dat$sch_min) m11 <- lmer(langPOST ~ 1 + IQ_verb.groupMC + IQ_verb.groupMeanC + sch_min.grandMC + IQ_verb.groupMC*IQ_verb.groupMeanC + IQ_verb.groupMC*sch_min.grandMC + (1 + IQ_verb.groupMC|schoolnr), data = dat, REML=FALSE) summary(m11) # Not center at L2 m11a <- lmer(langPOST ~ 1 + IQ_verb.groupMC + IQ_verb.groupMean + sch_min + IQ_verb.groupMC*IQ_verb.groupMean + IQ_verb.groupMC*sch_min + (1 + IQ_verb.groupMC|schoolnr), data = dat, REML=FALSE) summary(m11a) #################################################################### ### Interactions # install.packages("rockchalk") # install.packages("phia") # install.packages("rockchalkMultilevel", repos="http://rweb.quant.ku.edu/kran", type="source") library(rockchalkMultilevel) dat$sex <- factor(dat$sex) dat$denomina <- factor(dat$denomina) # Model 12: Lower-level interaction / Probing interaction between two continuous variables m12 <- lmer(langPOST ~ 1 + IQ_verb + ses + IQ_verb*ses + (1 + IQ_verb + ses + IQ_verb*ses|schoolnr), data = dat, REML=FALSE) summary(m12) simpleSlope12 <- plotSlopes.mlm(m12, "IQ_verb", "ses") testSlopes.mlm(simpleSlope12) dat$ses.c <- dat$ses - 9.27 m12c <- lmer(langPOST ~ 1 + IQ_verb + ses.c + IQ_verb*ses.c + (1 + IQ_verb + ses.c + IQ_verb*ses.c|schoolnr), data = dat, REML=FALSE) summary(m12c) # Model 12a: Lower-level interaction with group mean centering / Probing interaction between two continuous variables dat$IQ_verb.groupMean <- ave(dat$IQ_verb, dat$schoolnr) dat$IQ_verb.groupMC <- dat$IQ_verb - dat$IQ_verb.groupMean dat$IQ_verb.groupMeanC <- dat$IQ_verb.groupMean - mean(dat$IQ_verb.groupMean) dat$ses.groupMean <- ave(dat$ses, dat$schoolnr) dat$ses.groupMC <- dat$ses - dat$ses.groupMean dat$ses.groupMeanC <- dat$ses.groupMean - mean(dat$ses.groupMean) m12a <- lmer(langPOST ~ 1 + IQ_verb.groupMC + ses.groupMC + IQ_verb.groupMC*ses.groupMC + IQ_verb.groupMeanC + ses.groupMeanC + (1|schoolnr), data = dat, REML=FALSE) summary(m12a) simpleSlope12a <- plotSlopes.mlm(m12a, "IQ_verb.groupMC", "ses.groupMC") testSlopes.mlm(simpleSlope12a) m12b <- lmer(langPOST ~ 1 + IQ_verb.groupMC + ses.groupMC + IQ_verb.groupMC*ses.groupMC + IQ_verb.groupMeanC + ses.groupMeanC + IQ_verb.groupMC*IQ_verb.groupMeanC + IQ_verb.groupMC*ses.groupMeanC + ses.groupMC*IQ_verb.groupMeanC + ses.groupMC*ses.groupMeanC + IQ_verb.groupMC*ses.groupMC*IQ_verb.groupMeanC* + IQ_verb.groupMC*ses.groupMC*ses.groupMeanC + (1 + IQ_verb.groupMC + ses.groupMC + IQ_verb.groupMC*ses.groupMC|schoolnr), data = dat, REML=FALSE) summary(m12b) # Our tools is applicable only two-way interaction # simpleSlope12b <- plotSlopes.mlm(m12b, "IQ_verb.groupMC", "ses.groupMC") # testSlopes.mlm(simpleSlope12b) # Model 13: Upper-level interaction / Probing interaction between one continuous variable and one categorical variable m13 <- lmer(langPOST ~ 1 + sch_ses + denomina + sch_ses*denomina + (1|schoolnr), data = dat, REML=FALSE) summary(m13) m13a <- lmer(langPOST ~ 1 + sch_ses + denomina + (1|schoolnr), data = dat, REML=FALSE) anova(m13, m13a) simpleSlope13 <- plotSlopes.mlm(m13, "sch_ses", "denomina") testSlopes.mlm(simpleSlope13) ctab <- table(dat$schoolnr, dat$denomina) typeschool <- ctab > 0 apply(typeschool, 2, sum) dat$denomina.c <- relevel(dat$denomina, "2") m13c <- lmer(langPOST ~ 1 + sch_ses + denomina.c + sch_ses*denomina.c + (1|schoolnr), data = dat, REML=FALSE) summary(m13c) dat$sch_ses.c5 <- dat$sch_ses - 5 m13d <- lmer(langPOST ~ 1 + sch_ses.c5 + denomina + sch_ses.c5*denomina + (1|schoolnr), data = dat, REML=FALSE) summary(m13d) # Model 14: Cross-level interaction / Probing interaction between two categorical variables m14 <- lmer(langPOST ~ 1 + sex + denomina + sex*denomina + (1 + sex|schoolnr), data = dat, REML=FALSE) summary(m14) m14a <- lmer(langPOST ~ 1 + sex + denomina + (1 + sex|schoolnr), data = dat, REML=FALSE) anova(m14, m14a) interactionMeans.mlm(m14) plot(interactionMeans.mlm(m14)) testInteractions.mlm(m14, fixed="denomina", across="sex", adjustment = "none") testInteractions.mlm(m14, fixed="sex", across="denomina", adjustment = "bonferroni") testInteractions.mlm(m14, fixed="sex", pairwise="denomina", adjustment = "holm") dat$sex.groupMean <- ave(as.numeric(dat$sex), dat$schoolnr) dat$sex.groupMC <- as.numeric(dat$sex) - dat$sex.groupMean dat$sex.groupMeanC <- dat$sex.groupMean - mean(dat$sex.groupMean) m14b <- lmer(langPOST ~ 1 + sex.groupMC + denomina + sex.groupMC*denomina + sex.groupMeanC + sex.groupMC*sex.groupMeanC + (1 + sex.groupMC|schoolnr), data = dat, REML=FALSE) simpleSlope14b <- plotSlopes.mlm(m14b, "sex.groupMC", "denomina") testSlopes.mlm(simpleSlope14b) interactionMeans.mlm(m14b) plot(interactionMeans.mlm(m14b)) testInteractions.mlm(m14b, fixed="denomina", across="sex.groupMC", adjustment = "none") testInteractions.mlm(m14b, fixed="sex", across="denomina", adjustment = "bonferroni") testInteractions.mlm(m14b, fixed="sex", pairwise="denomina", adjustment = "holm") #################################################################### ### Longitudinal Model library(lme4) long <- read.csv("mathgrowth.csv", header = TRUE, na.strings="-999999") long$gradec <- long$grade - 7 m15 <- lmer(mathach ~ 1 + gradec + (1 + gradec|caseid), data=long, REML=FALSE) summary(m15) m15a <- lmer(mathach ~ 1 + gradec + (1|caseid), data=long, REML=FALSE) anova(m15a, m15) library(ggplot2) ggplot(long, aes(x = gradec, y = mathach, group=caseid)) + geom_line() long1 <- long[1:(6*100),] ggplot(long1, aes(x = gradec, y = mathach, group=caseid)) + geom_line() ggplot(long1, aes(x = gradec, y = mathach, group=caseid)) + geom_smooth(method=lm, se=FALSE) long2 <- long[1:(6*9),] ggplot(long2, aes(x = gradec, y = mathach)) + facet_wrap(~caseid) + geom_point() + geom_smooth(method=lm, se=TRUE) m16 <- lmer(mathach ~ 1 + gradec + I(gradec^2) + (1 + gradec + I(gradec^2)|caseid), data=long, REML=FALSE) summary(m16) m16a <- lmer(mathach ~ 1 + gradec + I(gradec^2) + (1 + gradec|caseid), data=long, REML=FALSE) anova(m15, m16a) anova(m16a, m16) ggplot(long1, aes(x = gradec, y = mathach, group=caseid)) + geom_smooth(method = "lm", formula = y ~ x + I(x^2), se=FALSE) ggplot(long2, aes(x = gradec, y = mathach)) + facet_wrap(~caseid) + geom_point() + geom_smooth(method = "lm", formula = y ~ x + I(x^2), se=TRUE) long$gender <- factor(long$gender, labels=c("female", "male")) m17 <- lmer(mathach ~ 1 + gradec + gender + gradec*gender + (1 + gradec|caseid), data=long, REML=FALSE) summary(m17) anova(m15, m17) library(rockchalkMultilevel) simpleSlope17 <- plotSlopes.mlm(m17, "gradec", "gender") testSlopes.mlm(simpleSlope17) long1 <- long[1:(6*100),] ggplot(long1, aes(x = gradec, y = mathach, group=caseid, colour=gender)) + geom_line() ggplot(long1, aes(x = gradec, y = mathach, group=caseid, colour=gender)) + geom_smooth(method=lm, se=FALSE) long2 <- long[1:(6*9),] ggplot(long2, aes(x = gradec, y = mathach, colour=gender)) + facet_wrap(~caseid) + geom_point() + geom_smooth(method=lm, se=TRUE) long$parentpushC <- long$parentpush - mean(long$parentpush, na.rm=TRUE) long$peerpushC <- long$peerpush - mean(long$peerpush, na.rm=TRUE) m18 <- lmer(mathach ~ 1 + gradec + parentpushC + peerpushC + (1 + gradec|caseid), data=long, REML=FALSE) summary(m18) anova(m15, m18) m18a <- lmer(mathach ~ 1 + gradec + parentpushC + peerpushC + (1 + gradec + parentpushC + peerpushC|caseid), data=long, REML=FALSE) summary(m18a) anova(m18, m18a) # Note on mean centering meanc <- function(x) mean(x, na.rm=TRUE) long$parentpushGroupM <- ave(long$parentpush, long$caseid, FUN=meanc) long$peerpushGroupM <- ave(long$peerpush, long$caseid, FUN=meanc) long$parentpushGroupC <- long$parentpush - long$parentpushGroupM long$peerpushGroupC <- long$peerpush - long$peerpushGroupM library(nlme) m15nlme <- lme(mathach ~ 1 + gradec, random = ~1 + gradec|caseid, data=long, method="ML", na.action=na.omit) summary(m15nlme) m19 <- update(m15nlme, weight=varIdent(form = ~1|gradec)) summary(m19) anova(m15nlme, m19) l1sd <- as.numeric(VarCorr(m19)[3, 2]) l1sd * coef(m19$modelStruct$varStruct, unconstrained = FALSE) ACF(m15nlme) plot(ACF(m15nlme), alpha=.05)) m20 <- update(m15nlme, correlation=corARMA(p = 1)) summary(m20) anova(m15nlme, m20) long$gradec1 <- long$gradec long$gradec1[long$gradec1 %in% c(3, 4, 5)] <- 2 long$gradec2 <- long$gradec - long$gradec1 m21 <- lmer(mathach ~ 1 + gradec1 + gradec2 + (1 + gradec1 + gradec2|caseid), data=long, REML=FALSE) summary(m21) ggplot(long2, aes(x = gradec, y = mathach, group = gradec > 2.5)) + facet_wrap(~caseid) + geom_point() + geom_smooth(method = "lm", se=TRUE) #################################################################### ### Reshape library(lme4) long <- read.csv("mathgrowth.csv", header = TRUE, na.strings="-999999") wide <- reshape(data = long, idvar = "caseid", timevar="grade", v.names=c("mathach", "parentpush", "peerpush"), direction="wide") mathach <- paste0("mathach.", 7:12) parentpush <- paste0("parentpush.", 7:12) peerpush <- paste0("peerpush.", 7:12) timevarying <- list(mathach, parentpush, peerpush) long2 <- reshape(data = wide, idvar = "caseid", times = 7:12, timevar="grade", varying = timevarying, v.names=c("mathach", "parentpush", "peerpush"), direction="long") #################################################################### ### Multiple Imputation library(lme4) library(mice) long <- read.csv("mathgrowth.csv", header = TRUE, na.strings="-999999") long$gender <- long$gender == 2 # 1 = Male long$hispanic <- long$race == 1 # 1 = Hispanic long$black <- long$race == 2 # 1 = Black long$gradec <- long$grade - 7 long <- data.frame(long, intgender = long$gradec * long$gender, inthisp = long$gradec * long$hispanic, intblack = long$gradec * long$black) l2id <- "caseid" unused <- c("schoolid", "grade", "race") used <- setdiff(colnames(long), c(l2id, unused)) ini <- mice(long, maxit = 0) pred <- ini$pred meth <- ini$meth pred[unused, ] <- 0 pred[, unused] <- 0 pred[used, l2id] <- -2 pred[l2id, ] <- 0 meth[c(l2id, unused)] <- "" nomiss <- c("gender", "gradec", "intgender") miss <- setdiff(used, nomiss) random <- c("mathach", "parentpush", "peerpush", "gradec") fixed <- setdiff(used, random) pred[nomiss, ] <- 0 pred[miss, fixed] <- 1 pred[miss, random] <- 2 diag(pred) <- 0 meth[c("likemath", "inthisp", "intblack")] <- "2lonly.norm" meth[c("hispanic", "black")] <- "2lonly.pmm" meth[c("mathach", "parentpush", "peerpush")] <- "2l.pan" meth[nomiss] <- "" meth["inthisp"] <- "~I(gradec*hispanic)" meth["intblack"] <- "~I(gradec*black)" pred[c("hispanic", "black"), c("inthisp", "intblack")] <- 0 # pred["mathach", ] <- c(-2, 0, 0, 0, 2, 2, 1, 1, 0, 2, 1, 1, 1, 1, 1) # pred["parentpush", ] <- c(-2, 0, 0, 2, 0, 2, 1, 1, 0, 2, 1, 1, 1, 1, 1) # pred["peerpush", ] <- c(-2, 0, 0, 2, 2, 0, 1, 1, 0, 2, 1, 1, 1, 1, 1) # pred["likemath", ] <- c(-2, 0, 0, 2, 2, 2, 0, 1, 0, 2, 1, 1, 1, 1, 1) # pred["hispanic", ] <- c(-2, 0, 0, 2, 2, 2, 1, 1, 0, 2, 0, 1, 1, 0, 0) # pred["black", ] <- c(-2, 0, 0, 2, 2, 2, 1, 1, 0, 2, 1, 0, 1, 0, 0) # pred["race", ] <- 0 # pred["inthisp",] <- 0 # pred["intblack",] <- 0 #pred[,"grade"] <- 0 imp <- mice(long, m = 5, maxit = 10, meth = meth, pred = pred, seed = 123321) plot(imp, c("mathach", "parentpush", "peerpush")) plot(imp, c("likemath", "hispanic", "black")) fit1 <- with(imp, lmer(mathach ~ gradec + (1|caseid), REML=FALSE)) fit2 <- with(imp, lmer(mathach ~ gradec + (1 + gradec|caseid), REML=FALSE)) out1 <- pool(fit1) summary(out1) out2 <- pool(fit2) summary(out2) # The method has not been tested yet. library(rockchalkMultilevel) anovaMI(fit1, fit2) # Compare with listwise out1 <- lmer(mathach ~ gradec + (1|caseid), data=long, REML=FALSE) summary(out1) out2 <- lmer(mathach ~ gradec + (1 + gradec|caseid), data=long, REML=FALSE) summary(out2) anova(out1, out2) fit3 <- with(imp, lmer(mathach ~ I(parentpush - ave(parentpush, caseid)) + I(ave(parentpush, caseid) - mean(parentpush)) + (1|caseid), REML=FALSE)) out3 <- pool(fit3) summary(out3) #################################################################### ### Multiparameter comparison m21 <- lmer(mathach ~ 1 + gradec1 + gradec2 + (1 + gradec1 + gradec2|caseid), data=long, REML=FALSE) summary(m21) ctr <- matrix(c(0, 1, -1), 1) library(multcomp) phasediff <- glht(m21, linfct = ctr) summary(phasediff) plot(confint(phasediff)) phasediff2 <- glht(m21, linfct = "gradec1 - gradec2 = 0") summary(phasediff2) ctr1 <- c(0, 0, 1/2, 1/2) ctr2 <- c(0, 0, 1, -1) ctr <- rbind(ctr1, ctr2) pushctr <- glht(m18, linfct = ctr) summary(pushctr) summary(pushctr, test = adjusted(type = "bonferroni")) plot(confint(pushctr)) pushctr2 <- glht(m18, linfct = c("0.5*parentpushC + 0.5*peerpushC = 0", "parentpushC - peerpushC = 0")) summary(pushctr2, test = adjusted(type = "bonferroni")) plot(confint(pushctr2)) ctr <- matrix(c(0, 0, 1/3, 1/3, -1/2, -1/2), 1) typediff <- glht(m3, linfct = ctr) summary(typediff) wald.mlm(m18, ctr) #################################################################### ### Three-level model measure <- rep(1:5, 10 * 20) day <- rep(rep(1:10, each = 5), 20) id <- rep(1:20, each = 5 * 10) pos3 <- rep(rnorm(20, 0, sqrt(0.4)), each = 5 * 10) pos2 <- rep(rnorm(200, 0, sqrt(0.3)), each = 5) pos1 <- rnorm(1000, 0, sqrt(0.3)) posaffect <- round(10 * (pos1 + pos2 + pos3) + 50) pos <- data.frame(id, day, measure, posaffect) write.table(pos, file="posaffect.csv", sep=",", row.names=FALSE, col.names=TRUE) posaffect <- read.csv("posaffect.csv") posaffect[c(1:10, 51:60), ] posaffect$l2unit <- paste(posaffect$id, posaffect$day) posaffect[c(1:10, 51:60), ] library(lme4) m1 <- lmer(posaffect ~ 1 + (1|id) + (1|day), data = posaffect, REML = FALSE) m2 <- lmer(posaffect ~ 1 + (1|id) + (1|l2unit), data = posaffect, REML = FALSE) library(lme4) long <- read.csv("mathgrowthclass.csv", header = TRUE, na.strings="-999999") m22 <- lmer(mathach ~ 1 + (1|caseid) + (1|schoolid), data=long, REML=FALSE) summary(m22) m22 <- lmer(mathach ~ 1 + (1|schoolid/caseid), data=long, REML=FALSE) out22 <- summary(m22) ranef22 <- out22@REmat ranef22 tau3 <- as.numeric(ranef22[1, 3]) tau2 <- as.numeric(ranef22[2, 3]) sigma2 <- as.numeric(ranef22[3, 3]) icc3 <- tau3/(tau3 + tau2 + sigma2) icc3 icc2 <- tau2/(tau3 + tau2 + sigma2) icc2 long$gradec <- long$grade - 7 m23 <- lmer(mathach ~ 1 + gradec + (1 + gradec|caseid) + (1 + gradec|schoolid), data=long, REML=FALSE) summary(m23) long$gender <- factor(long$gender, labels=c("female", "male")) m24 <- lmer(mathach ~ 1 + gradec + gender + gradec*gender + (1 + gradec|caseid) + (1 + gradec + gender + gradec*gender|schoolid), data=long, REML=FALSE) summary(m24) library(rockchalkMultilevel) simpleSlope24 <- plotSlopes.mlm(m24, "gradec", "gender") testSlopes.mlm(simpleSlope24) m24a <- lmer(mathach ~ 1 + gradec + gender + gradec*gender + (1 + gradec|caseid) + (1 + gradec|schoolid), data=long, REML=FALSE) anova(m24a, m24) long$cohortsizeC <- long$cohortsize - mean(long$cohortsize, na.rm = TRUE) m25 <- lmer(mathach ~ 1 + gradec + cohortsizeC + gradec*cohortsizeC + (1 + gradec|caseid) + (1 + gradec|schoolid), data=long, REML=FALSE) summary(m25) library(nlme) m22n <- lme(mathach ~ 1, random = ~1|schoolid/caseid, data=long, method = "ML", na.action="na.omit") summary(m22n) m23n <- lme(mathach ~ 1 + gradec, random = ~1 + gradec|schoolid/caseid, data=long, method = "ML", na.action="na.omit") summary(m23n) m23na <- lme(mathach ~ 1 + gradec, random = list(schoolid = pdSymm(form = ~1), caseid = pdSymm(form = ~1 + gradec)), data=long, method = "ML", na.action="na.omit") summary(m23na) m24n <- lme(mathach ~ 1 + gradec + gender + gradec*gender, random = ~1 + gradec + gender + gradec*gender|schoolid/caseid, data=long, method = "ML", na.action="na.omit") summary(m24n) m25n <- lme(mathach ~ 1 + gradec + gender + gradec*gender, random = ~1 + gradec + gender + gradec*gender|schoolid/caseid, data=long, method = "ML", na.action="na.omit") summary(m25n) #################################################################### ### Multivariate Model library(nlme) long <- read.csv("mathgrowth.csv", header = TRUE, na.strings="-999999") long$gradec <- long$grade - 7 long$gender <- factor(long$gender, labels=c("female", "male")) long <- data.frame(long, obs = seq(nrow(long))) library(reshape2) dvvars <- c("parentpush", "peerpush") othervars <- setdiff(colnames(long), dvvars) long2 <- melt(long, id.vars = othervars, measure.vars = dvvars) long2$constparent <- as.numeric(long2$variable == "parentpush") long2$constpeer <- as.numeric(long2$variable == "peerpush") m26 <- lme(value ~ 0 + constparent + constpeer, data = long2, random = ~0 + constparent + constpeer | caseid, correlation = corSymm(form = ~ 1 | caseid/obs), weights = varIdent(form = ~1 | variable), method = "ML", na.action = "na.omit") summary(m26) allvar <- VarCorr(m26) dv1sd <- as.numeric(allvar[3, 2]) l1sd <- c(dv1sd, dv1sd * coef(m26$modelStruct$varStruct, unconstrained = FALSE)) l1var <- l1sd^2 l2var <- as.numeric(allvar[1:2, 1]) icc <- l2var / (l1var + l2var) icc library(multcomp) ctr <- glht(m26, "constparent - constpeer = 0") summary(ctr) m26a <- lme(value ~ 1, data = long2, random = ~0 + constparent + constpeer | caseid, correlation = corSymm(form = ~ 1 | caseid/obs), weights = varIdent(form = ~1 | variable), method = "ML", na.action = "na.omit") summary(m26a) anova(m26, m26a) m26b <- lme(value ~ 0 + constparent + constpeer, data = long2, random = ~1 | caseid, correlation = corSymm(form = ~ 1 | caseid/obs), weights = varIdent(form = ~1 | variable), method = "ML", na.action = "na.omit") summary(m26b) anova(m26, m26b) m27 <- lme(value ~ 0 + constparent + constparent:gradec + constpeer + constpeer:gradec, data = long2, random = ~0 + constparent + constparent:gradec + constpeer + constpeer:gradec| caseid, correlation = corSymm(form = ~ 1 | caseid/obs), weights = varIdent(form = ~1 | variable), method = "ML", na.action = "na.omit", control = lmeControl(maxIter = 500, msMaxIter = 500, niterEM = 100, msMaxEval = 400)) summary(m27) library(multcomp) ctr <- glht(m27, c("constparent - constpeer = 0", "constparent:gradec - gradec:constpeer = 0")) summary(ctr) m27a <- lme(value ~ 1 + constparent:gradec + constpeer:gradec, data = long2, random = ~0 + constparent + constparent:gradec + constpeer + constpeer:gradec| caseid, correlation = corSymm(form = ~ 1 | caseid/obs), weights = varIdent(form = ~1 | variable), method = "ML", na.action = "na.omit", control = lmeControl(maxIter = 500, msMaxIter = 500, niterEM = 100, msMaxEval = 400)) summary(m27a) anova(m27, m27a) m27b <- lme(value ~ 0 + gradec + constparent + constpeer, data = long2, random = ~0 + constparent + constparent:gradec + constpeer + constpeer:gradec| caseid, correlation = corSymm(form = ~ 1 | caseid/obs), weights = varIdent(form = ~1 | variable), method = "ML", na.action = "na.omit", control = lmeControl(maxIter = 500, msMaxIter = 500, niterEM = 100, msMaxEval = 400)) summary(m27b) anova(m27, m27b) m28 <- lme(value ~ 0 + variable + variable:gradec + variable:gender + variable:gender:gradec, data = long2, random = ~0 + variable + variable:gradec| caseid, correlation = corSymm(form = ~ 1 | caseid/obs), weights = varIdent(form = ~1 | variable), method = "ML", na.action = "na.omit", control = lmeControl(maxIter = 500, msMaxIter = 500, niterEM = 100, msMaxEval = 400)) summary(m28) library(multcomp) ctr <- glht(m28, "variableparentpush:gradec:gendermale - variablepeerpush:gradec:gendermale = 0") summary(ctr) m28a <- lme(value ~ 0 + variable + variable:gradec + variable:gender + 1:gender:gradec, data = long2, random = ~0 + variable + variable:gradec| caseid, correlation = corSymm(form = ~ 1 | caseid/obs), weights = varIdent(form = ~1 | variable), method = "ML", na.action = "na.omit", control = lmeControl(maxIter = 500, msMaxIter = 500, niterEM = 100, msMaxEval = 400)) summary(m28a) anova(m28, m28a) #################################################################### ### Multiple-group Model library(nlme) long <- read.csv("mathgrowth.csv", header = TRUE, na.strings="-999999") long$gradec <- long$grade - 7 long$female <- as.numeric(long$gender == 1) long$male <- as.numeric(long$gender == 2) m29 <- lme(mathach ~ 0 + female + male, data = long, random = list(caseid=pdBlocked(list(pdSymm(form = ~0 + female), pdSymm(form = ~0 + male)))), weights = varIdent(form = ~1 | gender), method = "ML", na.action = "na.omit") summary(m29) allvar <- VarCorr(m29) malesd <- as.numeric(allvar[3, 2]) l1sd <- c(malesd * coef(m29$modelStruct$varStruct, unconstrained = FALSE), malesd) l1var <- l1sd^2 l2var <- as.numeric(allvar[1:2, 1]) icc <- l2var / (l1var + l2var) icc m29 <- lme(mathach ~ 0 + female + male, data = long, random = list(caseid=pdBlocked(list(pdSymm(form = ~0 + female), pdSymm(form = ~0 + male)))), weights = varIdent(form = ~1 | gender), method = "ML", na.action = "na.omit") summary(m29) library(multcomp) ctr <- glht(m29, "female - male = 0") summary(ctr) m29a <- lme(mathach ~ 1, data = long, random = list(caseid=pdBlocked(list(pdSymm(form = ~0 + female), pdSymm(form = ~0 + male)))), weights = varIdent(form = ~1 | gender), method = "ML", na.action = "na.omit") summary(m29a) anova(m29, m29a) m29b <- lme(mathach ~ 0 + female + male, data = long, random = ~1 | caseid, weights = varIdent(form = ~1 | gender), method = "ML", na.action = "na.omit") summary(m29b) anova(m29, m29b) m30 <- lme(mathach ~ 0 + female + female:gradec + male + male:gradec, data = long, random=list(caseid=pdBlocked(list(pdSymm(form = ~0 + female + female:gradec), pdSymm(form = ~0 + male + male:gradec)))), weights = varIdent(form = ~1 | gender), method = "ML", na.action = "na.omit", control = lmeControl(maxIter = 500, msMaxIter = 500, niterEM = 100, msMaxEval = 400)) summary(m30) library(multcomp) ctr <- glht(m30, c("female - male = 0", "female:gradec - gradec:male = 0")) summary(ctr) long$likemathC <- long$likemath - mean(long$likemath, na.rm = TRUE) m31 <- lme(mathach ~ 0 + female + female:gradec + female:likemathC + female:gradec:likemathC + male + male:gradec + male:likemathC + male:gradec:likemathC, data = long, random=list(caseid=pdBlocked(list(pdSymm(form = ~0 + female + female:gradec), pdSymm(form = ~0 + male + male:gradec)))), weights = varIdent(form = ~1 | gender), method = "ML", na.action = "na.omit", control = lmeControl(maxIter = 500, msMaxIter = 500, niterEM = 100, msMaxEval = 400)) summary(m31) library(multcomp) ctr <- glht(m31, "female:gradec:likemathC - gradec:likemathC:male = 0") summary(ctr) #################################################################### ### Check Assumptions fm1 <- lmer(Reaction ~ Days + (Days|Subject), sleepstudy) f <- fitted(fm1) r <- residuals(fm1) plot(f,r) sm <- loess(r~f) v <- seq(min(f),max(f),length=101) lines(v,predict(sm,data.frame(f=v)),col=2) qqnorm(r); qqline(r, col = 2) library(influence.ME) plot.lme #################################################################### ### MCMCglmm library(MCMCglmm) data(PlodiaPO) model1<-MCMCglmm(PO~1, random=~FSfamily, data=PlodiaPO, verbose=FALSE) summary(model1) m1a <- lmer(PO~1 + (1|FSfamily), data=PlodiaPO, REML=FALSE) summary(m1a) #################################################################### ### Codes that I used for creating graphs # Separate regressions in m4 plot(dat$IQ_verb, dat$langPOST, type="n", xlab = "Verbal IQ Score", ylab = "Language Score", main = "Model 4: Random-Coefficient Regressions") l2coef <- coef(m4)$schoolnr for(i in 1:nrow(l2coef)) { l2id <- as.numeric(rownames(l2coef)[i]) l2dat <- dat[dat$schoolnr == l2id,] xmin <- min(l2dat$IQ_verb) xmax <- max(l2dat$IQ_verb) ymin <- l2coef[i, 1] + l2coef[i, 2]*xmin ymax <- l2coef[i, 1] + l2coef[i, 2]*xmax lines(c(xmin, xmax), c(ymin, ymax), lwd=0.8, col="grey50") } totalxmin <- min(dat$IQ_verb) totalxmax <- max(dat$IQ_verb) totalymin <- coef4[1, 1] + coef4[2, 1]*totalxmin totalymax <- coef4[1, 1] + coef4[2, 1]*totalxmax lines(c(totalxmin, totalxmax), c(totalymin, totalymax), lwd=3) # Separate regressions in m5 plot(dat$IQ_verb, dat$langPOST, type="n", xlab = "Verbal IQ Score", ylab = "Language Score", main = "Model 5: Random-Coefficient Regressions with fixed Intercept") l2coef <- coef(m5)$schoolnr for(i in 1:nrow(l2coef)) { l2id <- as.numeric(rownames(l2coef)[i]) l2dat <- dat[dat$schoolnr == l2id,] xmin <- min(l2dat$IQ_verb) xmax <- max(l2dat$IQ_verb) ymin <- l2coef[i, 1] + l2coef[i, 2]*xmin ymax <- l2coef[i, 1] + l2coef[i, 2]*xmax lines(c(xmin, xmax), c(ymin, ymax), lwd=0.8, col="grey50") } totalxmin <- min(dat$IQ_verb) totalxmax <- max(dat$IQ_verb) totalymin <- coef5[1, 1] + coef5[2, 1]*totalxmin totalymax <- coef5[1, 1] + coef5[2, 1]*totalxmax lines(c(totalxmin, totalxmax), c(totalymin, totalymax), lwd=3) # Separate regressions in m3 plot(dat$IQ_verb, dat$langPOST, type="n", xlab = "Verbal IQ Score", ylab = "Language Score", main = "Model 3: Means-as-Outcomes Model") colgroup <- c("red", "blue", "green", "darkgreen", "violet") l2coef <- coef(m3)$schoolnr for(i in 1:nrow(l2coef)) { l2id <- as.numeric(rownames(l2coef)[i]) l2dat <- dat[dat$schoolnr == l2id,] xmin <- min(l2dat$IQ_verb) xmax <- max(l2dat$IQ_verb) d2 <- l2dat$denomina[i] == 2 d3 <- l2dat$denomina[i] == 3 d4 <- l2dat$denomina[i] == 4 d5 <- l2dat$denomina[i] == 5 ymin <- l2coef[i, 1] + l2coef[i, 2]*xmin ymax <- l2coef[i, 1] + l2coef[i, 2]*xmax lines(c(xmin, xmax), c(ymin, ymax), lwd=0.8, col=colgroup[l2dat$denomina[i]]) } dummy <- diag(4) dummy <- rbind(0, dummy) for(i in 1:nrow(dummy)) { totalxmin <- min(dat$IQ_verb[dat$denomina == i]) totalxmax <- max(dat$IQ_verb[dat$denomina == i]) totalymin <- coef3[, 1] %*% c(1, totalxmin, dummy[i,]) totalymax <- coef3[, 1] %*% c(1, totalxmax, dummy[i,]) lines(c(totalxmin, totalxmax), c(totalymin, totalymax), lwd=3, col=colgroup[i]) } legend(2, 25, c("Type 1", "Type 2", "Type 3", "Type 4", "Type 5"), col = colgroup, lty = rep(1, 5), lwd = rep(3, 5)) # Separate regressions in m6 plot(dat$IQ_verb, dat$langPOST, type="n", xlab = "Verbal IQ Score", ylab = "Language Score", main = "Model 6: Intercepts- and Slopes-as-Outcomes Model") colgroup <- c("red", "blue", "green", "darkgreen", "violet") l2coef <- coef(m6)$schoolnr for(i in 1:nrow(l2coef)) { l2id <- as.numeric(rownames(l2coef)[i]) l2dat <- dat[dat$schoolnr == l2id,] xmin <- min(l2dat$IQ_verb) xmax <- max(l2dat$IQ_verb) d2 <- l2dat$denomina[i] == 2 d3 <- l2dat$denomina[i] == 3 d4 <- l2dat$denomina[i] == 4 d5 <- l2dat$denomina[i] == 5 ymin <- l2coef[i, 1] + l2coef[i, 2]*xmin ymax <- l2coef[i, 1] + l2coef[i, 2]*xmax lines(c(xmin, xmax), c(ymin, ymax), lwd=0.8, col=colgroup[l2dat$denomina[i]]) } dummy <- diag(4) dummy <- rbind(0, dummy) for(i in 1:nrow(dummy)) { totalxmin <- min(dat$IQ_verb[dat$denomina == i]) totalxmax <- max(dat$IQ_verb[dat$denomina == i]) totalymin <- coef6[, 1] %*% c(1, totalxmin, dummy[i,], totalxmin*dummy[i,]) totalymax <- coef6[, 1] %*% c(1, totalxmax, dummy[i,], totalxmax*dummy[i,]) print((totalymax - totalymin)/(totalxmax - totalxmin)) lines(c(totalxmin, totalxmax), c(totalymin, totalymax), lwd=3, col=colgroup[i]) } legend(2, 25, c("Type 1", "Type 2", "Type 3", "Type 4", "Type 5"), col = colgroup, lty = rep(1, 5), lwd = rep(3, 5)) #################################################################### ### Data Transformation Script # wide <- read.csv("longusemiss.csv", header = TRUE, na.strings="-999999") # dat <- matrix(NA, nrow(wide)*6, 9) # dat <- as.data.frame(dat) # for(i in 1:nrow(wide)) { # if(i%%100 == 0) print(i) # dat[((i-1)*6) + (1:6), 1] <- wide[i, 1] # dat[((i-1)*6) + (1:6), 2] <- wide[i, 2] # dat[((i-1)*6) + (1:6), 3] <- 7:12 # dat[((i-1)*6) + (1:6), 4] <- unlist(wide[i, 3:8]) # dat[((i-1)*6) + (1:6), 5] <- unlist(wide[i, 10:15]) # dat[((i-1)*6) + (1:6), 6] <- unlist(wide[i, 16:21]) # dat[((i-1)*6) + (1:6), 7] <- wide[i, 9] # dat[((i-1)*6) + (1:6), 8] <- wide[i, 22] # dat[((i-1)*6) + (1:6), 9] <- wide[i, 23] # } # colnames(dat) <- c("caseid", "schoolid", "grade", "mathach", "parentpush", "peerpush", "likemath", "gender", "race") # write.table(dat, file="mathgrowth.csv", sep=",", col.names=TRUE, row.names=FALSE, na="-999999") #################################################################### ### Scratch Paper pr8 <- profile(fm8 <- lmer(mathgain ~ mathkind +minority + ses + (1 | classid) + (1 | schoolid),classroom, REML = FALSE)) library(nlme) library(ProfileLikelihood) m6lme <- lme(langPOST ~ 1 + IQ_verb + denomina + IQ_verb*denomina, random = ~1 + IQ_verb | schoolnr, data = dat, method="ML") summary(m6lme) m6pf <- profilelike.lme(langPOST ~ 1 + IQ_verb + denomina + IQ_verb*denomina, random = ~1 + IQ_verb | schoolnr, data = dat, method="ML", profile.theta=2, subject = "schoolnr") m6 <- lme(langPOST ~ 1 + IQ_verb + denomina + IQ_verb*denomina, random = 1|schoolnr, data = dat, method="ML") summary(m6) xx <- profilelike.lme(formula = yield ~ endpoint, random = ~ 1 | id, correlation=corAR1(form = ~ 1 | id), data=dat, subject="Sample", profile.theta="vapor", method="ML", lo.theta=1, hi.theta=5, length=500, round=2) profilelike.plot(theta=xx$theta, profile.lik.norm=xx$profile.lik.norm, round=4)