library(car) library(psych) sat.act mod1 <- lm(ACT ~ SATV, data = sat.act) mod2 <- lm(cbind(SATV, SATQ) ~ ACT, data = sat.act) summary(mod1) summary(mod2) # Y-hat = 13.87 + 0.024(SATV) coef(mod1) vc <- vcov(mod1) sqrt(diag(vc)) tvalue <- coef(mod1)/sqrt(diag(vc)) pt(tvalue, df = 698) confint(mod1) # Confidence interval of R-squared -> Bootstrap (required a function to extract R-squared) predict(mod1) predict(mod1, data.frame(SATV = 499)) pval <- predict(mod1, data.frame(SATV = 200:800)) result <- data.frame(SATV = 200:800, predicted = pval) # Confidence interval of predicted values pval <- predict(mod1, data.frame(SATV = 200:800), interval = "prediction") anova(mod1) plot(ACT ~ SATV, data = sat.act) abline(mod1, col = "red", lty = 2, lwd = 3) mod3 <- lm(ACT ~ SATV + SATQ, data = sat.act) mod4 <- lm(ACT ~ SATV + SATQ + age + education, data = sat.act) summary(mod3) summary(mod4) anova(mod3, mod4) # Find delta R-squared summary(mod4)[["r.squared"]] - summary(mod3)[["r.squared"]] sat.act$gender <- factor(sat.act$gender, labels=c("male","female")) levels(sat.act$gender) contrasts(sat.act$gender) mod5 <- lm(SATQ ~ gender, data = sat.act) summary(mod5) sat.act$gender.new <- relevel(sat.act$gender, "female") # Treat female as baseline contrasts(sat.act$gender.new) mod6 <- lm(SATQ ~ gender.new, data = sat.act) summary(mod6) mod7 <- lm(SATQ ~ gender.new + education, data = sat.act) summary(mod7) mod8 <- lm(SATQ ~ education, data = sat.act) anova(mod8, mod7) # Moderators modint <- lm(ACT ~ SATV*SATQ, data = sat.act) modnoint <- lm(ACT ~ SATV + SATQ, data = sat.act) summary(modint) summary(modnoint) anova(modnoint, modint) # Simple slope # Probing interaction library(rockchalk) plotSlopes(modint, plotx="SATV", modx="SATQ") plotSlopes(modint, plotx="SATV", modx="SATQ") plotSlopes(modint, plotx="SATV", modx="SATQ", modxVals ="std.dev.") obj <- plotSlopes(modint, plotx="SATV", modx="SATQ", modxVals=c(200,400,600,800)) testSlopes(obj) plot(testSlopes(obj)) # Interaction between categorical and continuous variables modint2 <- lm(ACT ~ SATV * gender + age + education, data = sat.act) summary(modint2) modnoint2 <- lm(ACT ~ SATV + gender + age + education, data = sat.act) summary(modnoint2) anova(modnoint2, modint2) obj2 <- plotSlopes(modint2, plotx="SATV", modx="gender") testSlopes(obj2) # Centering mod8 <- lm(ACT ~ SATQ + SATV, data = sat.act) sat.act$SATQC <- sat.act$SATQ - mean(sat.act$SATQ) sat.act$SATQC <- scale(sat.act$SATQ, center = TRUE, scale = FALSE) sat.act$SATVC <- sat.act$SATV - mean(sat.act$SATV) sat.act$SATVC <- scale(sat.act$SATV, center = TRUE, scale = FALSE) mod9 <- lm(ACT ~ SATQC + SATVC, data = sat.act) summary(mod9) mod10 <- lm(ACT ~ I(SATQ - 500) + I(SATV - 500), data = sat.act) std <- lm(scale(ACT) ~ scale(SATV) + scale(SATQ), data = sat.act) summary(std) mod8 <- lm(ACT ~ SATQ + SATV, data = sat.act) standardize(mod8) # Nonlinear regression mod11 <- lm(ACT ~ SATQ + I(SATQ^2), data = sat.act) summary(mod11) # Simple slope mod12 <- lm(ACT ~ I(SATQ - 500) + I((SATQ - 500)^2), data = sat.act) summary(mod12) x <- 200:800 pred <- predict(mod11, data.frame(SATQ = x)) plot(x, pred) plot(x, pred, type = "l") # log sat.act$logSATQ <- log(sat.act$SATQ) sat.act$logACT <- log(sat.act$ACT) nReg3 <- lm(logACT ~ logSATQ, data = sat.act) summary(nReg3) ################################################################### ## Regression: Checking Assumptions about the Residuals mod1 <- lm(ACT ~ SATV * SATQ + age, data = sat.act) mod1$residuals ## check the normality assumption qqnorm(mod1$residuals) qqline(mod1$residuals) library(QuantPsyc) plotNormX(mod1$residuals) ## check the heteroscedasticity assumption: ## check that the residuals are random across predicted values plot(mod1$residuals ~ mod1$fitted.values) ## check that the residuals are random across predictors plot(mod1$residuals ~ mod1$model$SATQ) plot(mod1$residuals ~ mod1$model$SATV) plot(mod1$residuals ~ mod1$model$age) # Regression: Checking for Linearity and Multicollinearity ## a scatterplot matrix is a quick way to see several scatterplots at once pairs(sat.act[ , 3:6]) ## add a loess curve and 95% confidence lines using the "car" package scatterplot(sat.act$SATQ, sat.act$SATV) scatterplotMatrix(sat.act[ , 3:6]) ?pairs.panels ## as does "psych" pairs.panels(sat.act[ , 3:6]) # Collinearity ## check correlation matrix cor(sat.act[ , 3:6], use = "pairwise.complete") mod2 <- lm(ACT ~ gender + age + SATQ + SATV, data = sat.act) vif(mod2) # any values > 10 are problematic ## tolerance is the reciprocal of the variance inflation factor 1 / vif(mod2) # any values < 0.1 are problematic # Outliers mReg1 <- lm(ACT ~ SATV + SATQ, data = sat.act) summary(mReg1) dat <- influence.measures(mReg1) newdata <- sat.act[,c("ACT", "SATV", "SATQ")] meaniv <- colMeans(newdata, na.rm = TRUE) coviv <- cov(newdata, use = "complete.obs") d2 <- mahalanobis(newdata, meaniv, coviv) d2 <- d2[!is.na(d2)] plot(density(d2, bw = 0.5), main="Squared Mahalanobis distances, n=700, p=3") ; rug(d2) # Show the data points