# R for simulation study

# Analyze the attitude data by multiple regression using lm function

out <- lm(rating ~ complaints + privileges + learning + raises + 
    critical + advance, data = attitude)
summary(out)

# Analyze the attitude data by multiple regression using Mplus

write.table(attitude, file = "attitude.csv", row.names = FALSE, 
    col.names = FALSE, sep = ",")

getwd()
list.files()

myscript <- "
  TITLE: Multiple Regression on Attitude Data;
  DATA: 
    FILE IS attitude.csv;
  VARIABLE: 
    NAMES ARE rating complaints privileges learning raises critical advance;
  MODEL:	
    rating ON complaints privileges learning raises critical advance;
"
write(myscript, "attitude.inp")
list.files()

shell("mplus attitude.inp attitude.out")
list.files()

output <- readLines("attitude.out")

output[106:123]

files <- c("attitude.csv", "attitude.inp", "attitude.out")
file.remove(files)

# Use Mplus to analyze data by mediation model

set.seed(123321)
x <- rnorm(200, 0, 1)
m <- 0.2 * x + rnorm(200, 0, sqrt(0.96))
y <- 0.2 * x + 0.2 * m + rnorm(200, 0, sqrt(0.88)) 
dat <- data.frame(x, m, y)
write.table(dat, file = "example.csv", row.names = FALSE, 
    col.names = FALSE, sep = ",")

exscript <- "
  TITLE: Example on Mediation Model;
  DATA: 
    FILE IS example.csv;
  VARIABLE: 
    NAMES ARE x m y;
  MODEL:	
    y ON x (c) 
         m (b);
    m ON x (a);
  MODEL CONSTRAINT:
    new(ab);
    ab = a * b;
"
write(exscript, "example.inp")

shell("mplus example.inp example.out")
exout <- readLines("example.out")
exout[110:131]

# Extract the parameter estimate of the indirect effect

grep("AB", exout)

exline <- exout[grep("AB", exout)[2]]

exsplit <- strsplit(exline, " ")

exresult <- setdiff(exsplit[[1]], "")

as.numeric(exresult[2:5])

grep("THE MODEL ESTIMATION TERMINATED NORMALLY", exout)

files <- c("example.csv", "example.inp", "example.out")
file.remove(files)
list.files()

# paste and paste0 functions

paste("a", "b", "c")
paste("a", "b", "c", sep = "")
paste0("a", "b", "c")

paste0("script", 2, ".inp")

# Monte Carlo simulation on mediation model varying sample size conditions

n <- c(50, 100, 200, 400, 800)
for(i in 1:length(n)) dir.create(paste0("n", n[i]))
list.files()

nRep <- 1000
currentDir <- getwd()
set.seed(123321)
for(i in 1:length(n)) {
  for(j in 1:nRep) {
    x <- rnorm(n[i], 0, 1)
    m <- 0.2 * x + rnorm(n[i], 0, sqrt(0.96))
    y <- 0.2 * x + 0.2 * m + rnorm(n[i], 0, sqrt(0.88)) 
    dat <- data.frame(x, m, y)
    filename <- paste0("dat", j, ".csv")
    targetFile <- paste0(currentDir, "/n", n[i], "/", filename)
    write.table(dat, file = targetFile, row.names = FALSE, 
	    col.names = FALSE, sep = ",")
  }
}

template <- "
  TITLE:	
    My Simulation;
  DATA:	
    FILE IS subFile;
  VARIABLE:	
    NAMES ARE x m y;
  MODEL:	
    y ON x (c) 
         m (b);
    m ON x (a);
  MODEL CONSTRAINT:
    new(ab);
    ab = a * b;
"
for(i in 1:length(n)) {
  for(j in 1:nRep) {
    datname <- paste0("dat", j, ".csv")
    tempscript <- gsub("subFile", datname, template)
    filename <- paste0("script", j, ".inp")
    targetFile <- paste0(currentDir, "/n", n[i], "/", filename)
    write(tempscript, targetFile)
  }
}

for(i in 1:length(n)) {
  targetDir <- paste0(currentDir, "/n", n[i], "/")
  setwd(targetDir)
  for(j in 1:nRep) {
    filename <- paste0("script", j, ".inp")
    outname <- paste0("output", j, ".out")
    shell(paste0("mplus ", filename, " ", outname))
  }
}
setwd(currentDir)

result <- NULL
for(i in 1:length(n)) {
  for(j in 1:nRep) {
    targetFile <- paste0(currentDir, "/n", n[i], "/", "output", j, ".out")
    tempoutput <- readLines(targetFile)
    if(length(grep("THE MODEL ESTIMATION TERMINATED NORMALLY", 
	    tempoutput)) > 0) {
      targetline <- grep("AB", tempoutput)[2]
      textline <- tempoutput[targetline]
      textsplit <- strsplit(textline, " ")
      textresult <- setdiff(textsplit[[1]], "")
      result <- rbind(result, c(n[i], as.numeric(textresult[2:5])))
    } else {
	  result <- rbind(result, c(n[i], rep(NA, 4)))
	}
  }
}
colnames(result) <- c("n", "est", "se", "z", "p")
aggregate(est ~ n, data = result, FUN = mean)

# Exercise

set.seed(123321)
x <- rnorm(250, 0, 1)
m1 <- 0.2 * x + rnorm(250, 0, sqrt(0.96))
m2 <- 0.2 * x + rnorm(250, 0, sqrt(0.96))
y <- 0.2 * x + 0.2 * m1 + 0.2 * m2 + rnorm(250, 0, sqrt(0.84)) 
dat <- data.frame(x, m1, m2, y)
write.table(dat, file = "exercise.csv", row.names = FALSE, 
    col.names = FALSE, sep = ",")

exercisescript <- "
  TITLE: Example on Mutliple-Mediation Model;
  DATA: 
    FILE IS exercise.csv;
  VARIABLE: 
    NAMES ARE x m1 m2 y;
  MODEL:	
    y ON x (c) 
         m1 (b1)
		 m2 (b2);
    m1 ON x (a1);
	m2 ON x (a2);
  MODEL CONSTRAINT:
    new(ab1, ab2, indirect);
    ab1 = a1 * b1;
	ab2 = a2 * b2;
	indirect = ab1 + ab2;
"
write(exercisescript, "exercise.inp")

shell("mplus exercise.inp exercise.out")
exerciseout <- readLines("exercise.out")
exerciseout[140:143]