sem::cfa(…) is elegant

Prof. John Fox's sem package provided an elegant interface to define CFA and general SEM models. The following is an example given by Dr. K-T Hau's SEM course at coursera and his textbook (chap1_2).

library(sem) #R v322; sem v3.1-6
options(fit.indices=c("RMSEA","CFI","SRMR","AIC","BIC"))

# First of all, do not forget to provide the sample size
N=100 
# Then correlation matrix
cor.Item <-readMoments()
1.00
0.12 	1.00
0.08 	0.08 	1.00
0.50 	0.11 	0.08 	1.00
0.48 	0.03 	0.12 	0.45 	1.00
0.07 	0.46 	0.15 	0.08 	0.11 	1.00
0.05 	0.44 	0.15 	0.12 	0.12 	0.44 	1.00
0.14 	0.17 	0.53 	0.14 	0.08 	0.10 	0.06 	1.00
0.16 	0.05 	0.43 	0.10 	0.06 	0.08 	0.10 	0.54 	1.00


# Fixed factor variances with reference.indicators=F
model.cfa <- cfa(reference.indicators=F)
F1: X1, X4, X5
F2: X2, X6, X7
F3: X3, X8, X9

# Results
sem.cfa <- sem(model.cfa,cor.Item,N=N)
summary(sem.cfa)

# More details
## The model and the data
model.cfa
print.table(cor.Item)

## Histogram &amp;amp; density of the correlation values
hist(cor.lower <- cor.Item[row(cor.Item) > col(cor.Item)],freq=F,breaks=10,
     xlab='Correlation',col='grey');
rug(jitter(cor.lower),col='red')
lines(density(cor.lower),col='blue',lty=2)

## Highlight those  > 0.4 correlations
library(ellipse)
plotcorr(cor.Item,col=grey(1-abs(cor.Item)),type='lower',diag=T);

## In a grouped order
idx <- c(1,4,5,2,6,7,3,8,9)
plotcorr(cor.Item[idx,idx],col=grey(1-abs(cor.Item[idx,idx])),diag=T);

One more example (Chap3_1_2 Model A, B, & C), to demo usage of modification indices and GraphViz --

library(sem) #R v322; sem v3.1-6
options(fit.indices=c("RMSEA","CFI","SRMR","AIC","BIC"))


cor.17 <-readMoments(text="
1
.34 1
.38 .35 1
.02 .03 .04  1
.15 .19 .14 .02  1
.17 .15 .20 .01 .42  1
.20 .13 .12 .00 .40 .21  1
.32 .32 .21 .03 .10 .10 .07 1
.10 .17 .12 .02 .15 .18 .23 .13  1
.14 .16 .15 .03 .14 .19 .18 .18 .37  1
.14 .15 .19 .01 .18 .30 .13 .08 .38 .38  1
.18 .16 .24 .02 .14 .21 .21 .22 .06 .23 .18  1
.19 .20 .15 .01 .14 .24 .09 .24 .15 .21 .21 .45  1
.18 .21 .18 .03 .25 .18 .18 .18 .22 .12 .24 .28 .35  1
.08 .18 .16 .01 .22 .20 .22 .12 .12 .16 .21 .25 .20 .26  1
.12 .16 .25 .02 .15 .12 .20 .14 .17 .20 .14 .20 .15 .20 .50  1
.20 .16 .18 .04 .25 .14 .21 .17 .21 .21 .23 .15 .21 .22 .29 .41  1
")

# Fixed factor variances with reference.indicators=F
# Fixed the 1st loading with reference.indicators=T

Chap.3_1_2_A <- cfa(reference.indicators=T,text="
eta1:X1,X2,X3,X4
eta2:X5,X6,X7,X8
eta3:X9,X10,X11
eta4:X12,X13,X14
eta5:X15,X16,X17
")
Chap.3_1_2_A #检查模型是否固定负荷放开方差
# 
(sem.3_1_2_A <- sem(Chap.3_1_2_A,cor.17,N=350))
# sem.3_1_2_A 对象都有哪些属性?
(sum.3_1_2_A <- summary(sem.3_1_2_A))
# sum.3_1_2_A 对象都有哪些属性?

# modification indices
modIndices(sem.3_1_2_A)
summary(modIndices(sem.3_1_2_A))

# Find the small parameters
sum.3_1_2_A$coeff[order(abs(sum.3_1_2_A$coeff[,1])),]
sdc.3_1_2_A <- standardizedCoefficients(sem.3_1_2_A)
sdc.3_1_2_A[order(abs(sdc.3_1_2_A$`Std. Estimate`)),]
# Highlight the non-significant parameters
sum.3_1_2_A$coeff[sum.3_1_2_A$coeff[,4] >=.05,]


# The alternative models, delete X4 and load X8 from eta1 rather than from eta2
Chap.3_1_2_B <- cfa(reference.indicators=T,text="
eta1: X1, X2, X3, X8
eta2: X5, X6, X7
eta3: X9, X10,X11
eta4: X12,X13,X14
eta5: X15,X16,X17
")

Chap.3_1_2_C <- cfa(reference.indicators=T, text="
eta1: X1, X2, X3, X8
eta2: X5, X6, X7, X8
eta3: X9, X10,X11
eta4: X12,X13,X14
eta5: X15,X16,X17
")
cor.16 <- cor.17[rownames(cor.17)!='X4',colnames(cor.17)!='X4']

# Results
sem.3_1_2_B <- sem(Chap.3_1_2_B,cor.16,N=350)
(sum.3_1_2_B <- summary(sem.3_1_2_B))

modIndices(sem.3_1_2_B)

sem.3_1_2_C <- sem(Chap.3_1_2_C,cor.16,N=350)
(sum.3_1_2_C <- summary(sem.3_1_2_C))
modIndices(sem.3_1_2_C)

# Model Comparision
## Embedded models
anova(sem.3_1_2_B,sem.3_1_2_C)
## Non-embedded models
AIC(sem.3_1_2_B) - AIC(sem.3_1_2_A)
sum.3_1_2_B$AIC - sum.3_1_2_A$AIC

# Path Diagram, ignoring dual-headed arrows 
# View the diagram in RStudio Viewer 
pathDiagram(sem.3_1_2_B,ignore.double=T)

# RMSEA 
sum.3_1_2_B <- summary(sem.3_1_2_B)
sum.3_1_2_B$RMSEA
(I.16 <- diag(diag(cor.16)))
summary(sem(Chap.3_1_2_B,cor.16+0.02*I.16,350))$RMSEA


## Histogram & density of the correlation values
hist(cor.lower <- cor.17[row(cor.17) > col(cor.17)],freq=F,breaks=10,
     xlab='Correlation',col='grey');
rug(jitter(cor.lower),col='red')
lines(density(cor.lower),col='blue',lty=2)

## Fixed Var vs Fixed Weight
(Chap.3_1_2_B.V <-cfa(reference.indicators=F,text="
  eta1: X1, X2, X3, X8
  eta2: X5, X6, X7
  eta3: X9, X10,X11
  eta4: X12,X13,X14
  eta5: X15,X16,X17
"))
sem.3_1_2_B.V <- sem(Chap.3_1_2_B.V,cor.16,350)
sum.3_1_2_B.V <- summary(sem.3_1_2_B.V)
sum.3_1_2_B.V$chisq
sum.3_1_2_B$chisq

## Highlight those  > = 0.3 correlations
library(ellipse)
plotcorr(cor.17,col=grey((cor.17<.3)),type='lower',diag=T);

## In a grouped order,in darker colors
idx <- c(c(1,2,3,8),5:7,9:17,4)
cor.17.FU <- (cor.17-diag(diag(cor.17))+t(cor.17))[idx,idx]
plotcorr(cor.17.FU,col=grey((1-abs(cor.17.FU))^2),diag=T);
abline(v=(vh <- c(4,7,10,13,16)+.5),lty=2,col='red')
abline(h=18-vh,lty=2,col='red')

#####################
#####################

cor.5 <- readMoments(diag = F,text="
  0.418			
  0.407	0.48		
  0.547	0.469	0.481	
  0.422	0.458	0.443	0.496
")
(pc.5 <- princomp(covmat =cor.5 ))
screeplot(pc.5)
abline(h=1,lty=2)


(Chap.3_4_2<- cfa(reference.indicators=T,covs = paste('eta',1:5,sep = ''),text="
eta1: X1, X2, X3, X8
eta2: X5, X6, X7
eta3: X9, X10,X11
eta4: X12,X13,X14
eta5: X15,X16,X17
xi:eta1,eta2,eta3,eta4,eta5
"))

summary(sem.3_4_2 <- sem(Chap.3_4_2,cor.16,N = 350))
pathDiagram(sem.3_4_2,edge.labels = 'v')
anova(sem.3_4_2,sem.3_1_2_B)

###############
###############

cor.18 <- readMoments(names = c(paste("y",1:9,sep=""),paste("x",1:9,sep = "")),diag = F,text = "
0.68																
0.60	0.58															
0.01	0.10	0.07														
0.12	0.04	0.06	0.29													
0.06	0.06	0.01	0.35	0.24												
0.09	0.13	0.10	0.05	0.03	0.07											
0.04	0.08	0.16	0.10	0.12	0.06	0.25										
0.06	0.09	0.02	0.02	0.09	0.16	0.29	0.36									
0.23	0.26	0.19	0.05	0.04	0.04	0.08	0.09	0.09
0.11	0.13	0.12	0.03	0.05	0.03	0.02	0.06	0.06	0.40
0.16	0.09	0.09	0.10	0.10	0.02	0.04	0.12	0.15	0.29	0.20
0.24	0.26	0.22	0.14	0.06	0.10	0.06	0.07	0.08	0.03	0.04	0.02
0.21	0.22	0.29	0.07	0.05	0.17	0.12	0.06	0.06	0.03	0.12	0.04	0.55
0.29	0.28	0.26	0.06	0.07	0.05	0.06	0.15	0.2	0.1	0.03	0.12	0.64	0.61
0.15	0.16	0.19	0.18	0.08	0.07	0.08	0.1	0.06	0.15	0.16	0.07	0.25	0.25	0.16		
0.24	0.20	0.16	0.13	0.15	0.18	0.19	0.18	0.14	0.11	0.07	0.16	0.19	0.21	0.22	0.35
0.14	0.25	0.12	0.09	0.11	0.09	0.09	0.11	0.21	0.17	0.09	0.05	0.21	0.23	0.18	0.39	0.48
")
str(cor.18)
library(ellipse)
plotcorr(cor.18,type = 'l',col=grey(1-abs(cor.18)))

# Without intial values, there will be some trouble 
(M0 <- cfa(reference.indicators = T,text = "
  eta1:y1,y2,y3
  eta2:y4,y5,y6
  eta3:y7,y8,y9
  xi1:x1,x2,x3
  xi2:x4,x5,x6
  xi3:x7,x8,x9
"))
(sem.0 <- sem(M0,cor.18,500))
# (sum.0 <- summary(sem.0))
# Error in summary.objectiveML(sem.0) : 
#   coefficient covariances cannot be computed

# With initial values, M0 is OK
(M0 <- cfa(reference.indicators = T,text = "
  eta1:y1,y2(1),y3(1)
  eta2:y4,y5(1),y6(1)
  eta3:y7,y8(1),y9(1)
  xi1:x1,x2(1),x3(1)
  xi2:x4,x5(1),x6(1)
  xi3:x7,x8(1),x9(1)
"))
sem.0 <- sem(M0,S = cor.18,N = 500)
(sum.0 <- summary(sem.0))

(M1 <- cfa(covs = c("xi1,xi2,xi3","eta2,eta3"),reference.indicators = T,text = "
  eta1:y1,y2(1),y3(1),eta2
  eta2:y4,y5(1),y6(1)
  eta3:y7,y8(1),y9(1)
  xi1:x1,x2(1),x3(1),eta1,eta3
  xi2:x4,x5(1),x6(1),eta1
  xi3:x7,x8(1),x9(1),eta1,eta2
"))
sem.1 <- sem(M1,S = cor.18,N = 500)
(sum.1 <- summary(sem.1))
anova(sem.1,sem.0)
sum.1$RMSEA
sum.0$RMSEA

pathDiagram(sem.1)
modIndices(sem.1)
(M5 <- update(M1,text="
  add,xi3 -> eta3,gamma33
  delete,eta1 -> eta2
  delete,xi1 -> eta3
  delete,eta2 <-> eta3"))
sem.5 <- sem(M5,S = cor.18,N = 500)
(sum.5 <- summary(sem.5))
sum.5$RMSEA
standardizedCoefficients(sem.5)

Leave a Reply

Your email address will not be published. Required fields are marked *