The goal for today is to calculate a PCA model. First, let’s load our packages:
# Load tidyverse and lavaan packages
library(tidyverse)
library(psych)
library(GPArotation)
Let’s load in some data:
data <- read.csv("https://uoepsy.github.io/data/conduct_probs.csv")
str(data)
## 'data.frame': 450 obs. of 11 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ item1 : num -0.101 -2.57 0.398 -0.81 -2.011 ...
## $ item2 : num 0.758 -2.079 -1.202 -0.747 -2.028 ...
## $ item3 : num 0.725 -1.283 1.104 -0.993 -1.338 ...
## $ item4 : num 0.0336 -0.9411 1.1564 0.5656 -0.6479 ...
## $ item5 : num 0.2 -0.862 0.963 -0.165 -1.938 ...
## $ item6 : num -0.4 -1.159 -0.774 0.326 -0.384 ...
## $ item7 : num 0.898 -2.426 -1.227 0.101 -0.456 ...
## $ item8 : num 1.024 -2.666 -0.78 0.682 -0.673 ...
## $ item9 : num -0.2627 -1.9893 0.0983 0.4273 -1.1475 ...
## $ item10: num 0.663 -1.08 0.506 0.936 -0.419 ...
data <- data %>%
select(item1:item10)
Now let’s view a sample correlation matrix (i.e., our input matrix):
# Bartlett's test of sphericity
cortest.bartlett(cor(data), n = nrow(data))
## $chisq
## [1] 2237.533
##
## $p.value
## [1] 0
##
## $df
## [1] 45
# Significant, can press ahead!
# KMO
KMO(data)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data)
## Overall MSA = 0.87
## MSA for each item =
## item1 item2 item3 item4 item5 item6 item7 item8 item9 item10
## 0.90 0.88 0.92 0.88 0.84 0.94 0.82 0.81 0.95 0.94
# .87 = Merotorious!
cormat <- round(cor(data),2)
cormat
## item1 item2 item3 item4 item5 item6 item7 item8 item9 item10
## item1 1.00 0.59 0.49 0.48 0.60 0.17 0.30 0.32 0.26 0.20
## item2 0.59 1.00 0.53 0.51 0.66 0.20 0.33 0.30 0.29 0.19
## item3 0.49 0.53 1.00 0.49 0.55 0.15 0.25 0.24 0.25 0.15
## item4 0.48 0.51 0.49 1.00 0.65 0.23 0.29 0.32 0.28 0.25
## item5 0.60 0.66 0.55 0.65 1.00 0.21 0.30 0.29 0.27 0.21
## item6 0.17 0.20 0.15 0.23 0.21 1.00 0.54 0.57 0.41 0.44
## item7 0.30 0.33 0.25 0.29 0.30 0.54 1.00 0.83 0.61 0.58
## item8 0.32 0.30 0.24 0.32 0.29 0.57 0.83 1.00 0.61 0.59
## item9 0.26 0.29 0.25 0.28 0.27 0.41 0.61 0.61 1.00 0.44
## item10 0.20 0.19 0.15 0.25 0.21 0.44 0.58 0.59 0.44 1.00
View the eigenvectors and eigenvalues:
eigenmatrix <- (eigen(cor(data)))
eigenmatrix$values
## [1] 4.5228379 1.9899606 0.6147201 0.5644917 0.5420785 0.5074611 0.4115899
## [8] 0.3950373 0.2909343 0.1608885
# Scree plot
plot(eigenmatrix$values, # Vector of eigenvalues
type = 'b', # Display points and lines
pch = 16, #
main = "Scree plot",
ylab = "Eigenvalues")
# Minimum average partial
vss(data)
##
## Very Simple Structure
## Call: vss(x = data)
## Although the VSS complexity 1 shows 7 factors, it is probably more reasonable to think about 2 factors
## VSS complexity 2 achieves a maximimum of 0.92 with 2 factors
##
## The Velicer MAP achieves a minimum of 0.03 with 2 factors
## BIC achieves a minimum of -118.82 with 2 factors
## Sample Size adjusted BIC achieves a minimum of -41.19 with 3 factors
##
## Statistics by number of factors
## vss1 vss2 map dof chisq prob sqresid fit RMSEA BIC SABIC complex
## 1 0.77 0.00 0.106 35 8.8e+02 3.6e-161 6.0 0.77 0.231 662 773 1.0
## 2 0.78 0.92 0.034 26 4.0e+01 3.9e-02 2.1 0.92 0.035 -119 -36 1.1
## 3 0.78 0.91 0.058 18 1.2e+01 8.6e-01 1.9 0.93 0.000 -98 -41 1.2
## 4 0.63 0.90 0.104 11 6.1e+00 8.7e-01 1.4 0.95 0.000 -61 -26 1.3
## 5 0.69 0.87 0.149 5 2.4e+00 7.9e-01 1.6 0.94 0.000 -28 -12 1.3
## 6 0.71 0.89 0.252 0 1.2e-01 NA 1.4 0.95 NA NA NA 1.4
## 7 0.79 0.89 0.397 -4 1.1e-07 NA 1.5 0.94 NA NA NA 1.3
## 8 0.79 0.89 0.455 -7 1.6e-08 NA 1.5 0.94 NA NA NA 1.4
## eChisq SRMR eCRMS eBIC
## 1 1.0e+03 1.6e-01 0.179 797
## 2 1.4e+01 1.8e-02 0.024 -145
## 3 3.4e+00 9.1e-03 0.014 -107
## 4 1.6e+00 6.3e-03 0.013 -66
## 5 7.5e-01 4.3e-03 0.013 -30
## 6 2.9e-02 8.4e-04 NA NA
## 7 2.2e-08 7.4e-07 NA NA
## 8 2.8e-09 2.6e-07 NA NA
# Parallel analysis
fa.parallel(data, n.iter = 500)
## Parallel analysis suggests that the number of factors = 2 and the number of components = 2
Running Factor Analysis and comparing two solutions:
# Estimate different types of models
data_fa2_norotate <- fa(data, nfactors = 2, fm = "ml", rotate = "none")
data_fa2_orthog <- fa(data, nfactors = 2, fml = "ml", rotate = "varimax")
data_fa2_oblique <- fa(data, nfactors = 2, fm = "ml", rotate = "oblimin")
# View solutions
data_fa2_norotate
## Factor Analysis using method = ml
## Call: fa(r = data, nfactors = 2, rotate = "none", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 ML2 h2 u2 com
## item1 0.52 0.49 0.51 0.49 2.0
## item2 0.54 0.55 0.60 0.40 2.0
## item3 0.45 0.49 0.44 0.56 2.0
## item4 0.52 0.49 0.51 0.49 2.0
## item5 0.56 0.65 0.74 0.26 2.0
## item6 0.59 -0.20 0.38 0.62 1.2
## item7 0.86 -0.27 0.81 0.19 1.2
## item8 0.88 -0.30 0.85 0.15 1.2
## item9 0.66 -0.13 0.45 0.55 1.1
## item10 0.61 -0.21 0.42 0.58 1.2
##
## ML1 ML2
## SS loadings 3.99 1.72
## Proportion Var 0.40 0.17
## Cumulative Var 0.40 0.57
## Proportion Explained 0.70 0.30
## Cumulative Proportion 0.70 1.00
##
## Mean item complexity = 1.6
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 45 with the objective function = 5.03 with Chi Square = 2237.53
## df of the model are 26 and the objective function was 0.09
##
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic n.obs is 450 with the empirical chi square 15.05 with prob < 0.96
## The total n.obs was 450 with Likelihood Chi Square = 38.26 with prob < 0.057
##
## Tucker Lewis Index of factoring reliability = 0.99
## RMSEA index = 0.032 and the 90 % confidence intervals are 0 0.053
## BIC = -120.58
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML1 ML2
## Correlation of (regression) scores with factors 0.97 0.91
## Multiple R square of scores with factors 0.94 0.83
## Minimum correlation of possible factor scores 0.87 0.67
data_fa2_orthog
## Factor Analysis using method = minres
## Call: fa(r = data, nfactors = 2, rotate = "varimax", fml = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## MR1 MR2 h2 u2 com
## item1 0.19 0.69 0.52 0.48 1.1
## item2 0.19 0.75 0.60 0.40 1.1
## item3 0.14 0.66 0.45 0.55 1.1
## item4 0.22 0.67 0.50 0.50 1.2
## item5 0.16 0.84 0.73 0.27 1.1
## item6 0.61 0.12 0.39 0.61 1.1
## item7 0.87 0.19 0.80 0.20 1.1
## item8 0.90 0.18 0.84 0.16 1.1
## item9 0.64 0.22 0.45 0.55 1.2
## item10 0.65 0.12 0.43 0.57 1.1
##
## MR1 MR2
## SS loadings 2.93 2.79
## Proportion Var 0.29 0.28
## Cumulative Var 0.29 0.57
## Proportion Explained 0.51 0.49
## Cumulative Proportion 0.51 1.00
##
## Mean item complexity = 1.1
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 45 with the objective function = 5.03 with Chi Square = 2237.53
## df of the model are 26 and the objective function was 0.09
##
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.02
##
## The harmonic n.obs is 450 with the empirical chi square 13.72 with prob < 0.98
## The total n.obs was 450 with Likelihood Chi Square = 40.02 with prob < 0.039
##
## Tucker Lewis Index of factoring reliability = 0.989
## RMSEA index = 0.035 and the 90 % confidence intervals are 0.008 0.055
## BIC = -118.82
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## MR1 MR2
## Correlation of (regression) scores with factors 0.95 0.93
## Multiple R square of scores with factors 0.90 0.86
## Minimum correlation of possible factor scores 0.81 0.71
data_fa2_oblique
## Factor Analysis using method = ml
## Call: fa(r = data, nfactors = 2, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 ML2 h2 u2 com
## item1 0.04 0.70 0.51 0.49 1
## item2 0.02 0.77 0.60 0.40 1
## item3 -0.01 0.67 0.44 0.56 1
## item4 0.05 0.69 0.51 0.49 1
## item5 -0.05 0.88 0.74 0.26 1
## item6 0.62 -0.01 0.38 0.62 1
## item7 0.90 0.00 0.81 0.19 1
## item8 0.93 -0.02 0.85 0.15 1
## item9 0.63 0.09 0.45 0.55 1
## item10 0.65 -0.02 0.42 0.58 1
##
## ML1 ML2
## SS loadings 2.91 2.80
## Proportion Var 0.29 0.28
## Cumulative Var 0.29 0.57
## Proportion Explained 0.51 0.49
## Cumulative Proportion 0.51 1.00
##
## With factor correlations of
## ML1 ML2
## ML1 1.00 0.43
## ML2 0.43 1.00
##
## Mean item complexity = 1
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 45 with the objective function = 5.03 with Chi Square = 2237.53
## df of the model are 26 and the objective function was 0.09
##
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic n.obs is 450 with the empirical chi square 15.05 with prob < 0.96
## The total n.obs was 450 with Likelihood Chi Square = 38.26 with prob < 0.057
##
## Tucker Lewis Index of factoring reliability = 0.99
## RMSEA index = 0.032 and the 90 % confidence intervals are 0 0.053
## BIC = -120.58
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML1 ML2
## Correlation of (regression) scores with factors 0.96 0.94
## Multiple R square of scores with factors 0.93 0.88
## Minimum correlation of possible factor scores 0.85 0.76
# Pull out loadings
data_fa2_norotate$loadings
##
## Loadings:
## ML1 ML2
## item1 0.518 0.493
## item2 0.540 0.554
## item3 0.448 0.493
## item4 0.522 0.490
## item5 0.557 0.653
## item6 0.585 -0.196
## item7 0.856 -0.275
## item8 0.876 -0.296
## item9 0.658 -0.130
## item10 0.611 -0.212
##
## ML1 ML2
## SS loadings 3.992 1.724
## Proportion Var 0.399 0.172
## Cumulative Var 0.399 0.572
data_fa2_oblique$loadings
##
## Loadings:
## ML1 ML2
## item1 0.696
## item2 0.767
## item3 0.671
## item4 0.694
## item5 0.878
## item6 0.621
## item7 0.899
## item8 0.931
## item9 0.629
## item10 0.653
##
## ML1 ML2
## SS loadings 2.890 2.782
## Proportion Var 0.289 0.278
## Cumulative Var 0.289 0.567
data_fa2_orthog$loadings
##
## Loadings:
## MR1 MR2
## item1 0.189 0.693
## item2 0.187 0.754
## item3 0.139 0.659
## item4 0.217 0.671
## item5 0.159 0.841
## item6 0.615 0.117
## item7 0.871 0.193
## item8 0.900 0.185
## item9 0.635 0.220
## item10 0.648 0.117
##
## MR1 MR2
## SS loadings 2.934 2.788
## Proportion Var 0.293 0.279
## Cumulative Var 0.293 0.572
# Factor correlations
data_fa2_orthog$Phi
## NULL
round(data_fa2_oblique$Phi, 2)
## ML1 ML2
## ML1 1.00 0.43
## ML2 0.43 1.00
# Evaluate fit
data_fa2_oblique$TLI
## [1] 0.9902915
data_fa2_oblique$RMSEA
## RMSEA lower upper confidence
## 0.03229554 0.00000000 0.05308517 0.90000000
Now let’s check the matrices to test our understanding and how it aligns with path analysis:
# Original correlation matrix
cormat
## item1 item2 item3 item4 item5 item6 item7 item8 item9 item10
## item1 1.00 0.59 0.49 0.48 0.60 0.17 0.30 0.32 0.26 0.20
## item2 0.59 1.00 0.53 0.51 0.66 0.20 0.33 0.30 0.29 0.19
## item3 0.49 0.53 1.00 0.49 0.55 0.15 0.25 0.24 0.25 0.15
## item4 0.48 0.51 0.49 1.00 0.65 0.23 0.29 0.32 0.28 0.25
## item5 0.60 0.66 0.55 0.65 1.00 0.21 0.30 0.29 0.27 0.21
## item6 0.17 0.20 0.15 0.23 0.21 1.00 0.54 0.57 0.41 0.44
## item7 0.30 0.33 0.25 0.29 0.30 0.54 1.00 0.83 0.61 0.58
## item8 0.32 0.30 0.24 0.32 0.29 0.57 0.83 1.00 0.61 0.59
## item9 0.26 0.29 0.25 0.28 0.27 0.41 0.61 0.61 1.00 0.44
## item10 0.20 0.19 0.15 0.25 0.21 0.44 0.58 0.59 0.44 1.00
# Model implied correlation matrix
cormat_mi <- data_fa2_oblique$model
cormat_mi
## item1 item2 item3 item4 item5 item6 item7
## item1 0.5113029 0.5529590 0.4749796 0.5120434 0.6107637 0.2061736 0.3075875
## item2 0.5529590 0.5988017 0.5150122 0.5536137 0.6630098 0.2075996 0.3104137
## item3 0.4749796 0.5150122 0.4434873 0.4754212 0.5715646 0.1656290 0.2482770
## item4 0.5120434 0.5536137 0.4754212 0.5128119 0.6111896 0.2093074 0.3121339
## item5 0.6107637 0.6630098 0.5715646 0.6111896 0.7373725 0.1980643 0.2976813
## item6 0.2061736 0.2075996 0.1656290 0.2093074 0.1980643 0.3811605 0.5550989
## item7 0.3075875 0.3104137 0.2482770 0.3121339 0.2976813 0.5550989 0.8085466
## item8 0.3069754 0.3089328 0.2463286 0.3116720 0.2943813 0.5707308 0.8311456
## item9 0.2765451 0.2836412 0.2308950 0.2797924 0.2819269 0.4107589 0.5992008
## item10 0.2112177 0.2122369 0.1689371 0.2145097 0.2015247 0.3990484 0.5810649
## item8 item9 item10
## item1 0.3069754 0.2765451 0.2112177
## item2 0.3089328 0.2836412 0.2122369
## item3 0.2463286 0.2308950 0.1689371
## item4 0.3116720 0.2797924 0.2145097
## item5 0.2943813 0.2819269 0.2015247
## item6 0.5707308 0.4107589 0.3990484
## item7 0.8311456 0.5992008 0.5810649
## item8 0.8545913 0.6148135 0.5975354
## item9 0.6148135 0.4500477 0.4294061
## item10 0.5975354 0.4294061 0.4178295
# View residual matrix
resid_mat <- data_fa2_oblique$residual
resid_mat[, 1:5]
## item1 item2 item3 item4 item5
## item1 0.488697122 0.039041631 0.018793455 -0.034743982 -0.0134435813
## item2 0.039041631 0.401198255 0.014820150 -0.045680868 -0.0035044350
## item3 0.018793455 0.014820150 0.556512704 0.010012379 -0.0221607595
## item4 -0.034743982 -0.045680868 0.010012379 0.487188053 0.0379581084
## item5 -0.013443581 -0.003504435 -0.022160760 0.037958108 0.2626274816
## item6 -0.032785532 -0.003801771 -0.012899079 0.017019085 0.0141324055
## item7 -0.002680117 0.021490028 0.004201652 -0.026052198 -0.0018602582
## item8 0.011978736 -0.012411743 -0.003817461 0.010742451 -0.0009987794
## item9 -0.015407845 0.007696575 0.023523778 0.003187512 -0.0078090903
## item10 -0.012069929 -0.022777454 -0.014603102 0.039677786 0.0061489231