The goal for today is to calculate a PCA model. First, let’s load our packages:

# Load tidyverse and lavaan packages
library(tidyverse)
library(psych)
library(GPArotation)

Let’s load in some data:

data <- read.csv("https://uoepsy.github.io/data/conduct_probs.csv")
str(data)
## 'data.frame':    450 obs. of  11 variables:
##  $ X     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ item1 : num  -0.101 -2.57 0.398 -0.81 -2.011 ...
##  $ item2 : num  0.758 -2.079 -1.202 -0.747 -2.028 ...
##  $ item3 : num  0.725 -1.283 1.104 -0.993 -1.338 ...
##  $ item4 : num  0.0336 -0.9411 1.1564 0.5656 -0.6479 ...
##  $ item5 : num  0.2 -0.862 0.963 -0.165 -1.938 ...
##  $ item6 : num  -0.4 -1.159 -0.774 0.326 -0.384 ...
##  $ item7 : num  0.898 -2.426 -1.227 0.101 -0.456 ...
##  $ item8 : num  1.024 -2.666 -0.78 0.682 -0.673 ...
##  $ item9 : num  -0.2627 -1.9893 0.0983 0.4273 -1.1475 ...
##  $ item10: num  0.663 -1.08 0.506 0.936 -0.419 ...
data <- data %>%
  select(item1:item10)

Now let’s view a sample correlation matrix (i.e., our input matrix):

# Bartlett's test of sphericity
cortest.bartlett(cor(data), n = nrow(data))
## $chisq
## [1] 2237.533
## 
## $p.value
## [1] 0
## 
## $df
## [1] 45
# Significant, can press ahead!

# KMO
KMO(data)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = data)
## Overall MSA =  0.87
## MSA for each item = 
##  item1  item2  item3  item4  item5  item6  item7  item8  item9 item10 
##   0.90   0.88   0.92   0.88   0.84   0.94   0.82   0.81   0.95   0.94
# .87 = Merotorious!

cormat <- round(cor(data),2)
cormat
##        item1 item2 item3 item4 item5 item6 item7 item8 item9 item10
## item1   1.00  0.59  0.49  0.48  0.60  0.17  0.30  0.32  0.26   0.20
## item2   0.59  1.00  0.53  0.51  0.66  0.20  0.33  0.30  0.29   0.19
## item3   0.49  0.53  1.00  0.49  0.55  0.15  0.25  0.24  0.25   0.15
## item4   0.48  0.51  0.49  1.00  0.65  0.23  0.29  0.32  0.28   0.25
## item5   0.60  0.66  0.55  0.65  1.00  0.21  0.30  0.29  0.27   0.21
## item6   0.17  0.20  0.15  0.23  0.21  1.00  0.54  0.57  0.41   0.44
## item7   0.30  0.33  0.25  0.29  0.30  0.54  1.00  0.83  0.61   0.58
## item8   0.32  0.30  0.24  0.32  0.29  0.57  0.83  1.00  0.61   0.59
## item9   0.26  0.29  0.25  0.28  0.27  0.41  0.61  0.61  1.00   0.44
## item10  0.20  0.19  0.15  0.25  0.21  0.44  0.58  0.59  0.44   1.00

View the eigenvectors and eigenvalues:

eigenmatrix <- (eigen(cor(data)))

eigenmatrix$values
##  [1] 4.5228379 1.9899606 0.6147201 0.5644917 0.5420785 0.5074611 0.4115899
##  [8] 0.3950373 0.2909343 0.1608885

Determining the number of components to retain

# Scree plot
plot(eigenmatrix$values, # Vector of eigenvalues
     type = 'b', # Display points and lines
     pch = 16, # 
     main = "Scree plot", 
     ylab = "Eigenvalues")

# Minimum average partial
vss(data)

## 
## Very Simple Structure
## Call: vss(x = data)
## Although the VSS complexity 1 shows  7  factors, it is probably more reasonable to think about  2  factors
## VSS complexity 2 achieves a maximimum of 0.92  with  2  factors
## 
## The Velicer MAP achieves a minimum of 0.03  with  2  factors 
## BIC achieves a minimum of  -118.82  with  2  factors
## Sample Size adjusted BIC achieves a minimum of  -41.19  with  3  factors
## 
## Statistics by number of factors 
##   vss1 vss2   map dof   chisq     prob sqresid  fit RMSEA  BIC SABIC complex
## 1 0.77 0.00 0.106  35 8.8e+02 3.6e-161     6.0 0.77 0.231  662   773     1.0
## 2 0.78 0.92 0.034  26 4.0e+01  3.9e-02     2.1 0.92 0.035 -119   -36     1.1
## 3 0.78 0.91 0.058  18 1.2e+01  8.6e-01     1.9 0.93 0.000  -98   -41     1.2
## 4 0.63 0.90 0.104  11 6.1e+00  8.7e-01     1.4 0.95 0.000  -61   -26     1.3
## 5 0.69 0.87 0.149   5 2.4e+00  7.9e-01     1.6 0.94 0.000  -28   -12     1.3
## 6 0.71 0.89 0.252   0 1.2e-01       NA     1.4 0.95    NA   NA    NA     1.4
## 7 0.79 0.89 0.397  -4 1.1e-07       NA     1.5 0.94    NA   NA    NA     1.3
## 8 0.79 0.89 0.455  -7 1.6e-08       NA     1.5 0.94    NA   NA    NA     1.4
##    eChisq    SRMR eCRMS eBIC
## 1 1.0e+03 1.6e-01 0.179  797
## 2 1.4e+01 1.8e-02 0.024 -145
## 3 3.4e+00 9.1e-03 0.014 -107
## 4 1.6e+00 6.3e-03 0.013  -66
## 5 7.5e-01 4.3e-03 0.013  -30
## 6 2.9e-02 8.4e-04    NA   NA
## 7 2.2e-08 7.4e-07    NA   NA
## 8 2.8e-09 2.6e-07    NA   NA
# Parallel analysis
fa.parallel(data, n.iter = 500)

## Parallel analysis suggests that the number of factors =  2  and the number of components =  2

Running Factor Analysis and comparing two solutions:

# Estimate different types of models
data_fa2_norotate <- fa(data, nfactors = 2, fm = "ml", rotate = "none")
data_fa2_orthog <- fa(data, nfactors = 2, fml = "ml", rotate = "varimax")
data_fa2_oblique <- fa(data, nfactors = 2, fm = "ml", rotate = "oblimin")

# View solutions
data_fa2_norotate
## Factor Analysis using method =  ml
## Call: fa(r = data, nfactors = 2, rotate = "none", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##         ML1   ML2   h2   u2 com
## item1  0.52  0.49 0.51 0.49 2.0
## item2  0.54  0.55 0.60 0.40 2.0
## item3  0.45  0.49 0.44 0.56 2.0
## item4  0.52  0.49 0.51 0.49 2.0
## item5  0.56  0.65 0.74 0.26 2.0
## item6  0.59 -0.20 0.38 0.62 1.2
## item7  0.86 -0.27 0.81 0.19 1.2
## item8  0.88 -0.30 0.85 0.15 1.2
## item9  0.66 -0.13 0.45 0.55 1.1
## item10 0.61 -0.21 0.42 0.58 1.2
## 
##                        ML1  ML2
## SS loadings           3.99 1.72
## Proportion Var        0.40 0.17
## Cumulative Var        0.40 0.57
## Proportion Explained  0.70 0.30
## Cumulative Proportion 0.70 1.00
## 
## Mean item complexity =  1.6
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  45  with the objective function =  5.03 with Chi Square =  2237.53
## df of  the model are 26  and the objective function was  0.09 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.03 
## 
## The harmonic n.obs is  450 with the empirical chi square  15.05  with prob <  0.96 
## The total n.obs was  450  with Likelihood Chi Square =  38.26  with prob <  0.057 
## 
## Tucker Lewis Index of factoring reliability =  0.99
## RMSEA index =  0.032  and the 90 % confidence intervals are  0 0.053
## BIC =  -120.58
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    ML1  ML2
## Correlation of (regression) scores with factors   0.97 0.91
## Multiple R square of scores with factors          0.94 0.83
## Minimum correlation of possible factor scores     0.87 0.67
data_fa2_orthog
## Factor Analysis using method =  minres
## Call: fa(r = data, nfactors = 2, rotate = "varimax", fml = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##         MR1  MR2   h2   u2 com
## item1  0.19 0.69 0.52 0.48 1.1
## item2  0.19 0.75 0.60 0.40 1.1
## item3  0.14 0.66 0.45 0.55 1.1
## item4  0.22 0.67 0.50 0.50 1.2
## item5  0.16 0.84 0.73 0.27 1.1
## item6  0.61 0.12 0.39 0.61 1.1
## item7  0.87 0.19 0.80 0.20 1.1
## item8  0.90 0.18 0.84 0.16 1.1
## item9  0.64 0.22 0.45 0.55 1.2
## item10 0.65 0.12 0.43 0.57 1.1
## 
##                        MR1  MR2
## SS loadings           2.93 2.79
## Proportion Var        0.29 0.28
## Cumulative Var        0.29 0.57
## Proportion Explained  0.51 0.49
## Cumulative Proportion 0.51 1.00
## 
## Mean item complexity =  1.1
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  45  with the objective function =  5.03 with Chi Square =  2237.53
## df of  the model are 26  and the objective function was  0.09 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.02 
## 
## The harmonic n.obs is  450 with the empirical chi square  13.72  with prob <  0.98 
## The total n.obs was  450  with Likelihood Chi Square =  40.02  with prob <  0.039 
## 
## Tucker Lewis Index of factoring reliability =  0.989
## RMSEA index =  0.035  and the 90 % confidence intervals are  0.008 0.055
## BIC =  -118.82
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    MR1  MR2
## Correlation of (regression) scores with factors   0.95 0.93
## Multiple R square of scores with factors          0.90 0.86
## Minimum correlation of possible factor scores     0.81 0.71
data_fa2_oblique
## Factor Analysis using method =  ml
## Call: fa(r = data, nfactors = 2, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##          ML1   ML2   h2   u2 com
## item1   0.04  0.70 0.51 0.49   1
## item2   0.02  0.77 0.60 0.40   1
## item3  -0.01  0.67 0.44 0.56   1
## item4   0.05  0.69 0.51 0.49   1
## item5  -0.05  0.88 0.74 0.26   1
## item6   0.62 -0.01 0.38 0.62   1
## item7   0.90  0.00 0.81 0.19   1
## item8   0.93 -0.02 0.85 0.15   1
## item9   0.63  0.09 0.45 0.55   1
## item10  0.65 -0.02 0.42 0.58   1
## 
##                        ML1  ML2
## SS loadings           2.91 2.80
## Proportion Var        0.29 0.28
## Cumulative Var        0.29 0.57
## Proportion Explained  0.51 0.49
## Cumulative Proportion 0.51 1.00
## 
##  With factor correlations of 
##      ML1  ML2
## ML1 1.00 0.43
## ML2 0.43 1.00
## 
## Mean item complexity =  1
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  45  with the objective function =  5.03 with Chi Square =  2237.53
## df of  the model are 26  and the objective function was  0.09 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.03 
## 
## The harmonic n.obs is  450 with the empirical chi square  15.05  with prob <  0.96 
## The total n.obs was  450  with Likelihood Chi Square =  38.26  with prob <  0.057 
## 
## Tucker Lewis Index of factoring reliability =  0.99
## RMSEA index =  0.032  and the 90 % confidence intervals are  0 0.053
## BIC =  -120.58
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    ML1  ML2
## Correlation of (regression) scores with factors   0.96 0.94
## Multiple R square of scores with factors          0.93 0.88
## Minimum correlation of possible factor scores     0.85 0.76
# Pull out loadings
data_fa2_norotate$loadings
## 
## Loadings:
##        ML1    ML2   
## item1   0.518  0.493
## item2   0.540  0.554
## item3   0.448  0.493
## item4   0.522  0.490
## item5   0.557  0.653
## item6   0.585 -0.196
## item7   0.856 -0.275
## item8   0.876 -0.296
## item9   0.658 -0.130
## item10  0.611 -0.212
## 
##                  ML1   ML2
## SS loadings    3.992 1.724
## Proportion Var 0.399 0.172
## Cumulative Var 0.399 0.572
data_fa2_oblique$loadings
## 
## Loadings:
##        ML1    ML2   
## item1          0.696
## item2          0.767
## item3          0.671
## item4          0.694
## item5          0.878
## item6   0.621       
## item7   0.899       
## item8   0.931       
## item9   0.629       
## item10  0.653       
## 
##                  ML1   ML2
## SS loadings    2.890 2.782
## Proportion Var 0.289 0.278
## Cumulative Var 0.289 0.567
data_fa2_orthog$loadings
## 
## Loadings:
##        MR1   MR2  
## item1  0.189 0.693
## item2  0.187 0.754
## item3  0.139 0.659
## item4  0.217 0.671
## item5  0.159 0.841
## item6  0.615 0.117
## item7  0.871 0.193
## item8  0.900 0.185
## item9  0.635 0.220
## item10 0.648 0.117
## 
##                  MR1   MR2
## SS loadings    2.934 2.788
## Proportion Var 0.293 0.279
## Cumulative Var 0.293 0.572
# Factor correlations
data_fa2_orthog$Phi
## NULL
round(data_fa2_oblique$Phi, 2)
##      ML1  ML2
## ML1 1.00 0.43
## ML2 0.43 1.00
# Evaluate fit
data_fa2_oblique$TLI
## [1] 0.9902915
data_fa2_oblique$RMSEA
##      RMSEA      lower      upper confidence 
## 0.03229554 0.00000000 0.05308517 0.90000000

Now let’s check the matrices to test our understanding and how it aligns with path analysis:

# Original correlation matrix
cormat
##        item1 item2 item3 item4 item5 item6 item7 item8 item9 item10
## item1   1.00  0.59  0.49  0.48  0.60  0.17  0.30  0.32  0.26   0.20
## item2   0.59  1.00  0.53  0.51  0.66  0.20  0.33  0.30  0.29   0.19
## item3   0.49  0.53  1.00  0.49  0.55  0.15  0.25  0.24  0.25   0.15
## item4   0.48  0.51  0.49  1.00  0.65  0.23  0.29  0.32  0.28   0.25
## item5   0.60  0.66  0.55  0.65  1.00  0.21  0.30  0.29  0.27   0.21
## item6   0.17  0.20  0.15  0.23  0.21  1.00  0.54  0.57  0.41   0.44
## item7   0.30  0.33  0.25  0.29  0.30  0.54  1.00  0.83  0.61   0.58
## item8   0.32  0.30  0.24  0.32  0.29  0.57  0.83  1.00  0.61   0.59
## item9   0.26  0.29  0.25  0.28  0.27  0.41  0.61  0.61  1.00   0.44
## item10  0.20  0.19  0.15  0.25  0.21  0.44  0.58  0.59  0.44   1.00
# Model implied correlation matrix
cormat_mi <- data_fa2_oblique$model
cormat_mi
##            item1     item2     item3     item4     item5     item6     item7
## item1  0.5113029 0.5529590 0.4749796 0.5120434 0.6107637 0.2061736 0.3075875
## item2  0.5529590 0.5988017 0.5150122 0.5536137 0.6630098 0.2075996 0.3104137
## item3  0.4749796 0.5150122 0.4434873 0.4754212 0.5715646 0.1656290 0.2482770
## item4  0.5120434 0.5536137 0.4754212 0.5128119 0.6111896 0.2093074 0.3121339
## item5  0.6107637 0.6630098 0.5715646 0.6111896 0.7373725 0.1980643 0.2976813
## item6  0.2061736 0.2075996 0.1656290 0.2093074 0.1980643 0.3811605 0.5550989
## item7  0.3075875 0.3104137 0.2482770 0.3121339 0.2976813 0.5550989 0.8085466
## item8  0.3069754 0.3089328 0.2463286 0.3116720 0.2943813 0.5707308 0.8311456
## item9  0.2765451 0.2836412 0.2308950 0.2797924 0.2819269 0.4107589 0.5992008
## item10 0.2112177 0.2122369 0.1689371 0.2145097 0.2015247 0.3990484 0.5810649
##            item8     item9    item10
## item1  0.3069754 0.2765451 0.2112177
## item2  0.3089328 0.2836412 0.2122369
## item3  0.2463286 0.2308950 0.1689371
## item4  0.3116720 0.2797924 0.2145097
## item5  0.2943813 0.2819269 0.2015247
## item6  0.5707308 0.4107589 0.3990484
## item7  0.8311456 0.5992008 0.5810649
## item8  0.8545913 0.6148135 0.5975354
## item9  0.6148135 0.4500477 0.4294061
## item10 0.5975354 0.4294061 0.4178295
# View residual matrix
resid_mat <- data_fa2_oblique$residual
resid_mat[, 1:5]
##               item1        item2        item3        item4         item5
## item1   0.488697122  0.039041631  0.018793455 -0.034743982 -0.0134435813
## item2   0.039041631  0.401198255  0.014820150 -0.045680868 -0.0035044350
## item3   0.018793455  0.014820150  0.556512704  0.010012379 -0.0221607595
## item4  -0.034743982 -0.045680868  0.010012379  0.487188053  0.0379581084
## item5  -0.013443581 -0.003504435 -0.022160760  0.037958108  0.2626274816
## item6  -0.032785532 -0.003801771 -0.012899079  0.017019085  0.0141324055
## item7  -0.002680117  0.021490028  0.004201652 -0.026052198 -0.0018602582
## item8   0.011978736 -0.012411743 -0.003817461  0.010742451 -0.0009987794
## item9  -0.015407845  0.007696575  0.023523778  0.003187512 -0.0078090903
## item10 -0.012069929 -0.022777454 -0.014603102  0.039677786  0.0061489231