The goal for today is to estimate an exploratory factor analysis (EFA) model, assess scale reliability, and fit a structural equation model. First, let’s load our packages:

# Load tidyverse (data wrangling) plus psych and GPArotation
# (factor analysis and rotation methods)
library(tidyverse)
library(psych)
library(GPArotation)

# lavaan is used later for CFA / SEM model fitting
library(lavaan)

Let’s load in some data:

# Read the conduct-problems data: 450 observations of a row-index
# column (X) plus ten continuous questionnaire items (item1-item10)
data <- read.csv("https://uoepsy.github.io/data/conduct_probs.csv")
# Inspect the structure to confirm variable names and types
str(data)
## 'data.frame':    450 obs. of  11 variables:
##  $ X     : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ item1 : num  -0.101 -2.57 0.398 -0.81 -2.011 ...
##  $ item2 : num  0.758 -2.079 -1.202 -0.747 -2.028 ...
##  $ item3 : num  0.725 -1.283 1.104 -0.993 -1.338 ...
##  $ item4 : num  0.0336 -0.9411 1.1564 0.5656 -0.6479 ...
##  $ item5 : num  0.2 -0.862 0.963 -0.165 -1.938 ...
##  $ item6 : num  -0.4 -1.159 -0.774 0.326 -0.384 ...
##  $ item7 : num  0.898 -2.426 -1.227 0.101 -0.456 ...
##  $ item8 : num  1.024 -2.666 -0.78 0.682 -0.673 ...
##  $ item9 : num  -0.2627 -1.9893 0.0983 0.4273 -1.1475 ...
##  $ item10: num  0.663 -1.08 0.506 0.936 -0.419 ...
# Keep only the ten questionnaire items, dropping the row-index column X
data <- select(data, item1:item10)

Let’s estimate orthogonal and oblique factor solutions, then compute Tucker’s congruence coefficient to quantify the similarity between the two solutions:

# Estimate two-factor EFA solutions with maximum likelihood (fm = "ml"):
# one orthogonal rotation (varimax) and one oblique rotation (oblimin).
# FIX: the original passed `fml = "ml"` for the orthogonal model; fa()
# silently ignores the misspelled argument and falls back to the default
# fm = "minres" -- visible in the congruence output, where that model's
# factors are labelled MR1/MR2 rather than ML1/ML2.
fa_orthog <- fa(data, nfactors = 2, fm = "ml", rotate = "varimax")
fa_oblique <- fa(data, nfactors = 2, fm = "ml", rotate = "oblimin")

# Print the oblique solution
fa_oblique
## Factor Analysis using method =  ml
## Call: fa(r = data, nfactors = 2, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
##          ML1   ML2   h2   u2 com
## item1   0.04  0.70 0.51 0.49   1
## item2   0.02  0.77 0.60 0.40   1
## item3  -0.01  0.67 0.44 0.56   1
## item4   0.05  0.69 0.51 0.49   1
## item5  -0.05  0.88 0.74 0.26   1
## item6   0.62 -0.01 0.38 0.62   1
## item7   0.90  0.00 0.81 0.19   1
## item8   0.93 -0.02 0.85 0.15   1
## item9   0.63  0.09 0.45 0.55   1
## item10  0.65 -0.02 0.42 0.58   1
## 
##                        ML1  ML2
## SS loadings           2.91 2.80
## Proportion Var        0.29 0.28
## Cumulative Var        0.29 0.57
## Proportion Explained  0.51 0.49
## Cumulative Proportion 0.51 1.00
## 
##  With factor correlations of 
##      ML1  ML2
## ML1 1.00 0.43
## ML2 0.43 1.00
## 
## Mean item complexity =  1
## Test of the hypothesis that 2 factors are sufficient.
## 
## df null model =  45  with the objective function =  5.03 with Chi Square =  2237.53
## df of  the model are 26  and the objective function was  0.09 
## 
## The root mean square of the residuals (RMSR) is  0.02 
## The df corrected root mean square of the residuals is  0.03 
## 
## The harmonic n.obs is  450 with the empirical chi square  15.05  with prob <  0.96 
## The total n.obs was  450  with Likelihood Chi Square =  38.26  with prob <  0.057 
## 
## Tucker Lewis Index of factoring reliability =  0.99
## RMSEA index =  0.032  and the 90 % confidence intervals are  0 0.053
## BIC =  -120.58
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy             
##                                                    ML1  ML2
## Correlation of (regression) scores with factors   0.96 0.94
## Multiple R square of scores with factors          0.93 0.88
## Minimum correlation of possible factor scores     0.85 0.76
# Tucker's congruence coefficients between the orthogonal and oblique
# loading matrices; values near 1 on the diagonal indicate the factors
# are essentially the same across rotations
fa.congruence(fa_orthog, fa_oblique)
##      ML1  ML2
## MR1 0.97 0.24
## MR2 0.23 0.98

Reliability testing:

# Cronbach's alpha (internal consistency) for subscale 1 (items 1-5)
alpha(select(data, item1:item5))
## Number of categories should be increased  in order to count frequencies.
## 
## Reliability analysis   
## Call: alpha(x = .)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N  ase    mean   sd median_r
##       0.86      0.86    0.84      0.55 6.2 0.01 -0.0088 0.85     0.54
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.84  0.86  0.88
## Duhachek  0.84  0.86  0.88
## 
##  Reliability if an item is dropped:
##       raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## item1      0.84      0.84    0.80      0.56 5.2    0.013 0.0054  0.54
## item2      0.82      0.83    0.79      0.54 4.7    0.014 0.0049  0.52
## item3      0.85      0.85    0.81      0.58 5.5    0.012 0.0055  0.59
## item4      0.84      0.84    0.80      0.57 5.3    0.012 0.0034  0.57
## item5      0.81      0.81    0.76      0.51 4.2    0.015 0.0018  0.50
## 
##  Item statistics 
##         n raw.r std.r r.cor r.drop     mean  sd
## item1 450  0.79  0.79  0.71   0.66  0.01209 1.1
## item2 450  0.82  0.82  0.76   0.71 -0.02844 1.1
## item3 450  0.76  0.76  0.67   0.62 -0.03781 1.1
## item4 450  0.77  0.78  0.70   0.64  0.01058 1.0
## item5 450  0.86  0.86  0.83   0.77 -0.00034 1.0
# McDonald's omega (hierarchical and total) for subscale 1 (items 1-5)
omega(select(data, item1:item5))
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect.  Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected.  Examine the results carefully
## Warning in cov2cor(t(w) %*% r %*% w): diag(.) had 0 or NA entries; non-finite
## result is doubtful

## Omega 
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip, 
##     digits = digits, title = title, sl = sl, labels = labels, 
##     plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option, 
##     covar = covar)
## Alpha:                 0.86 
## G.6:                   0.84 
## Omega Hierarchical:    0.84 
## Omega H asymptotic:    0.95 
## Omega Total            0.88 
## 
## Schmid Leiman Factor loadings greater than  0.2 
##          g  F1*  F2*  F3*   h2   u2   p2
## item1 0.74                0.54 0.46 1.01
## item2 0.80                0.66 0.34 0.98
## item3 0.69                0.48 0.52 0.99
## item4 0.65      0.39      0.62 0.38 0.69
## item5 0.79      0.39      0.78 0.22 0.80
## 
## With Sums of squares  of:
##    g  F1*  F2*  F3* 
## 2.72 0.00 0.31 0.07 
## 
## general/max  8.83   max/min =   Inf
## mean percent general =  0.89    with sd =  0.14 and cv of  0.16 
## Explained Common Variance of the general factor =  0.88 
## 
## The degrees of freedom are -2  and the fit is  0 
## The number of observations was  450  with Chi Square =  0  with prob <  NA
## The root mean square of the residuals is  0 
## The df corrected root mean square of the residuals is  NA
## 
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 5  and the fit is  0.08 
## The number of observations was  450  with Chi Square =  33.66  with prob <  2.8e-06
## The root mean square of the residuals is  0.05 
## The df corrected root mean square of the residuals is  0.06 
## 
## RMSEA index =  0.113  and the 10 % confidence intervals are  0.079 0.151
## BIC =  3.11 
## 
## Measures of factor score adequacy             
##                                                  g F1*   F2*   F3*
## Correlation of scores with factors            0.92   0  0.63  0.41
## Multiple R square of scores with factors      0.85   0  0.39  0.17
## Minimum correlation of factor score estimates 0.70  -1 -0.22 -0.66
## 
##  Total, General and Subset omega for each subset
##                                                  g F1*  F2* F3*
## Omega total for total scores and subscales    0.88  NA 0.82 0.8
## Omega general for total scores and subscales  0.84  NA 0.63 0.8
## Omega group for total scores and subscales    0.04  NA 0.19 0.0
# Split-half reliability estimates (lambda coefficients) for items 1-5
splitHalf(select(data, item1:item5))
## Split half reliabilities  
## Call: splitHalf(r = .)
## 
## Maximum split half reliability (lambda 4) =  0.85
## Guttman lambda 6                          =  0.84
## Average split half reliability            =  0.82
## Guttman lambda 3 (alpha)                  =  0.86
## Guttman lambda 2                          =  0.86
## Minimum split half reliability  (beta)    =  0.8
## Average interitem r =  0.55  with median =  0.54

Let’s compute scale scores for each respondent:

# Compute mean scale scores per person.
# FIX: the original used mean(item1:item5) inside rowwise(), which builds
# the sequence seq(item1, item5) from the two cell VALUES in that row
# (steps of 1) rather than averaging the five item columns. c_across()
# is the correct way to select columns within a rowwise operation.
data <- data %>%
  rowwise() %>%
  mutate(score1 = mean(c_across(item1:item5)),
         score2 = mean(c_across(item6:item10))) %>%
  ungroup()  # drop rowwise grouping so later operations aren't row-wise

# Extract model-based factor scores from the oblique solution
scores <- fa_oblique$scores

# View the data
head(data)
## # A tibble: 6 × 12
## # Rowwise: 
##     item1  item2   item3   item4   item5  item6  item7  item8   item9 item10
##     <dbl>  <dbl>   <dbl>   <dbl>   <dbl>  <dbl>  <dbl>  <dbl>   <dbl>  <dbl>
## 1 -0.101   0.758  0.725   0.0336  0.200  -0.400  0.898  1.02  -0.263   0.663
## 2 -2.57   -2.08  -1.28   -0.941  -0.862  -1.16  -2.43  -2.67  -1.99   -1.08 
## 3  0.398  -1.20   1.10    1.16    0.963  -0.774 -1.23  -0.780  0.0983  0.506
## 4 -0.810  -0.747 -0.993   0.566  -0.165   0.326  0.101  0.682  0.427   0.936
## 5 -2.01   -2.03  -1.34   -0.648  -1.94   -0.384 -0.456 -0.673 -1.15   -0.419
## 6 -0.0819 -0.252  0.0840 -0.499   0.0173  2.21  -0.676 -0.476  1.30    0.368
## # ℹ 2 more variables: score1 <dbl>, score2 <dbl>

Construct validity / criterion validity:

# Correlation between the two subscale scores
with(data, cor(score1, score2))
## [1] 0.216305

Optional

Let’s add some outcome data:

# Simulate four outcome variables as independent standard-normal draws,
# one per row per variable (rowwise() makes each rnorm(n = 1) a single
# scalar draw for that row).
# NOTE(review): no set.seed() is called, so these values differ on every
# run -- add set.seed() before this block for reproducibility.
data <- data %>%
  rowwise() %>%
  mutate(
    out1 = rnorm(n = 1),
    out2 = rnorm(n = 1),
    out3 = rnorm(n = 1),
    out4 = rnorm(n = 1)
  )

And now fit a SEM model:

# lavaan model syntax: two measurement models from the questionnaire
# items, one from the simulated outcomes, and a structural regression.
# FIX: the original mediator line had "item 4" (stray space), which is
# invalid lavaan syntax -- the variable is named item4.
sem_spec <- '
# Factors
predictor =~ item6 + item7 + item8 + item9 + item10
mediator =~ item1 + item2 + item3 + item4 + item5
outcome =~ out1 + out2 + out3 + out4

# Paths
outcome ~ predictor + mediator
'

# Fit the structural model. sem() is the conventional lavaan wrapper for
# models containing regressions among latent variables (per the lavaan
# docs its defaults currently match cfa(), so results are unchanged).
# The non-convergence warning is expected: out1-out4 are independent
# random noise, so the "outcome" factor has essentially no common
# variance to model.
sem_model <- sem(model = sem_spec, data = data)
summary(sem_model,
        fit.measures = TRUE,    # use TRUE, not the reassignable T
        standardized = TRUE     # spell out rather than partial-match "std"
        )
## Warning in lav_object_summary(object = object, header = header, fit.measures = fit.measures, : lavaan WARNING: fit measures not available if model did not converge
## lavaan 0.6.16 did NOT end normally after 1070 iterations
## ** WARNING ** Estimates below are most likely unreliable
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                        31
## 
##   Number of observations                           450
## 
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Latent Variables:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   predictor =~                                                          
##     item6             1.000                               0.583    0.617
##     item7             1.664       NA                      0.970    0.901
##     item8             1.575       NA                      0.918    0.921
##     item9             1.184       NA                      0.690    0.669
##     item10            1.115       NA                      0.650    0.646
##   mediator =~                                                           
##     item1             1.000                               0.777    0.720
##     item2             1.093       NA                      0.849    0.777
##     item3             0.903       NA                      0.702    0.665
##     item4             0.945       NA                      0.734    0.717
##     item5             1.114       NA                      0.866    0.850
##   outcome =~                                                            
##     out1              1.000                               0.003    0.003
##     out2             -1.514       NA                     -0.005   -0.005
##     out3              3.412       NA                      0.011    0.011
##     out4           2519.289       NA                      8.126    8.462
## 
## Regressions:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   outcome ~                                                             
##     predictor         0.000       NA                      0.002    0.002
##     mediator         -0.000       NA                     -0.016   -0.016
## 
## Covariances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##   predictor ~~                                                          
##     mediator          0.195       NA                      0.430    0.430
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)   Std.lv  Std.all
##    .item6             0.551       NA                      0.551    0.619
##    .item7             0.217       NA                      0.217    0.188
##    .item8             0.150       NA                      0.150    0.151
##    .item9             0.588       NA                      0.588    0.552
##    .item10            0.589       NA                      0.589    0.583
##    .item1             0.562       NA                      0.562    0.482
##    .item2             0.475       NA                      0.475    0.397
##    .item3             0.619       NA                      0.619    0.557
##    .item4             0.511       NA                      0.511    0.486
##    .item5             0.289       NA                      0.289    0.278
##    .out1              1.053       NA                      1.053    1.000
##    .out2              0.998       NA                      0.998    1.000
##    .out3              1.043       NA                      1.043    1.000
##    .out4            -65.105       NA                    -65.105  -70.613
##     predictor         0.340       NA                      1.000    1.000
##     mediator          0.604       NA                      1.000    1.000
##    .outcome           0.000       NA                      1.000    1.000