The goal for today is to calculate a PCA model. First, let’s load our packages:
# Load tidyverse and lavaan packages
library(tidyverse)
library(psych)
library(GPArotation)
# CFA testing
library(lavaan)
Let’s load in some data:
data <- read.csv("https://uoepsy.github.io/data/conduct_probs.csv")
str(data)
## 'data.frame': 450 obs. of 11 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ item1 : num -0.101 -2.57 0.398 -0.81 -2.011 ...
## $ item2 : num 0.758 -2.079 -1.202 -0.747 -2.028 ...
## $ item3 : num 0.725 -1.283 1.104 -0.993 -1.338 ...
## $ item4 : num 0.0336 -0.9411 1.1564 0.5656 -0.6479 ...
## $ item5 : num 0.2 -0.862 0.963 -0.165 -1.938 ...
## $ item6 : num -0.4 -1.159 -0.774 0.326 -0.384 ...
## $ item7 : num 0.898 -2.426 -1.227 0.101 -0.456 ...
## $ item8 : num 1.024 -2.666 -0.78 0.682 -0.673 ...
## $ item9 : num -0.2627 -1.9893 0.0983 0.4273 -1.1475 ...
## $ item10: num 0.663 -1.08 0.506 0.936 -0.419 ...
data <- data %>%
select(item1:item10)
Let’s run a Tucker’s congruence coefficient to see the difference between the two solutions:
# Estimate different types of models
fa_orthog <- fa(data, nfactors = 2, fml = "ml", rotate = "varimax")
fa_oblique <- fa(data, nfactors = 2, fm = "ml", rotate = "oblimin")
fa_oblique
## Factor Analysis using method = ml
## Call: fa(r = data, nfactors = 2, rotate = "oblimin", fm = "ml")
## Standardized loadings (pattern matrix) based upon correlation matrix
## ML1 ML2 h2 u2 com
## item1 0.04 0.70 0.51 0.49 1
## item2 0.02 0.77 0.60 0.40 1
## item3 -0.01 0.67 0.44 0.56 1
## item4 0.05 0.69 0.51 0.49 1
## item5 -0.05 0.88 0.74 0.26 1
## item6 0.62 -0.01 0.38 0.62 1
## item7 0.90 0.00 0.81 0.19 1
## item8 0.93 -0.02 0.85 0.15 1
## item9 0.63 0.09 0.45 0.55 1
## item10 0.65 -0.02 0.42 0.58 1
##
## ML1 ML2
## SS loadings 2.91 2.80
## Proportion Var 0.29 0.28
## Cumulative Var 0.29 0.57
## Proportion Explained 0.51 0.49
## Cumulative Proportion 0.51 1.00
##
## With factor correlations of
## ML1 ML2
## ML1 1.00 0.43
## ML2 0.43 1.00
##
## Mean item complexity = 1
## Test of the hypothesis that 2 factors are sufficient.
##
## df null model = 45 with the objective function = 5.03 with Chi Square = 2237.53
## df of the model are 26 and the objective function was 0.09
##
## The root mean square of the residuals (RMSR) is 0.02
## The df corrected root mean square of the residuals is 0.03
##
## The harmonic n.obs is 450 with the empirical chi square 15.05 with prob < 0.96
## The total n.obs was 450 with Likelihood Chi Square = 38.26 with prob < 0.057
##
## Tucker Lewis Index of factoring reliability = 0.99
## RMSEA index = 0.032 and the 90 % confidence intervals are 0 0.053
## BIC = -120.58
## Fit based upon off diagonal values = 1
## Measures of factor score adequacy
## ML1 ML2
## Correlation of (regression) scores with factors 0.96 0.94
## Multiple R square of scores with factors 0.93 0.88
## Minimum correlation of possible factor scores 0.85 0.76
fa.congruence(fa_orthog, fa_oblique)
## ML1 ML2
## MR1 0.97 0.24
## MR2 0.23 0.98
Reliability testing:
# Cronbach's Alpha
data %>%
select(item1:item5) %>%
alpha()
## Number of categories should be increased in order to count frequencies.
##
## Reliability analysis
## Call: alpha(x = .)
##
## raw_alpha std.alpha G6(smc) average_r S/N ase mean sd median_r
## 0.86 0.86 0.84 0.55 6.2 0.01 -0.0088 0.85 0.54
##
## 95% confidence boundaries
## lower alpha upper
## Feldt 0.84 0.86 0.88
## Duhachek 0.84 0.86 0.88
##
## Reliability if an item is dropped:
## raw_alpha std.alpha G6(smc) average_r S/N alpha se var.r med.r
## item1 0.84 0.84 0.80 0.56 5.2 0.013 0.0054 0.54
## item2 0.82 0.83 0.79 0.54 4.7 0.014 0.0049 0.52
## item3 0.85 0.85 0.81 0.58 5.5 0.012 0.0055 0.59
## item4 0.84 0.84 0.80 0.57 5.3 0.012 0.0034 0.57
## item5 0.81 0.81 0.76 0.51 4.2 0.015 0.0018 0.50
##
## Item statistics
## n raw.r std.r r.cor r.drop mean sd
## item1 450 0.79 0.79 0.71 0.66 0.01209 1.1
## item2 450 0.82 0.82 0.76 0.71 -0.02844 1.1
## item3 450 0.76 0.76 0.67 0.62 -0.03781 1.1
## item4 450 0.77 0.78 0.70 0.64 0.01058 1.0
## item5 450 0.86 0.86 0.83 0.77 -0.00034 1.0
# McDonald's Omega
data %>%
select(item1:item5) %>%
omega()
## Warning in fa.stats(r = r, f = f, phi = phi, n.obs = n.obs, np.obs = np.obs, :
## The estimated weights for the factor scores are probably incorrect. Try a
## different factor score estimation method.
## Warning in fac(r = r, nfactors = nfactors, n.obs = n.obs, rotate = rotate, : An
## ultra-Heywood case was detected. Examine the results carefully
## Warning in cov2cor(t(w) %*% r %*% w): diag(.) had 0 or NA entries; non-finite
## result is doubtful
## Omega
## Call: omegah(m = m, nfactors = nfactors, fm = fm, key = key, flip = flip,
## digits = digits, title = title, sl = sl, labels = labels,
## plot = plot, n.obs = n.obs, rotate = rotate, Phi = Phi, option = option,
## covar = covar)
## Alpha: 0.86
## G.6: 0.84
## Omega Hierarchical: 0.84
## Omega H asymptotic: 0.95
## Omega Total 0.88
##
## Schmid Leiman Factor loadings greater than 0.2
## g F1* F2* F3* h2 u2 p2
## item1 0.74 0.54 0.46 1.01
## item2 0.80 0.66 0.34 0.98
## item3 0.69 0.48 0.52 0.99
## item4 0.65 0.39 0.62 0.38 0.69
## item5 0.79 0.39 0.78 0.22 0.80
##
## With Sums of squares of:
## g F1* F2* F3*
## 2.72 0.00 0.31 0.07
##
## general/max 8.83 max/min = Inf
## mean percent general = 0.89 with sd = 0.14 and cv of 0.16
## Explained Common Variance of the general factor = 0.88
##
## The degrees of freedom are -2 and the fit is 0
## The number of observations was 450 with Chi Square = 0 with prob < NA
## The root mean square of the residuals is 0
## The df corrected root mean square of the residuals is NA
##
## Compare this with the adequacy of just a general factor and no group factors
## The degrees of freedom for just the general factor are 5 and the fit is 0.08
## The number of observations was 450 with Chi Square = 33.66 with prob < 2.8e-06
## The root mean square of the residuals is 0.05
## The df corrected root mean square of the residuals is 0.06
##
## RMSEA index = 0.113 and the 10 % confidence intervals are 0.079 0.151
## BIC = 3.11
##
## Measures of factor score adequacy
## g F1* F2* F3*
## Correlation of scores with factors 0.92 0 0.63 0.41
## Multiple R square of scores with factors 0.85 0 0.39 0.17
## Minimum correlation of factor score estimates 0.70 -1 -0.22 -0.66
##
## Total, General and Subset omega for each subset
## g F1* F2* F3*
## Omega total for total scores and subscales 0.88 NA 0.82 0.8
## Omega general for total scores and subscales 0.84 NA 0.63 0.8
## Omega group for total scores and subscales 0.04 NA 0.19 0.0
# Split-half reliability
data %>%
select(item1:item5) %>%
splitHalf()
## Split half reliabilities
## Call: splitHalf(r = .)
##
## Maximum split half reliability (lambda 4) = 0.85
## Guttman lambda 6 = 0.84
## Average split half reliability = 0.82
## Guttman lambda 3 (alpha) = 0.86
## Guttman lambda 2 = 0.86
## Minimum split half reliability (beta) = 0.8
## Average interitem r = 0.55 with median = 0.54
Let’s score them up:
# Extract sum scores
data <- data %>%
rowwise() %>%
mutate(score1 = mean(item1:item5),
score2 = mean(item6:item10))
# Extract factor scores
scores <- fa_oblique$scores
# View the data
head(data)
## # A tibble: 6 × 12
## # Rowwise:
## item1 item2 item3 item4 item5 item6 item7 item8 item9 item10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 -0.101 0.758 0.725 0.0336 0.200 -0.400 0.898 1.02 -0.263 0.663
## 2 -2.57 -2.08 -1.28 -0.941 -0.862 -1.16 -2.43 -2.67 -1.99 -1.08
## 3 0.398 -1.20 1.10 1.16 0.963 -0.774 -1.23 -0.780 0.0983 0.506
## 4 -0.810 -0.747 -0.993 0.566 -0.165 0.326 0.101 0.682 0.427 0.936
## 5 -2.01 -2.03 -1.34 -0.648 -1.94 -0.384 -0.456 -0.673 -1.15 -0.419
## 6 -0.0819 -0.252 0.0840 -0.499 0.0173 2.21 -0.676 -0.476 1.30 0.368
## # ℹ 2 more variables: score1 <dbl>, score2 <dbl>
Construct validity / criterion validity:
cor(data$score1,
data$score2)
## [1] 0.216305
Let’s add some outcome data:
data <- data %>%
rowwise() %>%
mutate(
out1 = rnorm(n = 1),
out2 = rnorm(n = 1),
out3 = rnorm(n = 1),
out4 = rnorm(n = 1)
)
And now fit a SEM model:
sem_spec <- '
# Factors
predictor =~ item6 + item7 + item8 + item9 + item10
mediator =~ item1 + item2 + item3 + item 4 + item5
outcome =~ out1 + out2 + out3 + out4
# Paths
outcome ~ predictor + mediator
'
sem_model <- cfa(model = sem_spec, data = data)
## Warning in lavaan::lavaan(model = sem_spec, data = data, model.type = "cfa", : lavaan WARNING:
## the optimizer warns that a solution has NOT been found!
summary(sem_model,
fit.measures = T,
std = T
)
## Warning in lav_object_summary(object = object, header = header, fit.measures = fit.measures, : lavaan WARNING: fit measures not available if model did not converge
## lavaan 0.6.16 did NOT end normally after 1070 iterations
## ** WARNING ** Estimates below are most likely unreliable
##
## Estimator ML
## Optimization method NLMINB
## Number of model parameters 31
##
## Number of observations 450
##
##
## Parameter Estimates:
##
## Standard errors Standard
## Information Expected
## Information saturated (h1) model Structured
##
## Latent Variables:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## predictor =~
## item6 1.000 0.583 0.617
## item7 1.664 NA 0.970 0.901
## item8 1.575 NA 0.918 0.921
## item9 1.184 NA 0.690 0.669
## item10 1.115 NA 0.650 0.646
## mediator =~
## item1 1.000 0.777 0.720
## item2 1.093 NA 0.849 0.777
## item3 0.903 NA 0.702 0.665
## item4 0.945 NA 0.734 0.717
## item5 1.114 NA 0.866 0.850
## outcome =~
## out1 1.000 0.003 0.003
## out2 -1.514 NA -0.005 -0.005
## out3 3.412 NA 0.011 0.011
## out4 2519.289 NA 8.126 8.462
##
## Regressions:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## outcome ~
## predictor 0.000 NA 0.002 0.002
## mediator -0.000 NA -0.016 -0.016
##
## Covariances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## predictor ~~
## mediator 0.195 NA 0.430 0.430
##
## Variances:
## Estimate Std.Err z-value P(>|z|) Std.lv Std.all
## .item6 0.551 NA 0.551 0.619
## .item7 0.217 NA 0.217 0.188
## .item8 0.150 NA 0.150 0.151
## .item9 0.588 NA 0.588 0.552
## .item10 0.589 NA 0.589 0.583
## .item1 0.562 NA 0.562 0.482
## .item2 0.475 NA 0.475 0.397
## .item3 0.619 NA 0.619 0.557
## .item4 0.511 NA 0.511 0.486
## .item5 0.289 NA 0.289 0.278
## .out1 1.053 NA 1.053 1.000
## .out2 0.998 NA 0.998 1.000
## .out3 1.043 NA 1.043 1.000
## .out4 -65.105 NA -65.105 -70.613
## predictor 0.340 NA 1.000 1.000
## mediator 0.604 NA 1.000 1.000
## .outcome 0.000 NA 1.000 1.000