**Additional Resources:**

**The Example:**

The example is based on responses by 117 university students to a 50 item version of the IPIP.

```
# Required packages.
require(psych);
require(foreign);
# Import data from SPSS data file.
personality <- foreign::read.spss("spss\\personality.sav",
to.data.frame = TRUE)
# Factor analysis.
items <- c("ipip1", "ipip2", "ipip3", "ipip4", "ipip5",
"ipip6", "ipip7", "ipip8", "ipip9", "ipip10", "ipip11",
"ipip12", "ipip13", "ipip14", "ipip15", "ipip16", "ipip17",
"ipip18", "ipip19", "ipip20", "ipip21", "ipip22", "ipip23",
"ipip24", "ipip25", "ipip26", "ipip27", "ipip28", "ipip29",
"ipip30", "ipip31", "ipip32", "ipip33", "ipip34", "ipip35",
"ipip36", "ipip37", "ipip38", "ipip39", "ipip40", "ipip41",
"ipip42", "ipip43", "ipip44", "ipip45", "ipip46", "ipip47",
"ipip48", "ipip49", "ipip50") ;
# Descriptive Statistics.
itemDescriptiveStatistics <- sapply(personality[items],
function(x) c(mean=mean(x), sd=sd(x), n = length(x)));
cbind(attr(personality, "variable.labels")[items],
round(t(itemDescriptiveStatistics), 2) );
# Scree plot.
psych::VSS.scree(cor(personality[items]));
# Some other indicators of the number of factors.
psych::VSS(cor(personality[items]), 10,
n.obs = nrow(personality), rotate = "promax");
# Communalities
itemCommunalities <- 1 - dataForScreePlot$uniquenesses;
round(cbind(itemCommunalities), 2);
# List items with low communalities.
itemsWithLowCommunalities <- names(itemCommunalities[
itemCommunalities < .25]);
cat("Items with low communalities (< .25)\n");
problematicItemText <- attr(personality,
"variable.labels")[itemsWithLowCommunalities ];
problematicItemCommunalities <- round(itemCommunalities[
itemsWithLowCommunalities],3);
data.frame(itemText = problematicItemText,
communality = problematicItemCommunalities);
# Variance explained by each factor before rotation.
# (see Proportion Var)
factanal(personality[items], factors = 5, rotation = "none");
# Variance explained by each factor after rotatoin.
# (see Proportion Var)
factanal(personality[items], factors = 5, rotation = "promax");
# Loadings prior to rotation.
fitNoRotation <- factanal(personality[items],
factors = 5, rotation = "none");
print(fitNoRotation$loadings, cutoff = .30, sort = TRUE);
# Loadings after rotation.
fitAfterRotation <- factanal(personality[items],
factors = 5, rotation = "promax");
print(fitAfterRotation$loadings, cutoff = .30, sort = TRUE);
# Correlations between factors
# This assumes use of a correlated rotation method such as promax
factorCorrelationsRegression <- cor(factanal(
personality[items], factors = 5,
rotation = "promax", scores = "regression")$scores);
round(factorCorrelationsRegression,2);
```

And this is what the output looks like. If you copy and paste the output into a text editor like Notepad, it will look prettier.

```
> # Required packages.
> require(psych);
> require(foreign);
>
> # Import data from SPSS data file.
> personality <- foreign::read.spss("spss\\personality.sav",
+ to.data.frame = TRUE)
>
> # Factor analysis.
> items <- c("ipip1", "ipip2", "ipip3", "ipip4", "ipip5",
+ "ipip6", "ipip7", "ipip8", "ipip9", "ipip10", "ipip11",
+ "ipip12", "ipip13", "ipip14", "ipip15", "ipip16", "ipip17",
+ "ipip18", "ipip19", "ipip20", "ipip21", "ipip22", "ipip23",
+ "ipip24", "ipip25", "ipip26", "ipip27", "ipip28", "ipip29",
+ "ipip30", "ipip31", "ipip32", "ipip33", "ipip34", "ipip35",
+ "ipip36", "ipip37", "ipip38", "ipip39", "ipip40", "ipip41",
+ "ipip42", "ipip43", "ipip44", "ipip45", "ipip46", "ipip47",
+ "ipip48", "ipip49", "ipip50") ;
>
> # Descriptive Statistics.
> itemDescriptiveStatistics <- sapply(personality[items],
+ function(x) c(mean=mean(x), sd=sd(x), n = length(x)));
> cbind(attr(personality, "variable.labels")[items],
+ round(t(itemDescriptiveStatistics), 2) );
mean sd n
ipip1 "Q1. I Am the life of the party " "2.84" "1.2" "117"
ipip2 "Q2. I Feel little concern for others" "1.58" "0.9" "117"
ipip3 "Q3. I Am always prepared" "3.25" "1.13" "117"
ipip4 "Q4. I Get stressed out easily" "3.35" "1.32" "117"
ipip5 "Q5. I Have a rich vocabulary" "3.61" "1.06" "117"
ipip6 "Q6. I Dont talk a lot" "2.38" "1.2" "117"
ipip7 "Q7. I Am interested in people" "4.4" "0.78" "117"
ipip8 "Q8. I Leave my belongings around" "2.69" "1.42" "117"
ipip9 "Q9. I Am relaxed most of the time" "3.27" "1.08" "117"
ipip10 "Q10. I Have difficulty understanding abstract ideas" "2.08" "1.03" "117"
ipip11 "Q11. I Feel comfortable around people" "3.91" "1.02" "117"
ipip12 "Q12. I Insult people" "1.94" "1.12" "117"
ipip13 "Q13. I Pay attention to details" "3.91" "0.96" "117"
ipip14 "Q14. I Worry about things" "3.7" "1.04" "117"
ipip15 "Q15. I Have a vivid imagination" "3.94" "1.07" "117"
ipip16 "Q16. I Keep in the background" "2.74" "1.25" "117"
ipip17 "Q17. I Sympathize with others feelings" "4.38" "0.65" "117"
ipip18 "Q18. I Make a mess of things" "2.42" "1.04" "117"
ipip19 "Q19. I Seldom feel blue" "2.92" "1.16" "117"
ipip20 "Q20. I Am not interested in abstract ideas" "2.01" "1.05" "117"
ipip21 "Q21. I Start conversations" "3.65" "1.1" "117"
ipip22 "Q22. I Am not interested in other peoples problems" "1.68" "0.8" "117"
ipip23 "Q23. I Get chores done right away" "2.82" "1.23" "117"
ipip24 "Q24. I Am easily disturbed" "3.09" "1.22" "117"
ipip25 "Q25. I Have excellent ideas" "3.75" "0.94" "117"
ipip26 "Q26. I Have little to say" "2.08" "1" "117"
ipip27 "Q27. I Have a soft heart" "4.15" "0.88" "117"
ipip28 "Q28. I Often forget to put things back in their proper place" "2.56" "1.28" "117"
ipip29 "Q29. I Get upset easily" "2.87" "1.31" "117"
ipip30 "Q30. I Do not have a good imagination" "1.9" "0.99" "117"
ipip31 "Q31. I Talk to a lot of different people at parties" "3.23" "1.3" "117"
ipip32 "Q32. I Am not really interested in others" "1.7" "0.75" "117"
ipip33 "Q33. I Like order" "3.67" "1.07" "117"
ipip34 "Q34. I Change my mood a lot" "3.15" "1.21" "117"
ipip35 "Q35. I Am quick to understand things" "3.85" "0.94" "117"
ipip36 "Q36. I Dont like to draw attention to myself" "3.06" "1.22" "117"
ipip37 "Q37. I Take time out for others" "4.12" "0.83" "117"
ipip38 "Q38. I Shirk my duties" "2.32" "1.06" "117"
ipip39 "Q39. I Have frequent mood swings" "2.88" "1.31" "117"
ipip40 "Q40. I Use difficult words" "3.18" "1.16" "117"
ipip41 "Q41. I Dont mind being the center of attention" "3.25" "1.29" "117"
ipip42 "Q42. I Feel others emotions" "4.21" "0.64" "117"
ipip43 "Q43. I Follow a schedule" "3.44" "1.13" "117"
ipip44 "Q44. I Get irritated easily" "2.94" "1.21" "117"
ipip45 "Q45. I Spend time reflecting on things" "4.15" "0.91" "117"
ipip46 "Q46. I Am quiet around strangers" "3.21" "1.33" "117"
ipip47 "Q47. I Make people feel at ease" "3.73" "0.89" "117"
ipip48 "Q48. I Am exacting in my work" "3.63" "0.92" "117"
ipip49 "Q49. I Often feel blue" "2.36" "1.27" "117"
ipip50 "Q50. I Am full of ideas" "3.81" "0.96" "117"
>
> # Scree plot.
> psych::VSS.scree(cor(personality[items]));
>
> # Some other indicators of the number of factors.
> psych::VSS(cor(personality[items]), 10,
+ n.obs = nrow(personality), rotate = "promax");
Very Simple Structure
VSS complexity 1 achieves a maximimum of 0.64 with 6 factors
VSS complexity 2 achieves a maximimum of 0.74 with 4 factors
The Velicer MAP criterion achieves a minimum of 0.04 with 6 factors
Velicer MAP
[1] 0.04 0.04 0.03 0.02 0.02 0.02 0.02 0.02 0.02 0.02
Very Simple Structure Complexity 1
[1] 0.43 0.48 0.57 0.64 0.64 0.64 0.60 0.61 0.64 0.61
Very Simple Structure Complexity 2
[1] 0.00 0.55 0.67 0.74 0.74 0.73 0.71 0.73 0.71 0.71
>
> # Communalities
> itemCommunalities <- 1 - dataForScreePlot$uniquenesses;
> round(cbind(itemCommunalities), 2);
itemCommunalities
ipip1 0.72
ipip2 0.19
ipip3 0.40
ipip4 0.77
ipip5 0.59
ipip6 0.70
ipip7 0.58
ipip8 0.62
ipip9 0.49
ipip10 0.46
ipip11 0.56
ipip12 0.27
ipip13 0.46
ipip14 0.75
ipip15 0.75
ipip16 0.75
ipip17 0.45
ipip18 0.55
ipip19 0.60
ipip20 0.47
ipip21 0.68
ipip22 0.43
ipip23 0.59
ipip24 0.41
ipip25 0.68
ipip26 0.77
ipip27 0.53
ipip28 0.54
ipip29 0.75
ipip30 0.80
ipip31 0.73
ipip32 0.41
ipip33 0.44
ipip34 0.92
ipip35 0.53
ipip36 0.63
ipip37 0.25
ipip38 0.34
ipip39 0.72
ipip40 1.00
ipip41 0.80
ipip42 0.64
ipip43 0.49
ipip44 0.65
ipip45 0.37
ipip46 0.52
ipip47 0.47
ipip48 0.44
ipip49 0.82
ipip50 0.75
>
> # List items with low communalities.
> itemsWithLowCommunalities <- names(itemCommunalities[
+ itemCommunalities < .25]);
> cat("Items with low communalities (< .25)\n");
Items with low communalities (< .25)
> problematicItemText <- attr(personality,
+ "variable.labels")[itemsWithLowCommunalities ];
> problematicItemCommunalities <- round(itemCommunalities[
+ itemsWithLowCommunalities],3);
> data.frame(itemText = problematicItemText,
+ communality = problematicItemCommunalities);
itemText communality
ipip2 Q2. I Feel little concern for others 0.187
ipip37 Q37. I Take time out for others 0.249
>
>
> # Variance explained by each factor before rotation.
> # (see Proportion Var)
> factanal(personality[items], factors = 5, rotation = "none");
Call:
factanal(x = personality[items], factors = 5, rotation = "none")
Uniquenesses:
ipip1 ipip2 ipip3 ipip4 ipip5 ipip6 ipip7 ipip8 ipip9 ipip10 ipip11 ipip12 ipip13 ipip14 ipip15 ipip16 ipip17 ipip18 ipip19 ipip20 ipip21 ipip22 ipip23 ipip24 ipip25
0.345 0.873 0.644 0.367 0.819 0.414 0.480 0.529 0.590 0.537 0.458 0.789 0.645 0.431 0.504 0.266 0.535 0.519 0.619 0.506 0.347 0.615 0.484 0.615 0.329
ipip26 ipip27 ipip28 ipip29 ipip30 ipip31 ipip32 ipip33 ipip34 ipip35 ipip36 ipip37 ipip38 ipip39 ipip40 ipip41 ipip42 ipip43 ipip44 ipip45 ipip46 ipip47 ipip48 ipip49 ipip50
0.480 0.613 0.556 0.293 0.421 0.298 0.592 0.600 0.372 0.637 0.541 0.775 0.735 0.434 0.846 0.457 0.408 0.508 0.370 0.699 0.499 0.580 0.715 0.397 0.264
Loadings:
Factor1 Factor2 Factor3 Factor4 Factor5
ipip1 -0.679 0.403 -0.166
ipip2 -0.293 0.120 -0.131
ipip3 -0.173 0.564
ipip4 0.517 0.251 0.510 0.203
ipip5 -0.212 0.266 -0.151 0.156 -0.136
ipip6 0.634 -0.419
ipip7 -0.448 0.362 0.123 0.415
ipip8 0.146 0.193 -0.632
ipip9 -0.404 -0.268 -0.298 -0.292
ipip10 0.215 -0.370 0.427 0.308
ipip11 -0.641 -0.101 0.342
ipip12 0.323 -0.294
ipip13 0.255 -0.295 0.445
ipip14 0.538 0.163 0.380 0.294 0.147
ipip15 -0.149 0.646 -0.159 -0.156
ipip16 0.762 -0.352 0.151
ipip17 0.386 0.546
ipip18 0.181 0.162 0.284 -0.583
ipip19 -0.446 -0.206 -0.347 0.103
ipip20 0.124 -0.523 0.348 0.117 0.265
ipip21 -0.646 0.476
ipip22 0.291 -0.414 -0.352
ipip23 -0.286 -0.145 -0.172 0.616
ipip24 0.467 0.166 0.339 -0.158
ipip25 -0.396 0.553 -0.416 -0.182
ipip26 0.662 -0.254
ipip27 -0.176 0.385 0.214 0.401
ipip28 0.104 -0.623 0.178
ipip29 0.605 0.206 0.505 0.204
ipip30 0.278 -0.649 0.231 0.138
ipip31 -0.661 -0.192 0.472
ipip32 0.344 -0.388 -0.145 -0.331
ipip33 0.619 -0.103
ipip34 0.425 0.421 0.500 -0.139
ipip35 -0.425 0.306 -0.229 0.186
ipip36 0.538 -0.102 -0.314 0.243
ipip37 -0.307 0.230 0.264
ipip38 0.216 0.108 -0.444
ipip39 0.376 0.391 0.469 -0.228
ipip40 -0.160 0.221 0.174 -0.208
ipip41 -0.546 0.437 -0.224
ipip42 -0.231 0.553 0.191 0.440
ipip43 0.676 -0.158
ipip44 0.498 0.362 0.477 -0.129
ipip45 0.178 0.495 0.143
ipip46 0.557 -0.393 0.162
ipip47 -0.500 0.408
ipip48 -0.142 0.509
ipip49 0.619 0.304 0.318 -0.160
ipip50 -0.347 0.681 -0.318 -0.101 -0.200
Factor1 Factor2 Factor3 Factor4 Factor5
SS loadings 8.281 4.755 4.553 3.899 2.162
Proportion Var 0.166 0.095 0.091 0.078 0.043
Cumulative Var 0.166 0.261 0.352 0.430 0.473
Test of the hypothesis that 5 factors are sufficient.
The chi square statistic is 1389.91 on 985 degrees of freedom.
The p-value is 2.3e-16
>
> # Variance explained by each factor after rotatoin.
> # (see Proportion Var)
> factanal(personality[items], factors = 5, rotation = "promax");
Call:
factanal(x = personality[items], factors = 5, rotation = "promax")
Uniquenesses:
ipip1 ipip2 ipip3 ipip4 ipip5 ipip6 ipip7 ipip8 ipip9 ipip10 ipip11 ipip12 ipip13 ipip14 ipip15 ipip16 ipip17 ipip18 ipip19 ipip20 ipip21 ipip22 ipip23 ipip24 ipip25
0.345 0.873 0.644 0.367 0.819 0.414 0.480 0.529 0.590 0.537 0.458 0.789 0.645 0.431 0.504 0.266 0.535 0.519 0.619 0.506 0.347 0.615 0.484 0.615 0.329
ipip26 ipip27 ipip28 ipip29 ipip30 ipip31 ipip32 ipip33 ipip34 ipip35 ipip36 ipip37 ipip38 ipip39 ipip40 ipip41 ipip42 ipip43 ipip44 ipip45 ipip46 ipip47 ipip48 ipip49 ipip50
0.480 0.613 0.556 0.293 0.421 0.298 0.592 0.600 0.372 0.637 0.541 0.775 0.735 0.434 0.846 0.457 0.408 0.508 0.370 0.699 0.499 0.580 0.715 0.397 0.264
Loadings:
Factor1 Factor2 Factor3 Factor4 Factor5
ipip1 0.811
ipip2 0.165 -0.167 -0.249
ipip3 0.562 0.139
ipip4 0.770 -0.192 0.141
ipip5 0.184 0.344
ipip6 -0.730 0.119
ipip7 0.177 0.636
ipip8 0.164 -0.672 0.148
ipip9 -0.601 -0.200 -0.136
ipip10 0.157 -0.675 0.173
ipip11 0.655 -0.112 -0.127 0.144
ipip12 0.411 0.224 -0.298
ipip13 -0.181 0.464 0.255
ipip14 -0.148 0.636 0.170 -0.289 0.209
ipip15 0.108 -0.176 0.646 0.160
ipip16 -0.831
ipip17 -0.150 -0.102 0.725
ipip18 0.201 -0.648
ipip19 -0.560 0.158
ipip20 0.106 -0.726
ipip21 0.736 -0.137 0.226
ipip22 -0.137 -0.566
ipip23 0.109 -0.147 0.674
ipip24 0.498 -0.239
ipip25 -0.181 0.765
ipip26 -0.630 -0.133
ipip27 0.152 -0.117 0.602
ipip28 -0.680 0.138
ipip29 0.808 -0.195
ipip30 0.143 -0.705 -0.166
ipip31 0.831 -0.161
ipip32 -0.122 -0.560
ipip33 0.131 0.644
ipip34 0.111 0.795 -0.115 0.102
ipip35 -0.176 0.195 0.341 0.218
ipip36 -0.681 -0.194
ipip37 0.134 0.409
ipip38 -0.113 -0.486
ipip39 0.162 0.761 0.152
ipip40 0.237 0.195 0.190 0.243
ipip41 0.772 0.153
ipip42 0.127 0.126 0.722
ipip43 0.126 0.719
ipip44 0.807
ipip45 -0.178 0.329 0.340 0.148
ipip46 -0.733
ipip47 0.189 -0.296 -0.156 0.490
ipip48 0.514 0.102
ipip49 -0.131 0.711
ipip50 0.831
Factor1 Factor2 Factor3 Factor4 Factor5
SS loadings 6.182 5.548 4.188 4.140 3.473
Proportion Var 0.124 0.111 0.084 0.083 0.069
Cumulative Var 0.124 0.235 0.318 0.401 0.471
Test of the hypothesis that 5 factors are sufficient.
The chi square statistic is 1389.91 on 985 degrees of freedom.
The p-value is 2.3e-16
>
> # Loadings prior to rotation.
> fitNoRotation <- factanal(personality[items],
+ factors = 5, rotation = "none");
> print(fitNoRotation$loadings, cutoff = .30, sort = TRUE);
Loadings:
Factor1 Factor2 Factor3 Factor4 Factor5
ipip1 -0.679 0.403
ipip4 0.517 0.510
ipip6 0.634 -0.419
ipip11 -0.641 0.342
ipip14 0.538 0.380
ipip16 0.762 -0.352
ipip21 -0.646 0.476
ipip26 0.662
ipip29 0.605 0.505
ipip31 -0.661 0.472
ipip36 0.538 -0.314
ipip41 -0.546 0.437
ipip46 0.557 -0.393
ipip49 0.619 0.304 0.318
ipip15 0.646
ipip20 -0.523 0.348
ipip25 -0.396 0.553 -0.416
ipip30 -0.649
ipip42 0.553 0.440
ipip50 -0.347 0.681 -0.318
ipip3 0.564
ipip8 -0.632
ipip18 -0.583
ipip23 0.616
ipip28 -0.623
ipip33 0.619
ipip43 0.676
ipip48 0.509
ipip17 0.386 0.546
ipip2
ipip5
ipip7 -0.448 0.362 0.415
ipip9 -0.404
ipip10 -0.370 0.427 0.308
ipip12 0.323
ipip13 0.445
ipip19 -0.446 -0.347
ipip22 -0.414 -0.352
ipip24 0.467 0.339
ipip27 0.385 0.401
ipip32 0.344 -0.388 -0.331
ipip34 0.425 0.421 0.500
ipip35 -0.425 0.306
ipip37 -0.307
ipip38 -0.444
ipip39 0.376 0.391 0.469
ipip40
ipip44 0.498 0.362 0.477
ipip45 0.495
ipip47 -0.500 0.408
Factor1 Factor2 Factor3 Factor4 Factor5
SS loadings 8.281 4.755 4.553 3.899 2.162
Proportion Var 0.166 0.095 0.091 0.078 0.043
Cumulative Var 0.166 0.261 0.352 0.430 0.473
>
> # Loadings after rotation.
> fitAfterRotation <- factanal(personality[items],
+ factors = 5, rotation = "promax");
> print(fitAfterRotation$loadings, cutoff = .30, sort = TRUE);
Loadings:
Factor1 Factor2 Factor3 Factor4 Factor5
ipip1 0.811
ipip6 -0.730
ipip11 0.655
ipip16 -0.831
ipip21 0.736
ipip26 -0.630
ipip31 0.831
ipip36 -0.681
ipip41 0.772
ipip46 -0.733
ipip4 0.770
ipip9 -0.601
ipip14 0.636
ipip19 -0.560
ipip29 0.808
ipip34 0.795
ipip39 0.761
ipip44 0.807
ipip49 0.711
ipip3 0.562
ipip8 -0.672
ipip18 -0.648
ipip23 0.674
ipip28 -0.680
ipip33 0.644
ipip43 0.719
ipip48 0.514
ipip10 -0.675
ipip15 0.646
ipip20 -0.726
ipip25 0.765
ipip30 -0.705
ipip50 0.831
ipip7 0.636
ipip17 0.725
ipip22 -0.566
ipip27 0.602
ipip32 -0.560
ipip42 0.722
ipip2
ipip5 0.344
ipip12 0.411
ipip13 0.464
ipip24 0.498
ipip35 0.341
ipip37 0.409
ipip38 -0.486
ipip40
ipip45 0.329 0.340
ipip47 0.490
Factor1 Factor2 Factor3 Factor4 Factor5
SS loadings 6.182 5.548 4.188 4.140 3.473
Proportion Var 0.124 0.111 0.084 0.083 0.069
Cumulative Var 0.124 0.235 0.318 0.401 0.471
>
> # Correlations between factors
> # This assumes use of a correlated rotation method such as promax
> factorCorrelationsRegression <- cor(factanal(
+ personality[items], factors = 5,
+ rotation = "promax", scores = "regression")$scores);
> round(factorCorrelationsRegression,2);
Factor1 Factor2 Factor3 Factor4 Factor5
Factor1 1.00 0.26 0.04 -0.01 -0.22
Factor2 0.26 1.00 0.13 0.04 -0.02
Factor3 0.04 0.13 1.00 -0.10 -0.11
Factor4 -0.01 0.04 -0.10 1.00 -0.24
Factor5 -0.22 -0.02 -0.11 -0.24 1.00
```

The Scree Plot:

Dear Jeromy Anglim

ReplyDeleteThank you very much indeed for putting this site together. I am in the process (at the beginning) of learning R and learning exploratory factor analysis in R. So I have been working through the example you provided.

I have gotten hung up on dataForScreePlot$uniquenesses- which does not work and I am unable so far to find any information about it. Can you help? Do I need to have a step prior to this command? Any help would be greatly appreciated.

Deb Milne in South Africa

@Deborah. Thanks for picking this up. I left out the code to create the object (dataForScreePlot). Sorry about that. I think I used something like this:

ReplyDeletedataForScreePlot <- factanal(personality[items], factors = 10, rotation = "promax")

Thus, the communality values are those for a 10 factor solution.

Thank you, I will try that

ReplyDeleteHello again

ReplyDeleteAdding the code you provided did the trick.

I have another question that I am hoping you may help with. I followed your example for a factor analysis with a dataset (dbf) with 22 variables and just over 3,000 records and it mostly worked fine. I was using this small dataset (a portion of the larger dataset I eventually wanted to use) just to get familiar with R.

I have just tried the same thing with the larger dataset- 11 variables and 15,506 records. But R does not seem to like it. I get an error 'Error in cov.wt(z) : 'x' must contain finite values only'.

Does R have a limitation on the number of records it can handle?

Also, in a separate exercise I created a tetrachoric correlation matrix so I could use binary variables in my factor analysis. Again the factor analysis had a problem with this although I have read that factanal will work with a dataset or a correlation matrix.

Any suggestions or advice would be very helpful

Thank you

Deborah

Hi Deborah,

ReplyDeleteHere are a few thoughts:

R does not have a limit on the number of cases, although it is possible to run out of memory on your machine, or for the computations to take a really long time.

But, the factanal procedure probably just creates a correlation or covariance matrix. And I can't image R having any trouble doing that with 15,000 cases.

Perhaps, you're asking for too many factors for 11 variables? A line of my code above asks for 10 factors. This wouldn't converge with only 11 variables.

With regards to tetrachoric correlation matrices, you can sometimes run into the problem that the matrix is not positive definite. I think that this is related to the fact that the correlations are estimated pairwise as opposed to a matrix-level estimation. I believe there are some functions out there that will attempt to restore order to your matrix. A quick Google brought this up: perhaps have a look:

http://bm2.genes.nig.ac.jp/RGM2/R_current/library/QRMlib/man/eigenmeth.html

Anyway, I'm not an expert on all the details above, but hopefully they are of some help.

Great tutorial, though it would be nice to make syntax available for download, so users could get bare output with sink().

ReplyDeleteHi AL3xa,

ReplyDeleteI'm still experimenting with the best ways to share R code examples. This was an earlier post.

Here's a copy of the code from gist.github

with colour coding.

http://bit.ly/bcJBE6

and here's the direct link

to the R code that could be used to directly import the file into R:

http://bit.ly/ad39sk

With my newer R code examples, such as the one analysing Winter Olympics Medals, I'm adding a copy of the data to Google Docs and getting the script to import the data from the web.

In combination with gist.github, this should mean that readers can directly import the script into R, run the code on the data,

and play around with the data even further.

I may update this factor analysis example at a later point so that the raw data is shared,

but not right now.

Deborah,

ReplyDeleteI had exactly the same problem. It appears that if you have any NA's in the data frame then R will return that message. What i did was use na.omit(yourdataframe) and it all seemed to work fine.

Love the blog by the way, its always nice to see other psychologists using R.

Hello all,

ReplyDeleteAlong with Deborah and Richie, I am trying to figure out how to deal with missing data. Is there a way to perform factor analysis using pairwise deletion of missing data rather than list-wise deletion (as in na.omit)?

@ Mark

ReplyDeleteTwo strategies come to mind.

1. you could replace missing data using an imputation strategy.

See some of the suggestions at the bottom of:

http://www.statmethods.net/input/missingdata.html

2. You could obtain the pairwise-deleted correlation or covariance matrix using the cor or cov functions

and setting the the "use" argument to equal "pairwise.complete.obs".

e.g.,

x <- cor(df, use = "pairwise.complete.obs")

Then supply this correlation or covariance matrix to the factanal function using the "covmat" argument. E.g., for three factors:

factanal(factors = 3, covmat = x, n.obs = ...)

I'm not exactly sure what value you should put in for "n.obs" (number of observations), because it would vary between correlations.

It's also possible that you could run into issues that your correlation matrix is not positive definite because correlations have been calculated on different data.

see ?factanal and ?cor for more info.

Can someone please tell me how do we get variance contribution of a single factor to the observed variable?

ReplyDeleteHi, i'm new in R, so can you explain how i can get this file of spps?

ReplyDelete@Filipe, if you are referring to the specific data file, it's not currently available.

ReplyDelete