diff --git a/DESCRIPTION b/DESCRIPTION index 1ccb367..2b6a68f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: PCAtest Title: Statistical significance of PCA -Version: 0.0.1 +Version: 0.0.2 Authors@R: person(given = "Arley", family = "Camargo", @@ -15,7 +15,7 @@ License: GPL (>= 3) Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.1 +RoxygenNote: 7.3.2 Suggests: knitr, rmarkdown, diff --git a/R/PCAtest.R b/R/PCAtest.R index d9749d4..48ff6c7 100644 --- a/R/PCAtest.R +++ b/R/PCAtest.R @@ -41,19 +41,32 @@ #' #' \item{pervarboot}{The percentage of variance explained by each PC based on the bootstrapped data.} #' -#' \item{pervarperm}{The percentage of data explained by each PC based on the permuted data.} +#' \item{pervarbootci}{Confidence intervals of the percentage of variance explained by each PC based on the bootstrapped data.} +#' +#' \item{pervarperm}{The percentage of variance explained by each PC based on the randomized data.} +#' +#' \item{pervarpermci}{Confidence intervals of the percentage of variance explained by each PC based on the randomized data.} #' #' \item{indexloadobs}{The index of the loadings of the observed data.} #' #' \item{indexloadboot}{The index of the loadings of the bootstrapped data.} #' -#' \item{indexloadperm}{The index of the loadings of the permuted data.} +#' \item{indexloadbootci}{Confidence intervals of the index of the loadings based on the bootstrapped data.} +#' +#' \item{indexloadperm}{The index of the loadings based on the randomized data.} +#' +#' \item{indexloadpermci}{Confidence intervals of the index of the loadings based on the randomized data.} #' #' \item{corobs}{If varcorr=TRUE, the correlations of the observed variables with each significant PC.} #' #' \item{corboot}{If varcorr=TRUE, the correlations of the observed variables with each significant PC based on the bootstrapped data.} #' -#' \item{corperm}{If varcorr=TRUE, the correlations of the observed variables with each significant PC based on permuted data.} +#' \item{corbootci}{If varcorr=TRUE, the confidence intervals of the correlations of the variables with each significant PC based on the bootstrapped data.} +#' +#' \item{corperm}{If varcorr=TRUE, the correlations of the observed variables with each significant PC based on randomized data.} +#' +#' \item{corpermci}{If varcorr=TRUE, the confidence intervals of the correlations of the variables with each significant PC based on randomized data.} +#' #'} #' #' @author @@ -96,424 +109,459 @@ PCAtest <- function(x, nperm=1000, nboot=1000, alpha=0.05, indload=TRUE, varcorr =FALSE, counter=TRUE, plot=TRUE) { -# check dependencies + # check dependencies -requireNamespace("stats", quietly = TRUE) -requireNamespace("grDevices", quietly = TRUE) -requireNamespace("graphics", quietly = TRUE) -requireNamespace("utils", quietly = TRUE) + requireNamespace("stats", quietly = TRUE) + requireNamespace("grDevices", quietly = TRUE) + requireNamespace("graphics", quietly = TRUE) + requireNamespace("utils", quietly = TRUE) -# empirical eigenvalues, loadings, Psi, and Phi + # empirical eigenvalues, loadings, Psi, and Phi -pcaemp <- stats::prcomp(na.omit(x), scale=T, center=T) -eigenvalues <- pcaemp$sdev^2 + pcaemp <- stats::prcomp(na.omit(x), scale=T, center=T) + eigenvalues <- pcaemp$sdev^2 -if (dim(x)[1] < dim(x)[2]) { - eigenvalues <- eigenvalues[-length(eigenvalues)] - } + if (dim(x)[1] < dim(x)[2]) { + eigenvalues <- eigenvalues[-length(eigenvalues)] + } -eigenobs <- eigenvalues -pervarobs <- eigenvalues / sum(eigenvalues) * 100 + eigenobs <- eigenvalues + pervarobs <- eigenvalues / sum(eigenvalues) * 100 -# empirical index loadings + # empirical index loadings -indexloadobs<-c() -for (i in 1:length(eigenvalues)) { - indexloadobs <- rbind(indexloadobs, pcaemp$rotation[,i]^2 * eigenvalues[i]^2) - } + indexloadobs<-c() + for (i in 1:length(eigenvalues)) { + indexloadobs <- rbind(indexloadobs, pcaemp$rotation[,i]^2 * eigenvalues[i]^2) + } -# empirical correlations + # empirical correlations -if (varcorr==T) { - corobs<-c() - for (i in 1:length(eigenvalues)) { - corobs <- rbind(corobs, pcaemp$rotation[,i] * sqrt(eigenvalues[i])) - } -} + if (varcorr==T) { + corobs<-c() + for (i in 1:length(eigenvalues)) { + corobs <- rbind(corobs, pcaemp$rotation[,i] * sqrt(eigenvalues[i])) + } + } -# empirical Psi + # empirical Psi -Psi<-0 + Psi<-0 -for (i in 1:length(eigenvalues)) { - Psi <- Psi + (eigenvalues[i]-1)^2 + for (i in 1:length(eigenvalues)) { + Psi <- Psi + (eigenvalues[i]-1)^2 } -Psiobs<-Psi + Psiobs<-Psi -# empirical Phi + # empirical Phi -Phi<-0 + Phi<-0 -for (i in 1:length(eigenvalues)) { - Phi <- Phi + eigenvalues[i]^2 + for (i in 1:length(eigenvalues)) { + Phi <- Phi + eigenvalues[i]^2 } -Phi <- sqrt((Phi - dim(x)[2]) / (dim(x)[2] * (dim(x)[2]-1))) -Phiobs <- Phi + Phi <- sqrt((Phi - dim(x)[2]) / (dim(x)[2] * (dim(x)[2]-1))) + Phiobs <- Phi -# bootstraped data for confidence intervals of empirical eigenvalues, index loadings, -# and correlations + # bootstraped data for confidence intervals of empirical eigenvalues, index loadings, + # and correlations -pervarboot<-c() -indexloadboot<-vector("list", nboot) -corboot<-vector("list", nboot) + pervarboot<-c() + indexloadboot<-vector("list", nboot) + corboot<-vector("list", nboot) -cat("\nSampling bootstrap replicates... Please wait\n") + cat("\nSampling bootstrap replicates... Please wait\n") -for (i in 1:nboot) { + for (i in 1:nboot) { - if (counter==T) { - cat("\r",i, "of", nboot, "bootstrap replicates\r") - utils::flush.console() - } + if (counter==T) { + cat("\r",i, "of", nboot, "bootstrap replicates\r") + utils::flush.console() + } - bootdata <- x[sample(nrow(x),size=dim(x)[1],replace=TRUE),] - pcaboot <- stats::prcomp(na.omit(bootdata), scale=T, center=T) - eigenvalues <- pcaboot$sdev^2 + bootdata <- x[sample(nrow(x),size=dim(x)[1],replace=TRUE),] + pcaboot <- stats::prcomp(na.omit(bootdata), scale=T, center=T) + eigenvalues <- pcaboot$sdev^2 - if (dim(x)[1] < dim(x)[2]) { - eigenvalues <- eigenvalues[-length(eigenvalues)] - } + if (dim(x)[1] < dim(x)[2]) { + eigenvalues <- eigenvalues[-length(eigenvalues)] + } - pervarboot <- rbind(pervarboot,eigenvalues / sum(eigenvalues) * 100) + pervarboot <- rbind(pervarboot,eigenvalues / sum(eigenvalues) * 100) - if (indload==T) { - indexload<-c() - for (j in 1:length(eigenvalues)) { - indexload <- rbind(indexload, pcaboot$rotation[,j]^2 * eigenvalues[j]^2) - } - indexloadboot [[i]]<- indexload - } + if (indload==T) { + indexload<-c() + for (j in 1:length(eigenvalues)) { + indexload <- rbind(indexload, pcaboot$rotation[,j]^2 * eigenvalues[j]^2) + } + indexloadboot [[i]]<- indexload + } - if (varcorr==T) { - corload<-c() - for (j in 1:length(eigenvalues)) { - corload <- rbind(corload, pcaboot$rotation[,j] * sqrt(eigenvalues[j])) - } + if (varcorr==T) { + corload<-c() + for (j in 1:length(eigenvalues)) { + corload <- rbind(corload, pcaboot$rotation[,j] * sqrt(eigenvalues[j])) + } - corboot [[i]]<- corload - } -} + corboot [[i]]<- corload + } + } -cat("\nCalculating confidence intervals of empirical statistics... Please wait\n") - -# confidence intervals of percentage of variation -confint <- apply(pervarboot,MARGIN=2,FUN=stats::quantile, probs=c(0.025,0.975)) - -if (indload==T) { - confintindboot<-c() # confidence intervals of index loadings - for (j in 1:length(eigenvalues)) { - for (k in 1:dim(x)[2]) { - indices<-c() - for (i in 1:nboot) { - indices <- c(indices, indexloadboot[[i]][j,k]) - } - confintindboot<- rbind (confintindboot, stats::quantile (indices, probs=c(0.025,0.975))) - } - } -} + cat("\nCalculating confidence intervals of empirical statistics... Please wait\n") + + # confidence intervals of percentage of variation + confint <- apply(pervarboot,MARGIN=2,FUN=stats::quantile, probs=c(0.025,0.975)) + + if (indload==T) { + confintindboot<-c() # confidence intervals of index loadings + for (j in 1:length(eigenvalues)) { + for (k in 1:dim(x)[2]) { + indices<-c() + for (i in 1:nboot) { + indices <- c(indices, indexloadboot[[i]][j,k]) + } + confintindboot<- rbind (confintindboot, stats::quantile (indices, probs=c(0.025,0.975))) + } + } + } -if (varcorr==T) { - confintcorboot<-c() # confidence intervals of correlations - for (j in 1:length(eigenvalues)) { - for (k in 1:dim(x)[2]) { - cors<-c() - for (i in 1:nboot) { - cors <- c(cors, corboot[[i]][j,k]) - } - confintcorboot<- rbind (confintcorboot, stats::quantile (cors, probs=c(0.025,0.975))) - } - } -} + if (varcorr==T) { + confintcorboot<-c() # confidence intervals of correlations + for (j in 1:length(eigenvalues)) { + for (k in 1:dim(x)[2]) { + cors<-c() + for (i in 1:nboot) { + cors <- c(cors, corboot[[i]][j,k]) + } + confintcorboot<- rbind (confintcorboot, stats::quantile (cors, probs=c(0.025,0.975))) + } + } + } -# null distributions based on randomizations of Psi, Phi, eigenvalues, percentage of variation, -# and index loadings + # null distributions based on randomizations of Psi, Phi, eigenvalues, percentage of variation, + # and index loadings -Psi<-c() -Phi<-c() -eigenrand<-c() -eigenprob<-c() -pervarperm<-c() -indexloadperm<-vector("list", nperm) -corperm<-vector("list", nperm) + Psi<-c() + Phi<-c() + eigenrand<-c() + eigenprob<-c() + pervarperm<-c() + indexloadperm<-vector("list", nperm) + corperm<-vector("list", nperm) -cat("\nSampling random permutations... Please wait\n") + cat("\nSampling random permutations... Please wait\n") -for (i in 1:nperm) { + for (i in 1:nperm) { - if (counter==T) { - cat("\r", i, "of", nperm, "random permutations \r") - utils::flush.console() + if (counter==T) { + cat("\r", i, "of", nperm, "random permutations \r") + utils::flush.console() } - repvalue<-0 - perm<-apply(x,MARGIN=2,FUN=sample) - pcaperm <- stats::prcomp(na.omit(perm), scale=T, center=T) - eigenvalues <- pcaperm$sdev^2 # eigenvalues - - if (dim(x)[1] < dim(x)[2]) { - eigenvalues <- eigenvalues[-length(eigenvalues)] - } + repvalue<-0 + perm<-apply(x,MARGIN=2,FUN=sample) + pcaperm <- stats::prcomp(na.omit(perm), scale=T, center=T) + eigenvalues <- pcaperm$sdev^2 # eigenvalues - pervarperm <- rbind (pervarperm, eigenvalues / sum (eigenvalues) * 100) - - for (j in 1:length(eigenvalues)) { - repvalue <- repvalue + (eigenvalues[j]-1)^2 - } + if (dim(x)[1] < dim(x)[2]) { + eigenvalues <- eigenvalues[-length(eigenvalues)] + } - Psi[i] <- repvalue + pervarperm <- rbind (pervarperm, eigenvalues / sum (eigenvalues) * 100) - repvalue<-0 + for (j in 1:length(eigenvalues)) { + repvalue <- repvalue + (eigenvalues[j]-1)^2 + } - for (j in 1:length(eigenvalues)) { - repvalue <- repvalue + eigenvalues[j]^2 - } + Psi[i] <- repvalue - Phi[i] <- sqrt((repvalue - dim(x)[2]) / (dim(x)[2]*(dim(x)[2]-1))) + repvalue<-0 - eigenrand <- rbind (eigenrand, eigenvalues) + for (j in 1:length(eigenvalues)) { + repvalue <- repvalue + eigenvalues[j]^2 + } - if (indload==T) { - indexload<-c() - for (j in 1:length(eigenvalues)) { - indexload <- rbind(indexload, pcaperm$rotation[,j]^2 * eigenvalues[j]^2) - } - indexloadperm [[i]]<- indexload - } + Phi[i] <- sqrt((repvalue - dim(x)[2]) / (dim(x)[2]*(dim(x)[2]-1))) - if (varcorr==T) { - cor<-c() - for (j in 1:length(eigenvalues)) { - cor <- rbind(cor, pcaperm$rotation[,j] * sqrt(eigenvalues[j])) - } + eigenrand <- rbind (eigenrand, eigenvalues) - corperm [[i]]<- cor - } -} + if (indload==T) { + indexload<-c() + for (j in 1:length(eigenvalues)) { + indexload <- rbind(indexload, pcaperm$rotation[,j]^2 * eigenvalues[j]^2) + } + indexloadperm [[i]]<- indexload + } -cat("\nComparing empirical statistics with their null distributions... Please wait\n") - -confintperm <- apply (pervarperm, MARGIN=2, FUN=stats::quantile, probs=c(0.025,0.975)) - -if (indload==T) { - confintind<-c() - meanind<-c() - for (j in 1:length(eigenvalues)) { - for (k in 1:dim(x)[2]) { - indices<-c() - for (i in 1:nperm) { - indices <- c(indices, indexloadperm[[i]][j,k]) - } - confintind<- rbind (confintind, stats::quantile (indices, probs=c(0.025,0.975))) - meanind<- c(meanind, mean(indices)) - } - } -} + if (varcorr==T) { + cor<-c() + for (j in 1:length(eigenvalues)) { + cor <- rbind(cor, pcaperm$rotation[,j] * sqrt(eigenvalues[j])) + } -if (varcorr==T) { - confintcor<-c() - meancor<-c() - for (j in 1:length(eigenvalues)) { - for (k in 1:dim(x)[2]) { - cors<-c() - for (i in 1:nperm) { - cors <- c(cors, corperm[[i]][j,k]) - } - confintcor<- rbind (confintcor, stats::quantile (cors, probs=c(0.025,0.975))) - meancor<- c(meancor, mean(cors)) - } - } -} - -# comparing empirical Psi, Phi and eigenvalues with their null distributions to calculate -# p-values + corperm [[i]]<- cor + } + } -Psiprob <- length (which (Psi>Psiobs)) / nperm -Phiprob <- length (which (Phi>Phiobs)) / nperm + cat("\nComparing empirical statistics with their null distributions... Please wait\n") + + confintperm <- apply (pervarperm, MARGIN=2, FUN=stats::quantile, probs=c(0.025,0.975)) + + if (indload==T) { + confintind<-c() + meanind<-c() + for (j in 1:length(eigenvalues)) { + for (k in 1:dim(x)[2]) { + indices<-c() + for (i in 1:nperm) { + indices <- c(indices, indexloadperm[[i]][j,k]) + } + confintind<- rbind (confintind, stats::quantile (indices, probs=c(0.025,0.975))) + meanind<- c(meanind, mean(indices)) + } + } + } -for (k in 1:length(eigenvalues)) { - eigenprob[k] <- length (which (eigenrand[,k] > eigenobs[k])) / nperm - } + if (varcorr==T) { + confintcor<-c() + meancor<-c() + for (j in 1:length(eigenvalues)) { + for (k in 1:dim(x)[2]) { + cors<-c() + for (i in 1:nperm) { + cors <- c(cors, corperm[[i]][j,k]) + } + confintcor<- rbind (confintcor, stats::quantile (cors, probs=c(0.025,0.975))) + meancor<- c(meancor, mean(cors)) + } + } + } -if (Psiprob < alpha & Phiprob < alpha) { # test PC axes if both Psi and Phi are significant + # comparing empirical Psi, Phi and eigenvalues with their null distributions to calculate + # p-values -# find out which PCs are significant + Psiprob <- length (which (Psi>Psiobs)) / nperm + Phiprob <- length (which (Phi>Phiobs)) / nperm - sigaxes <- 0 - for (i in 1:length(eigenprob)) { - if (eigenprob[i] < alpha) { - sigaxes <- sigaxes + 1 - } - } + for (k in 1:length(eigenvalues)) { + eigenprob[k] <- length (which (eigenrand[,k] > eigenobs[k])) / nperm + } -# find out which index loadings are significant for each significant axis + if (Psiprob < alpha & Phiprob < alpha) { # test PC axes if both Psi and Phi are significant -if (indload==T) { - sigload <- list() + # find out which PCs are significant - for (j in 1:sigaxes) { - conteo<-c() - for (i in 1:nperm) { - conteo <- c(conteo, which(indexloadperm[[i]][j,] > indexloadobs[j,])) - } - sigload [[j]]<- setdiff(c(1:dim(x)[2]), names(table(conteo)[table(conteo)/nperm > alpha])) - } -} + # sigaxes <- 0 + # for (i in 1:length(eigenprob)) { + # if (eigenprob[i] < alpha) { + # sigaxes <- sigaxes + 1 + # } + # } -# find out which correlations are significant for each significant axis + sigaxes <- c() + for (i in 1:length(eigenprob)) { + if (eigenprob[i] < alpha) { + sigaxes[i] <- i + } else { + sigaxes[i] <- 0 + } + } -if (varcorr==T) { - sigcor <- list() + # find out which index loadings are significant for each significant axis + + if (indload==T) { + sigload <- list() + for (j in 1:length(sigaxes)) { + if (sigaxes[j]!=0) { + conteo<-c() + for (i in 1:nperm) { + conteo <- c(conteo, which(indexloadperm[[i]][j,] > indexloadobs[j,])) + } + sigload [[j]]<- setdiff(c(1:dim(x)[2]), names(table(conteo)[table(conteo) / nperm > alpha])) + } else { + sigload [[j]]<-c("NA")} + } + } - for (j in 1:sigaxes) { - conteo<-c() - for (i in 1:nperm) { - conteo <- c(conteo, which(corperm[[i]][j,] > corobs[j,])) - } - sigcor [[j]]<- setdiff(c(1:dim(x)[2]), names(table(conteo)[table(conteo)/nperm > alpha])) - } -} -} + # find out which correlations are significant for each significant axis + + if (varcorr==T) { + sigcor <- list() + for (j in 1:length(sigaxes)) { + if (sigaxes[j]!=0) { + conteo<-c() + for (i in 1:nperm) { + conteo <- c(conteo, which(corperm[[i]][j,] > corobs[j,])) + } + sigcor [[j]]<- setdiff(c(1:dim(x)[2]), names(table(conteo)[table(conteo) / nperm > alpha])) + } else { + sigcor [[j]]<-c("NA")} + } + } + } -# screen output + # screen output -cat(paste("\n", "========================================================", sep=""), - paste("Test of PCA significance: ", dim(x)[2], " variables, ", dim(x)[1], " observations", sep=""), - paste(nboot, " bootstrap replicates, ", nperm, " random permutations", sep=""), - paste("========================================================", sep=""), sep="\n") + cat(paste("\n", "========================================================", sep=""), + paste("Test of PCA significance: ", dim(x)[2], " variables, ", dim(x)[1], " observations", sep=""), + paste(nboot, " bootstrap replicates, ", nperm, " random permutations", sep=""), + paste("========================================================", sep=""), sep="\n") - cat(paste("\n", "Empirical Psi = ", format(round(Psiobs,4),nsmall=4), ", Max null Psi = ", format(round(max(Psi),4),nsmall=4), ", Min null Psi = ", format(round(min(Psi),4),nsmall=4), ", p-value = ", Psiprob, sep=""), - paste("Empirical Phi = ", format(round(Phiobs,4),nsmall=4),", Max null Phi = ", format(round(max(Phi),4),nsmall=4),", Min null Phi = ", format(round(min(Phi),4),nsmall=4),", p-value = ", Phiprob, sep=""), sep="\n") + cat(paste("\n", "Empirical Psi = ", format(round(Psiobs,4),nsmall=4), ", Max null Psi = ", format(round(max(Psi),4),nsmall=4), ", Min null Psi = ", format(round(min(Psi),4),nsmall=4), ", p-value = ", Psiprob, sep=""), + paste("Empirical Phi = ", format(round(Phiobs,4),nsmall=4),", Max null Phi = ", format(round(max(Phi),4),nsmall=4),", Min null Phi = ", format(round(min(Phi),4),nsmall=4),", p-value = ", Phiprob, sep=""), sep="\n") -if (Psiprob >= alpha & Phiprob >= alpha) { # if both Psi and Phi are not significant - cat(paste ("\n", "PCA is not significant!", sep="")) + if (Psiprob >= alpha & Phiprob >= alpha) { # if both Psi and Phi are not significant + cat(paste ("\n", "PCA is not significant!", sep="")) } -if (Psiprob < alpha & Phiprob < alpha) { # test PC axes if both Psi and Phi are significant - - for (i in 1:length(eigenobs)) { - cat(paste ("\n", "Empirical eigenvalue #", i, " = ", round(eigenobs[i],5), ", Max null eigenvalue = ", round(max(eigenrand[,i]), 5), ", p-value = ", eigenprob[i], sep="")) - } + if (Psiprob < alpha & Phiprob < alpha) { # test PC axes if both Psi and Phi are significant - cat("\n") - - for (i in 1:sigaxes) { - cat(paste ("\n", "PC ", i, " is significant and accounts for ", round(pervarobs[i], digits=1), "% (95%-CI:",round(confint[1,i],digits=1),"-",round(confint[2,i],digits=1),")"," of the total variation", sep="")) - } + for (i in 1:length(eigenobs)) { + cat(paste ("\n", "Empirical eigenvalue #", i, " = ", round(eigenobs[i],5), ", Max null eigenvalue = ", round(max(eigenrand[,i]), 5), ", p-value = ", eigenprob[i], sep="")) + } - if (sigaxes > 1) { - cat("\n") - cat(paste ("\n", "The first ", sigaxes, " PC axes are significant and account for ", round(sum(pervarobs[1:sigaxes]), digits=1), "% of the total variation", sep="")) - } + cat("\n") - if (indload==T) { - cat("\n") + for (i in sigaxes[sigaxes!=0]) { + cat(paste ("\n", "PC ", i, " is significant and accounts for ", round(pervarobs[i], digits=1), "% (95%-CI:",round(confint[1,i],digits=1),"-",round(confint[2,i],digits=1),")"," of the total variation", sep="")) + } - for (i in 1:sigaxes) { - cat(paste("\n", "Variables ", paste(sigload[[i]][1:length(sigload[[i]])-1], collapse=", "), ", and ", paste(sigload[[i]][length(sigload[[i]])])," have significant loadings on PC ", i, sep="")) - } - } + if (length(sigaxes[sigaxes!=0]) > 1) { + cat("\n") + cat(paste ("\n", length(sigaxes[sigaxes!=0]), " PC axes are significant and account for ", round(sum(pervarobs[sigaxes[sigaxes!=0]]), digits=1), "% of the total variation", sep="")) + } - if (varcorr==T) { - cat("\n") + if (indload==T) { + cat("\n") + + for (i in sigaxes[sigaxes!=0]) { + if (length(sigload[[i]])==0) { + cat(paste("\n", "None of the variables have significant loadings on PC ", i, sep="")) + } else { + if (length(sigload[[i]])==1) { + cat(paste("\n", "Variable ", sigload[[i]]," has a significant loading on PC ", i, sep="")) + } else { + cat(paste("\n", "Variables ", paste(sigload[[i]][1:length(sigload[[i]])-1], collapse=", "), ", and ", paste(sigload[[i]][length(sigload[[i]])])," have significant loadings on PC ", i, sep="")) + } + } + } + } - for (i in 1:sigaxes) { - cat(paste("\n", "Variables ", paste(sigcor[[i]][1:length(sigcor[[i]])-1], collapse=", "), ", and ", paste(sigcor[[i]][length(sigcor[[i]])])," have significant correlations with PC ", i, sep="")) - } - } + if (varcorr==T) { + cat("\n") + + for (i in sigaxes[sigaxes!=0]) { + if (length(sigcor[[i]])==0) { + cat(paste("\n", "None of the variables have significant correlations with PC ", i, sep="")) + } else { + if (length(sigcor[[i]])==1) { + cat(paste("\n", "Variable ", sigcor[[i]]," has a significant correlation with PC ", i, sep="")) + } else { + cat(paste("\n", "Variables ", paste(sigcor[[i]][1:length(sigcor[[i]])-1], collapse=", "), ", and ", paste(sigcor[[i]][length(sigcor[[i]])])," have significant correlations with PC ", i, sep="")) + } + } + } + } - cat("\n\n") -} + cat("\n\n") + } -# plots + # plots -if (plot==T) { -graphics::par(mfrow=c(2,2), mar=c(5, 4, 1, 2) + 0.1) + if (plot==T) { + graphics::par(mfrow=c(2,2), mar=c(5, 4, 1, 2) + 0.1) -# plot of empirical and randomized Psi + # plot of empirical and randomized Psi -h <- graphics::hist(Psi,plot=FALSE) -h$density = h$counts/sum(h$counts)*100 -plot(h,freq=FALSE, col="gray45", xlab="Psi", xlim=c(0, max(max(Psi), Psiobs)), ylab="Percentage of permutations", main="") -graphics::arrows(x0=Psiobs, y0=max(h$density)/10, y1=0, col="red", lwd=2, length=0.1) -graphics::legend("topright", legend=c("Null distribution","Empirical value"), fill=c("gray45","red"), bty="n", cex=0.8) + h <- graphics::hist(Psi,plot=FALSE) + h$density = h$counts/sum(h$counts)*100 + plot(h,freq=FALSE, col="gray45", xlab="Psi", xlim=c(0, max(max(Psi), Psiobs)), ylab="Percentage of permutations", main="") + graphics::arrows(x0=Psiobs, y0=max(h$density)/10, y1=0, col="red", lwd=2, length=0.1) + graphics::legend("topright", legend=c("Null distribution","Empirical value"), fill=c("gray45","red"), bty="n", cex=0.8) -# plot of empirical and randomized Phi + # plot of empirical and randomized Phi -h <- graphics::hist(Phi,plot=FALSE) -h$density <- h$counts/sum(h$counts)*100 -plot(h,freq=FALSE, col="gray45", xlab="Phi", xlim=c(0, max(max(Phi), Phiobs)), ylab="Percentage of permutations", main="") -graphics::arrows (x0=Phiobs, y0=max(h$density)/10, y1=0, col="red", lwd=2, length=0.1) + h <- graphics::hist(Phi,plot=FALSE) + h$density <- h$counts/sum(h$counts)*100 + plot(h,freq=FALSE, col="gray45", xlab="Phi", xlim=c(0, max(max(Phi), Phiobs)), ylab="Percentage of permutations", main="") + graphics::arrows (x0=Phiobs, y0=max(h$density)/10, y1=0, col="red", lwd=2, length=0.1) -# plot of bootstrapped and randomized percentage of variation for all PC's + # plot of bootstrapped and randomized percentage of variation for all PCs -if (Psiprob < alpha & Phiprob < alpha) { # test PC axes if both Psi and Phi are significant + if (Psiprob < alpha & Phiprob < alpha) { # test PC axes if both Psi and Phi are significant - plot(pervarobs, ylab="Percentage of total variation", xlab="PC", bty="n", ylim=c(0, max(confint)), type="b", pch=19, lty="dashed", col="red", xaxt = "n") - graphics::axis(1, at = 1:length(eigenobs)) - graphics::lines (apply(pervarperm,MARGIN=2,FUN=mean), type="b", pch=19, lty="dashed", col="gray45") - suppressWarnings(graphics::arrows (x0=c(1:length(eigenvalues)), y0=confint[2,], y1=confint[1,], code=3, angle=90, length=0.05, col="red")) - suppressWarnings(graphics::arrows (x0=c(1:length(eigenvalues)), y0=confintperm[2,], y1=confintperm[1,], code=3, angle=90, length=0.05, col="gray45")) + plot(pervarobs, ylab="Percentage of total variation", xlab="PC", bty="n", ylim=c(0, max(confint)), type="b", pch=19, lty="dashed", col="red", xaxt = "n") + graphics::axis(1, at = 1:length(eigenobs)) + graphics::lines (apply(pervarperm,MARGIN=2,FUN=mean), type="b", pch=19, lty="dashed", col="gray45") + suppressWarnings(graphics::arrows (x0=c(1:length(eigenvalues)), y0=confint[2,], y1=confint[1,], code=3, angle=90, length=0.05, col="red")) + suppressWarnings(graphics::arrows (x0=c(1:length(eigenvalues)), y0=confintperm[2,], y1=confintperm[1,], code=3, angle=90, length=0.05, col="gray45")) -# plot of bootstrapped and randomized index loadings for significant PC's only -if (indload==T) { - k=1 - for (i in 1:sigaxes) { + # plot of bootstrapped and randomized index loadings for significant PCs only + if (indload==T) { + k=1 + for (i in 1:length(sigaxes[sigaxes!=0])) { - if (i %in% seq(2,max(2,sigaxes),4)) { # if sigaxes is 2, 6, 10,... make another window to display more plots + if (i %in% seq(2,max(2,length(sigaxes[sigaxes!=0])),4)) { # if sigaxes is 2, 6, 10,... make another window to display more plots - grDevices::dev.new() - graphics::par (mfrow=c(2,2), mar=c(5, 4, 1, 2) + 0.1) - plot (indexloadobs[i,], ylab=paste("Index loadings of PC",i), xlab="Variable", bty="n", ylim=c(0, max(confintindboot)), pch=19, col="red", xaxt = "n") - graphics::axis(1, at = 1:dim(x)[2]) - graphics::lines (meanind[k:(k-1+dim(x)[2])], type="p", pch=19, col="gray45") - suppressWarnings(graphics::arrows (x0=c(1:dim(x)[2]), y0=confintindboot[k:(k-1+dim(x)[2]),1], y1=confintindboot[k:(k-1+dim(x)[2]),2], code=3, angle=90, length=0.05, col="red")) - suppressWarnings(graphics::arrows (x0=c(1:dim(x)[2]), y0=confintind[k:(k-1+dim(x)[2]),1], y1=confintind[k:(k-1+dim(x)[2]),2], code=3, angle=90, length=0.05, col="gray45")) + grDevices::dev.new() + graphics::par (mfrow=c(2,2), mar=c(5, 4, 1, 2) + 0.1) + plot (indexloadobs[sigaxes[sigaxes!=0][i],], ylab=paste("Index loadings of PC", sigaxes[sigaxes!=0][i]), xlab="Variable", bty="n", ylim=c(0, max(confintindboot)), pch=19, col="red", xaxt = "n") + graphics::axis(1, at = 1:dim(x)[2]) + graphics::lines (meanind[k:(k-1+dim(x)[2])], type="p", pch=19, col="gray45") + suppressWarnings(graphics::arrows (x0=c(1:dim(x)[2]), y0=confintindboot[k:(k-1+dim(x)[2]),1], y1=confintindboot[k:(k-1+dim(x)[2]),2], code=3, angle=90, length=0.05, col="red")) + suppressWarnings(graphics::arrows (x0=c(1:dim(x)[2]), y0=confintind[k:(k-1+dim(x)[2]),1], y1=confintind[k:(k-1+dim(x)[2]),2], code=3, angle=90, length=0.05, col="gray45")) - } else { + } else { - plot (indexloadobs[i,], ylab=paste("Index loadings of PC",i), xlab="Variable", bty="n", ylim=c(0, max(confintindboot)), pch=19, col="red", xaxt = "n") - graphics::axis(1, at = 1:dim(x)[2]) - graphics::lines (meanind[k:(k-1+dim(x)[2])], type="p", pch=19, col="gray45") - suppressWarnings(graphics::arrows (x0=c(1:dim(x)[2]), y0=confintindboot[k:(k-1+dim(x)[2]),1], y1=confintindboot[k:(k-1+dim(x)[2]),2], code=3, angle=90, length=0.05, col="red")) - suppressWarnings(graphics::arrows (x0=c(1:dim(x)[2]), y0=confintind[k:(k-1+dim(x)[2]),1], y1=confintind[k:(k-1+dim(x)[2]),2], code=3, angle=90, length=0.05, col="gray45")) + plot (indexloadobs[sigaxes[sigaxes!=0][i],], ylab=paste("Index loadings of PC", sigaxes[sigaxes!=0][i]), xlab="Variable", bty="n", ylim=c(0, max(confintindboot)), pch=19, col="red", xaxt = "n") + graphics::axis(1, at = 1:dim(x)[2]) + graphics::lines (meanind[k:(k-1+dim(x)[2])], type="p", pch=19, col="gray45") + suppressWarnings(graphics::arrows (x0=c(1:dim(x)[2]), y0=confintindboot[k:(k-1+dim(x)[2]),1], y1=confintindboot[k:(k-1+dim(x)[2]),2], code=3, angle=90, length=0.05, col="red")) + suppressWarnings(graphics::arrows (x0=c(1:dim(x)[2]), y0=confintind[k:(k-1+dim(x)[2]),1], y1=confintind[k:(k-1+dim(x)[2]),2], code=3, angle=90, length=0.05, col="gray45")) - } - k = k + dim(x)[2] - } - } -} -} + } + k = k + dim(x)[2] + } + } + } + } -results <- list() + results <- list() -results[["Empirical Psi"]] <- Psiobs -results[["Empirical Phi"]] <- Phiobs -results[["Null Psi"]] <- Psi -results[["Null Phi"]] <- Phi + results[["Empirical Psi"]] <- Psiobs + results[["Empirical Phi"]] <- Phiobs + results[["Null Psi"]] <- Psi + results[["Null Phi"]] <- Phi -if (Psiprob < alpha & Phiprob < alpha) { # test PC axes if both Psi and Phi are significant + if (Psiprob < alpha & Phiprob < alpha) { # test PC axes if both Psi and Phi are significant - results[["Percentage of variation of empirical PC's"]] <- pervarobs - results[["Percentage of variation of bootstrapped data"]] <- pervarboot - results[["Percentage of variation of randomized data"]] <- pervarperm + results[["Percentage of variation of empirical PCs"]] <- pervarobs + results[["Percentage of variation of bootstrapped data"]] <- pervarboot + results[["Observed confidence intervals of percentage of variation"]] <- confint + results[["Percentage of variation of randomized data"]] <- pervarperm + results[["Randomized confidence intervals of percentage of variation"]] <- confintperm - if (indload==T) { - results[["Index loadings of empirical PC's"]] <- indexloadobs - results[["Index loadings with bootstrapped data"]] <- indexloadboot - results[["Index loadings with randomized data"]] <- indexloadperm - } + if (indload==T) { + results[["Index loadings of empirical PCs"]] <- indexloadobs + results[["Index loadings with bootstrapped data"]] <- indexloadboot + results[["Observed confidence intervals of index loadings"]] <- confintindboot + results[["Index loadings with randomized data"]] <- indexloadperm + results[["Randomized confidence intervals of index loadings"]] <- confintind + } - if (varcorr==T) { - results[["Correlations of empirical PC's with variables"]] <- corobs - results[["Correlations in bootstrapped data"]] <- corboot - results[["Correlations in randomized data"]] <- corperm - } + if (varcorr==T) { + results[["Correlations of empirical PCs with variables"]] <- corobs + results[["Correlations in bootstrapped data"]] <- corboot + results[["Observed confidence intervals of variable correlations"]] <- confintcorboot + results[["Correlations in randomized data"]] <- corperm + results[["randomized confidence intervals of variable correlations"]] <- confintcor + } -} + } -return (results) + return (results) } diff --git a/README.Rmd b/README.Rmd index b4bc446..0ac727b 100644 --- a/README.Rmd +++ b/README.Rmd @@ -33,7 +33,7 @@ sampling variance around mean observed statistics based on bootstrap replicates. ## Installation -You can install the released and the development versions from [GitHub] +You can install the released (version 0.02) and the development versions from [GitHub] (https://github.com/) with: ``` r diff --git a/README.md b/README.md index 43dc878..804a163 100644 --- a/README.md +++ b/README.md @@ -23,8 +23,8 @@ statistics based on bootstrap replicates. ## Installation -You can install the released and the development versions from -\[GitHub\] () with: +You can install the released (version 0.02) and the development versions +from \[GitHub\] () with: ``` r # install.packages("devtools") @@ -55,8 +55,8 @@ result<-PCAtest(ex0, 100, 100, 0.05, varcorr=FALSE, counter=FALSE, plot=TRUE) #> 100 bootstrap replicates, 100 random permutations #> ======================================================== #> -#> Empirical Psi = 0.1445, Max null Psi = 0.4359, Min null Psi = 0.0499, p-value = 0.73 -#> Empirical Phi = 0.0850, Max null Phi = 0.1476, Min null Phi = 0.0499, p-value = 0.73 +#> Empirical Psi = 0.0906, Max null Psi = 0.5364, Min null Psi = 0.0391, p-value = 0.95 +#> Empirical Phi = 0.0673, Max null Phi = 0.1638, Min null Phi = 0.0442, p-value = 0.95 #> #> PCA is not significant! ``` @@ -82,18 +82,18 @@ result<-PCAtest(ex025, 100, 100, 0.05, varcorr=FALSE, counter=FALSE, plot=TRUE) #> 100 bootstrap replicates, 100 random permutations #> ======================================================== #> -#> Empirical Psi = 0.7999, Max null Psi = 0.4694, Min null Psi = 0.0575, p-value = 0 -#> Empirical Phi = 0.2000, Max null Phi = 0.1532, Min null Phi = 0.0536, p-value = 0 +#> Empirical Psi = 1.4947, Max null Psi = 0.4902, Min null Psi = 0.0617, p-value = 0 +#> Empirical Phi = 0.2734, Max null Phi = 0.1566, Min null Phi = 0.0555, p-value = 0 #> -#> Empirical eigenvalue #1 = 1.78342, Max null eigenvalue = 1.51785, p-value = 0 -#> Empirical eigenvalue #2 = 0.94318, Max null eigenvalue = 1.27171, p-value = 1 -#> Empirical eigenvalue #3 = 0.81888, Max null eigenvalue = 1.09835, p-value = 1 -#> Empirical eigenvalue #4 = 0.75285, Max null eigenvalue = 0.9815, p-value = 0.99 -#> Empirical eigenvalue #5 = 0.70167, Max null eigenvalue = 0.85862, p-value = 0.72 +#> Empirical eigenvalue #1 = 2.05104, Max null eigenvalue = 1.49457, p-value = 0 +#> Empirical eigenvalue #2 = 0.95665, Max null eigenvalue = 1.23612, p-value = 1 +#> Empirical eigenvalue #3 = 0.78234, Max null eigenvalue = 1.10792, p-value = 1 +#> Empirical eigenvalue #4 = 0.72472, Max null eigenvalue = 0.97997, p-value = 1 +#> Empirical eigenvalue #5 = 0.48526, Max null eigenvalue = 0.87503, p-value = 1 #> -#> PC 1 is significant and accounts for 35.7% (95%-CI:30.3-43.6) of the total variation +#> PC 1 is significant and accounts for 41% (95%-CI:34.5-47.9) of the total variation #> -#> Variables , and 3 have significant loadings on PC 1 +#> Variables 2, 3, and 4 have significant loadings on PC 1 ``` @@ -117,16 +117,16 @@ result<-PCAtest(ex05, 100, 100, 0.05, varcorr=FALSE, counter=FALSE, plot=TRUE) #> 100 bootstrap replicates, 100 random permutations #> ======================================================== #> -#> Empirical Psi = 5.0033, Max null Psi = 0.6097, Min null Psi = 0.0526, p-value = 0 -#> Empirical Phi = 0.5002, Max null Phi = 0.1746, Min null Phi = 0.0513, p-value = 0 +#> Empirical Psi = 4.8505, Max null Psi = 0.6045, Min null Psi = 0.0382, p-value = 0 +#> Empirical Phi = 0.4925, Max null Phi = 0.1739, Min null Phi = 0.0437, p-value = 0 #> -#> Empirical eigenvalue #1 = 2.98492, Max null eigenvalue = 1.5516, p-value = 0 -#> Empirical eigenvalue #2 = 0.67789, Max null eigenvalue = 1.28816, p-value = 1 -#> Empirical eigenvalue #3 = 0.54255, Max null eigenvalue = 1.11634, p-value = 1 -#> Empirical eigenvalue #4 = 0.50678, Max null eigenvalue = 0.98476, p-value = 1 -#> Empirical eigenvalue #5 = 0.28785, Max null eigenvalue = 0.85856, p-value = 1 +#> Empirical eigenvalue #1 = 2.9652, Max null eigenvalue = 1.59307, p-value = 0 +#> Empirical eigenvalue #2 = 0.62114, Max null eigenvalue = 1.25937, p-value = 1 +#> Empirical eigenvalue #3 = 0.53125, Max null eigenvalue = 1.09766, p-value = 1 +#> Empirical eigenvalue #4 = 0.46017, Max null eigenvalue = 0.97471, p-value = 1 +#> Empirical eigenvalue #5 = 0.42223, Max null eigenvalue = 0.88145, p-value = 1 #> -#> PC 1 is significant and accounts for 59.7% (95%-CI:53.2-65.2) of the total variation +#> PC 1 is significant and accounts for 59.3% (95%-CI:51.1-67.2) of the total variation #> #> Variables 1, 2, 3, 4, and 5 have significant loadings on PC 1 ``` @@ -152,18 +152,18 @@ result<-PCAtest(ants, 100, 100, 0.05, varcorr=FALSE, counter=FALSE, plot=TRUE) #> 100 bootstrap replicates, 100 random permutations #> ======================================================== #> -#> Empirical Psi = 10.9186, Max null Psi = 2.6938, Min null Psi = 0.6441, p-value = 0 -#> Empirical Phi = 0.5099, Max null Phi = 0.2533, Min null Phi = 0.1238, p-value = 0 +#> Empirical Psi = 10.9186, Max null Psi = 2.6398, Min null Psi = 0.6762, p-value = 0 +#> Empirical Phi = 0.5099, Max null Phi = 0.2507, Min null Phi = 0.1269, p-value = 0 #> -#> Empirical eigenvalue #1 = 3.84712, Max null eigenvalue = 2.13026, p-value = 0 -#> Empirical eigenvalue #2 = 1.52017, Max null eigenvalue = 1.72539, p-value = 0.17 -#> Empirical eigenvalue #3 = 0.70634, Max null eigenvalue = 1.43578, p-value = 1 -#> Empirical eigenvalue #4 = 0.41356, Max null eigenvalue = 1.13288, p-value = 1 -#> Empirical eigenvalue #5 = 0.34001, Max null eigenvalue = 0.95056, p-value = 1 -#> Empirical eigenvalue #6 = 0.14515, Max null eigenvalue = 0.76715, p-value = 1 -#> Empirical eigenvalue #7 = 0.02765, Max null eigenvalue = 0.65331, p-value = 1 +#> Empirical eigenvalue #1 = 3.84712, Max null eigenvalue = 2.08838, p-value = 0 +#> Empirical eigenvalue #2 = 1.52017, Max null eigenvalue = 1.70742, p-value = 0.14 +#> Empirical eigenvalue #3 = 0.70634, Max null eigenvalue = 1.3338, p-value = 1 +#> Empirical eigenvalue #4 = 0.41356, Max null eigenvalue = 1.14976, p-value = 1 +#> Empirical eigenvalue #5 = 0.34001, Max null eigenvalue = 0.96754, p-value = 1 +#> Empirical eigenvalue #6 = 0.14515, Max null eigenvalue = 0.72488, p-value = 1 +#> Empirical eigenvalue #7 = 0.02765, Max null eigenvalue = 0.63519, p-value = 1 #> -#> PC 1 is significant and accounts for 55% (95%-CI:43.6-64) of the total variation +#> PC 1 is significant and accounts for 55% (95%-CI:42.1-65.6) of the total variation #> #> Variables 1, 2, 3, 4, 5, and 7 have significant loadings on PC 1 ``` diff --git a/man/PCAtest.Rd b/man/PCAtest.Rd index 9dc134a..10b7759 100644 --- a/man/PCAtest.Rd +++ b/man/PCAtest.Rd @@ -47,19 +47,32 @@ An object of class “list” with the following elements: \item{pervarboot}{The percentage of variance explained by each PC based on the bootstrapped data.} -\item{pervarperm}{The percentage of data explained by each PC based on the permuted data.} +\item{pervarbootci}{Confidence intervals of the percentage of variance explained by each PC based on the bootstrapped data.} + +\item{pervarperm}{The percentage of variance explained by each PC based on the randomized data.} + +\item{pervarpermci}{Confidence intervals of the percentage of variance explained by each PC based on the randomized data.} \item{indexloadobs}{The index of the loadings of the observed data.} \item{indexloadboot}{The index of the loadings of the bootstrapped data.} -\item{indexloadperm}{The index of the loadings of the permuted data.} +\item{indexloadbootci}{Confidence intervals of the index of the loadings based on the bootstrapped data.} + +\item{indexloadperm}{The index of the loadings based on the randomized data.} + +\item{indexloadpermci}{Confidence intervals of the index of the loadings based on the randomized data.} \item{corobs}{If varcorr=TRUE, the correlations of the observed variables with each significant PC.} \item{corboot}{If varcorr=TRUE, the correlations of the observed variables with each significant PC based on the bootstrapped data.} -\item{corperm}{If varcorr=TRUE, the correlations of the observed variables with each significant PC based on permuted data.} +\item{corbootci}{If varcorr=TRUE, the confidence intervals of the correlations of the variables with each significant PC based on the bootstrapped data.} + +\item{corperm}{If varcorr=TRUE, the correlations of the observed variables with each significant PC based on randomized data.} + +\item{corpermci}{If varcorr=TRUE, the confidence intervals of the correlations of the variables with each significant PC based on randomized data.} + } } \description{ diff --git a/man/figures/README-example1-1.png b/man/figures/README-example1-1.png index dc86768..8b8c1c1 100644 Binary files a/man/figures/README-example1-1.png and b/man/figures/README-example1-1.png differ diff --git a/man/figures/README-example2-1.png b/man/figures/README-example2-1.png index cfe325d..1241db7 100644 Binary files a/man/figures/README-example2-1.png and b/man/figures/README-example2-1.png differ diff --git a/man/figures/README-example3-1.png b/man/figures/README-example3-1.png index c9677f3..94d4524 100644 Binary files a/man/figures/README-example3-1.png and b/man/figures/README-example3-1.png differ diff --git a/man/figures/README-example4-1.png b/man/figures/README-example4-1.png index f84afed..40dfb5c 100644 Binary files a/man/figures/README-example4-1.png and b/man/figures/README-example4-1.png differ