Skip to content

Commit

Permalink
Updates
Browse files Browse the repository at this point in the history
New z.delete argument, column minima default dl, regular data check
  • Loading branch information
Japal committed Dec 7, 2023
1 parent 773f2af commit 5bec33b
Show file tree
Hide file tree
Showing 23 changed files with 340 additions and 135 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
Package: zCompositions
Type: Package
Title: Treatment of Zeros, Left-Censored and Missing Values in Compositional Data Sets
Version: 1.4.1
Date: 2023-08-23
Version: 1.5
Date: 2023-12-07
Authors@R:
c(person(given = "Javier",
family = "Palarea-Albaladejo",
Expand Down
13 changes: 11 additions & 2 deletions NEWS
Original file line number Diff line number Diff line change
@@ -1,20 +1,29 @@
RELEASE HISTORY OF THE "zCompositions" PACKAGE
==============================================

CHANGES IN zCompositions VERSION 1.5 [2023-12]:
----------------------------------------------------

MODIFICATIONS

* Parametric methods requiring regular data sets (no. obs > no. vars) now warn the user about this and stop.
* When no dl vector/matrix provided then column minima used automatically as thresholds and a warning message is shown.
* Handling excess of zeros/unobs modified: new logical argument z.delete allows the user to decide what to do with columns/rows containing zero/unobs values over the threshold set by z.warning.

CHANGES IN zCompositions VERSION 1.4.1 [2023-08]:
----------------------------------------------------

MODIFICATIONS

* Handling excess of zeros: columns/rows individually exceeding the given zero/unobserved values proportion threshold are now automatically deleted. A warning message details the particular columns/rows involved.
* Handling excess of zeros/unobs (z.warning argument): columns/rows individually exceeding the given zero/unobserved values proportion threshold are now automatically deleted. A warning message details the particular columns/rows involved.
* Minor bugs fixed and edits to the documentation.

CHANGES IN zCompositions VERSION 1.4.0-1 [2022-03]:
----------------------------------------------------

NEW FEATURES

* lrSVD and lrSVDplus: imputation algorithm based on singular value decomposition for censored, missing data, or both simultaneously. Particularly thought for wide data sets (more columns than rows).
* lrSVD and lrSVDplus: imputation algorithm based on singular value decomposition for censored, missing data, or both simultaneously. Particularly thought for wide data sets (no. obs < no. vars).

MODIFICATIONS

Expand Down
24 changes: 19 additions & 5 deletions R/cmultRepl.R
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
cmultRepl <- function(X,label=0,method=c("GBM","SQ","BL","CZM","user"),output=c("prop", "p-counts"),
frac=0.65,threshold=0.5,adjust=TRUE,t=NULL,s=NULL,z.warning=0.8,
suppress.print=FALSE,delta=NULL)
z.delete=TRUE,suppress.print=FALSE,delta=NULL)
{

if (any(X<0, na.rm=T)) stop("X contains negative values")
Expand All @@ -26,15 +26,29 @@ cmultRepl <- function(X,label=0,method=c("GBM","SQ","BL","CZM","user"),output=c(
checkNumZerosCol <- apply(X,2,function(x) sum(is.na(x)))
if (any(checkNumZerosCol/nrow(X) >= z.warning)) {
cases <- which(checkNumZerosCol/nrow(X) >= z.warning)
X <- X[,-cases]
warning(paste("Column ",cases," containing more than ",z.warning*100,"% zeros/unobserved values was deleted (pre-check out using function zPatterns/modify threshold using argument z.warning).\n",sep=""))
if (z.delete == TRUE){
X <- X[,-cases]
action <- "deleted"
warning(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning).\n",sep=""))
}
else{
action <- "found"
stop(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning. Check out with zPatterns()).\n",sep=""))
}
}

checkNumZerosRow <- apply(X,1,function(x) sum(is.na(x)))
if (any(checkNumZerosRow/ncol(X) >= z.warning)) {
cases <- which(checkNumZerosRow/ncol(X) >= z.warning)
X <- X[-cases,]
warning(paste("Row ",cases," containing more than ",z.warning*100,"% zeros/unobserved values was deleted (pre-check out using function zPatterns/modify threshold using argument z.warning).\n",sep=""))
if (z.delete == TRUE){
X <- X[,-cases]
action <- "deleted"
warning(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning).\n",sep=""))
}
else{
action <- "found"
stop(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning. Check out with zPatterns()).\n",sep=""))
}
}

N <- nrow(X); D <- ncol(X)
Expand Down
44 changes: 33 additions & 11 deletions R/lrDA.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
lrDA <-
function(X,label=NULL,dl=NULL,ini.cov=c("lrEM","complete.obs","multRepl"),frac=0.65,
imp.missing=FALSE,n.iters=1000,m=1,store.mi=FALSE,closure=NULL,z.warning=0.8,delta=NULL){
imp.missing=FALSE,n.iters=1000,m=1,store.mi=FALSE,closure=NULL,z.warning=0.8,
z.delete=TRUE,delta=NULL){

if (any(X<0, na.rm=T)) stop("X contains negative values")
if (imp.missing==FALSE){
if (is.character(dl) || is.null(dl)) stop("dl must be a numeric vector or matrix")
if (is.vector(dl)) dl <- matrix(dl,nrow=1)
dl <- as.matrix(dl) # Avoids problems when dl might be multiple classes
}


if ((is.vector(X)) | (nrow(X)==1)) stop("X must be a data matrix")
if (is.null(label)) stop("A value for label must be given")
if (!is.na(label)){
Expand All @@ -20,6 +16,17 @@ lrDA <-
if (any(X==0,na.rm=T)) stop("Zero values not labelled as censored or missing values were found in the data set")
if (!any(is.na(X),na.rm=T)) stop(paste("Label",label,"was not found in the data set"))
}

if (imp.missing==FALSE){
if (is.character(dl)) stop("dl must be a numeric vector or matrix")
if (is.null(dl)){ # If dl not given use min per column
dl <- apply(X,2, function(x) min(x[x!=label]))
warning("No dl vector or matrix provided. The minimum observed values for each column used as detection limits.")
}
if (is.vector(dl)) dl <- matrix(dl,nrow=1)
dl <- as.matrix(dl) # Avoids problems when dl might be multiple classes
}

if (imp.missing==FALSE){
if (ncol(dl)!=ncol(X)) stop("The number of columns in X and dl do not agree")
if ((nrow(dl)>1) & (nrow(dl)!=nrow(X))) stop("The number of rows in X and dl do not agree")
Expand Down Expand Up @@ -141,6 +148,7 @@ lrDA <-

X <- as.data.frame(X,stringsAsFactors=TRUE)
nn <- nrow(X); p <- ncol(X)
if (nn <= p) stop("The lrDA algorithm works on regular data sets (no. rows > no. columns). You can consider lrSVD for wide dat sets.")

X[X==label] <- NA
X <- apply(X,2,as.numeric)
Expand All @@ -149,15 +157,29 @@ lrDA <-
checkNumZerosCol <- apply(X,2,function(x) sum(is.na(x)))
if (any(checkNumZerosCol/nrow(X) >= z.warning)) {
cases <- which(checkNumZerosCol/nrow(X) >= z.warning)
X <- X[,-cases]
warning(paste("Column ",cases," containing more than ",z.warning*100,"% zeros/unobserved values was deleted (pre-check out using function zPatterns/modify threshold using argument z.warning).\n",sep=""))
if (z.delete == TRUE){
X <- X[,-cases]
action <- "deleted"
warning(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning).\n",sep=""))
}
else{
action <- "found"
stop(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning. Check out with zPatterns()).\n",sep=""))
}
}

checkNumZerosRow <- apply(X,1,function(x) sum(is.na(x)))
if (any(checkNumZerosRow/ncol(X) >= z.warning)) {
cases <- which(checkNumZerosRow/ncol(X) >= z.warning)
X <- X[-cases,]
warning(paste("Row ",cases," containing more than ",z.warning*100,"% zeros/unobserved values was deleted (pre-check out using function zPatterns/modify threshold using argument z.warning).\n",sep=""))
if (z.delete == TRUE){
X <- X[,-cases]
action <- "deleted"
warning(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning).\n",sep=""))
}
else{
action <- "found"
stop(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning. Check out with zPatterns()).\n",sep=""))
}
}

if (imp.missing==FALSE) {if (nrow(dl)==1) dl <- matrix(rep(1,nn),ncol=1)%*%dl}
Expand Down
43 changes: 32 additions & 11 deletions R/lrEM.R
Original file line number Diff line number Diff line change
@@ -1,14 +1,9 @@
lrEM <- function(X,label=NULL,dl=NULL,rob=FALSE,ini.cov=c("complete.obs","multRepl"),frac=0.65,tolerance=0.0001,
max.iter=50,rlm.maxit=150,imp.missing=FALSE,suppress.print=FALSE,
closure=NULL,z.warning=0.8,delta=NULL){
closure=NULL,z.warning=0.8,z.delete=TRUE,delta=NULL){

if (any(X<0, na.rm=T)) stop("X contains negative values")
if (imp.missing==FALSE){
if (is.character(dl) || is.null(dl)) stop("dl must be a numeric vector or matrix")
if (is.vector(dl)) dl <- matrix(dl,nrow=1)
dl <- as.matrix(dl) # Avoids problems when dl might be multiple classes
}


if ((is.vector(X)) | (nrow(X)==1)) stop("X must be a data matrix")
if (is.null(label)) stop("A value for label must be given")
if (!is.na(label)){
Expand All @@ -20,6 +15,17 @@ lrEM <- function(X,label=NULL,dl=NULL,rob=FALSE,ini.cov=c("complete.obs","multRe
if (any(X==0,na.rm=T)) stop("Zero values not labelled as censored or missing values were found in the data set")
if (!any(is.na(X),na.rm=T)) stop(paste("Label",label,"was not found in the data set"))
}

if (imp.missing==FALSE){
if (is.character(dl)) stop("dl must be a numeric vector or matrix")
if (is.null(dl)){ # If dl not given use min per column
dl <- apply(X,2, function(x) min(x[x!=label]))
warning("No dl vector or matrix provided. The minimum observed values for each column used as detection limits.")
}
if (is.vector(dl)) dl <- matrix(dl,nrow=1)
dl <- as.matrix(dl) # Avoids problems when dl might be multiple classes
}

if (imp.missing==FALSE){
if (ncol(dl)!=ncol(X)) stop("The number of columns in X and dl do not agree")
if ((nrow(dl)>1) & (nrow(dl)!=nrow(X))) stop("The number of rows in X and dl do not agree")
Expand Down Expand Up @@ -133,6 +139,7 @@ lrEM <- function(X,label=NULL,dl=NULL,rob=FALSE,ini.cov=c("complete.obs","multRe

X <- as.data.frame(X,stringsAsFactors=TRUE)
nn <- nrow(X); D <- ncol(X)
if (nn <= D) stop("The lrEM algorithm works on regular data sets (no. rows > no. columns). You can consider lrSVD for wide dat sets.")

X[X==label] <- NA
X <- as.data.frame(apply(X,2,as.numeric),stringsAsFactors=TRUE)
Expand All @@ -141,15 +148,29 @@ lrEM <- function(X,label=NULL,dl=NULL,rob=FALSE,ini.cov=c("complete.obs","multRe
checkNumZerosCol <- apply(X,2,function(x) sum(is.na(x)))
if (any(checkNumZerosCol/nrow(X) >= z.warning)) {
cases <- which(checkNumZerosCol/nrow(X) >= z.warning)
X <- X[,-cases]
warning(paste("Column ",cases," containing more than ",z.warning*100,"% zeros/unobserved values was deleted (pre-check out using function zPatterns/modify threshold using argument z.warning).\n",sep=""))
if (z.delete == TRUE){
X <- X[,-cases]
action <- "deleted"
warning(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning).\n",sep=""))
}
else{
action <- "found"
stop(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning. Check out with zPatterns()).\n",sep=""))
}
}

checkNumZerosRow <- apply(X,1,function(x) sum(is.na(x)))
if (any(checkNumZerosRow/ncol(X) >= z.warning)) {
cases <- which(checkNumZerosRow/ncol(X) >= z.warning)
X <- X[-cases,]
warning(paste("Row ",cases," containing more than ",z.warning*100,"% zeros/unobserved values was deleted (pre-check out using function zPatterns/modify threshold using argument z.warning).\n",sep=""))
if (z.delete == TRUE){
X <- X[,-cases]
action <- "deleted"
warning(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning).\n",sep=""))
}
else{
action <- "found"
stop(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning. Check out with zPatterns()).\n",sep=""))
}
}

if (imp.missing==FALSE) {if (nrow(dl)==1) dl <- matrix(rep(1,nn),ncol=1)%*%dl}
Expand Down
35 changes: 26 additions & 9 deletions R/lrEMplus.R
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
lrEMplus <- function(X, dl = NULL, rob = FALSE, ini.cov = c("complete.obs", "multRepl"), frac = 0.65,
tolerance = 0.0001, max.iter = 50,
rlm.maxit=150, suppress.print = FALSE, closure=NULL, z.warning=0.8, delta=NULL){
rlm.maxit=150, suppress.print = FALSE, closure=NULL, z.warning=0.8, z.delete=TRUE, delta=NULL){

if (any(X<0, na.rm=T)) stop("X contains negative values")
if (is.character(dl) || is.null(dl)) stop("dl must be a numeric vector or matrix")
if (is.character(dl)) stop("dl must be a numeric vector or matrix")
if (is.null(dl)){ # If dl not given use min per column
dl <- apply(X,2, function(x) min(x[x!=0]))
warning("No dl vector or matrix provided. The minimum observed values for each column used as detection limits.")
}
if (is.vector(dl)) dl <- matrix(dl,nrow=1)
dl <- as.matrix(dl) # Avoids problems when dl might be multiple classes
if ((is.vector(X)) | (nrow(X)==1)) stop("X must be a data matrix")
Expand Down Expand Up @@ -32,19 +36,32 @@ lrEMplus <- function(X, dl = NULL, rob = FALSE, ini.cov = c("complete.obs", "mul
X <- as.data.frame(apply(X,2,as.numeric),stringsAsFactors=TRUE)
c <- apply(X,1,sum,na.rm=TRUE)

# Number of zeros or missing per column for warning
checkNumZerosCol <- apply(X,2,function(x) sum(is.na(x) | (x==0)))
checkNumZerosCol <- apply(X,2,function(x) sum(is.na(x)))
if (any(checkNumZerosCol/nrow(X) >= z.warning)) {
cases <- which(checkNumZerosCol/nrow(X) >= z.warning)
X <- X[,-cases]
warning(paste("Column ",cases," containing more than ",z.warning*100,"% zeros/unobserved values was deleted (pre-check out using function zPatterns/modify threshold using argument z.warning).\n",sep=""))
if (z.delete == TRUE){
X <- X[,-cases]
action <- "deleted"
warning(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning).\n",sep=""))
}
else{
action <- "found"
stop(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning. Check out with zPatterns()).\n",sep=""))
}
}

checkNumZerosRow <- apply(X,1,function(x) sum(is.na(x) | (x==0)))
checkNumZerosRow <- apply(X,1,function(x) sum(is.na(x)))
if (any(checkNumZerosRow/ncol(X) >= z.warning)) {
cases <- which(checkNumZerosRow/ncol(X) >= z.warning)
X <- X[-cases,]
warning(paste("Row ",cases," containing more than ",z.warning*100,"% zeros/unobserved values was deleted (pre-check out using function zPatterns/modify threshold using argument z.warning).\n",sep=""))
if (z.delete == TRUE){
X <- X[,-cases]
action <- "deleted"
warning(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning).\n",sep=""))
}
else{
action <- "found"
stop(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning. Check out with zPatterns()).\n",sep=""))
}
}

if (nrow(dl)==1) dl <- matrix(rep(1,nn),ncol=1)%*%dl
Expand Down
43 changes: 31 additions & 12 deletions R/lrSVD.R
Original file line number Diff line number Diff line change
@@ -1,16 +1,10 @@
lrSVD <- function(X, label = NULL, dl = NULL, frac = 0.65, ncp = 2,
imp.missing = FALSE, beta = 0.5, method = c("ridge", "EM"),
row.w = NULL, coeff.ridge = 1, threshold = 1e-4, seed = NULL, nb.init = 1,
max.iter = 1000, z.warning = 0.8, ...) {
max.iter = 1000, z.warning = 0.8, z.delete=TRUE, ...) {

if (any(X < 0, na.rm = T)) stop("X contains negative values")

if (imp.missing == FALSE) {
if (is.character(dl) || is.null(dl)) stop("dl must be a numeric vector or matrix")
if (is.vector(dl)) dl <- matrix(dl, nrow = 1)
dl <- as.matrix(dl) # Avoids problems when dl might be multiple classes
}

if ((is.vector(X)) | (nrow(X) == 1)) stop("X must be a data matrix")
if (is.null(label)) stop("A value for label must be given")
if (!is.na(label)) {
Expand All @@ -28,6 +22,17 @@ lrSVD <- function(X, label = NULL, dl = NULL, frac = 0.65, ncp = 2,
if (!any(is.na(X), na.rm = T))
stop(paste("Label", label, "was not found in the data set"))
}

if (imp.missing==FALSE){
if (is.character(dl)) stop("dl must be a numeric vector or matrix")
if (is.null(dl)){ # If dl not given use min per column
dl <- apply(X,2, function(x) min(x[x!=label]))
warning("No dl vector or matrix provided. The minimum observed values for each column used as detection limits.")
}
if (is.vector(dl)) dl <- matrix(dl,nrow=1)
dl <- as.matrix(dl) # Avoids problems when dl might be multiple classes
}

if (imp.missing == FALSE) {
if (ncol(dl) != ncol(X)) stop("The number of columns in X and dl do not agree")
if ((nrow(dl) > 1) & (nrow(dl) != nrow(X))) stop("The number of rows in X and dl do not agree")
Expand Down Expand Up @@ -323,15 +328,29 @@ lrSVD <- function(X, label = NULL, dl = NULL, frac = 0.65, ncp = 2,
checkNumZerosCol <- apply(X,2,function(x) sum(is.na(x)))
if (any(checkNumZerosCol/nrow(X) >= z.warning)) {
cases <- which(checkNumZerosCol/nrow(X) >= z.warning)
X <- X[,-cases]
warning(paste("Column ",cases," containing more than ",z.warning*100,"% zeros/unobserved values was deleted (pre-check out using function zPatterns/modify threshold using argument z.warning).\n",sep=""))
if (z.delete == TRUE){
X <- X[,-cases]
action <- "deleted"
warning(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning).\n",sep=""))
}
else{
action <- "found"
stop(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning. Check out with zPatterns()).\n",sep=""))
}
}

checkNumZerosRow <- apply(X,1,function(x) sum(is.na(x)))
if (any(checkNumZerosRow/ncol(X) >= z.warning)) {
cases <- which(checkNumZerosRow/ncol(X) >= z.warning)
X <- X[-cases,]
warning(paste("Row ",cases," containing more than ",z.warning*100,"% zeros/unobserved values was deleted (pre-check out using function zPatterns/modify threshold using argument z.warning).\n",sep=""))
if (z.delete == TRUE){
X <- X[,-cases]
action <- "deleted"
warning(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning).\n",sep=""))
}
else{
action <- "found"
stop(paste("Column no. ",cases," containing >",z.warning*100,"% zeros/unobserved values ",action," (can modify threshold using argument z.warning. Check out with zPatterns()).\n",sep=""))
}
}

# Check for closure
Expand Down
Loading

0 comments on commit 5bec33b

Please sign in to comment.