Skip to content

Commit

Permalink
multipleCramer produces Cramer s V between all pairs
Browse files Browse the repository at this point in the history
of levels of LCZ given by any workflow
it also returns the n most significant associations
that is pair of different levels from different workflows who yet show a quite strong association
  • Loading branch information
MGousseff committed Dec 3, 2024
1 parent 95c9802 commit b99be38
Show file tree
Hide file tree
Showing 6 changed files with 79 additions and 61 deletions.
8 changes: 1 addition & 7 deletions R/compareLCZ.R
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ compareLCZ<-function(sf1,geomID1="",column1,confid1="",wf1="bdtopo_2_2",
write.table(x=echIntExpo, file =nom, append = TRUE, quote = TRUE, sep = ";",
eol = "\n", na = "NA", dec = ".",
qmethod = c("escape", "double"),
fileEncoding = "", col.names=FALSE,row.names=F)
fileEncoding = "", col.names=FALSE, row.names=F)
}
}
###################################################
Expand Down Expand Up @@ -393,12 +393,6 @@ areas<-matConfOut$areas
percAgg<-matConfOut$percAgg








################################################
# GRAPHICS
################################################
Expand Down
2 changes: 0 additions & 2 deletions R/compareMultipleLCZ.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@
compareMultipleLCZ<-function(sfList, LCZcolumns, refCrs=NULL, sfWf=NULL, trimPerc=0.05){
echInt<-createIntersect(sfList = sfList, columns = LCZcolumns , refCrs= refCrs, sfWf = sfWf)
print(nrow(echInt))
echInt$area<-st_area(echInt)
echInt <- echInt %>% subset(area>quantile(echInt$area, probs=trimPerc) & !is.na(area))
print(nrow(echInt))
echIntnogeom<-st_drop_geometry(echInt)
for (i in 1:(length(sfList) - 1)) {
for(j in (i+1):length(sfList)){
Expand Down
6 changes: 4 additions & 2 deletions R/createIntersect.R
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
#' are assigned to geometries resulting from intersection of all input geometries
#' @export
#' @examples
createIntersect<-function(sfList, columns, refCrs=NULL, sfWf=NULL){
createIntersect<-function(sfList, columns, refCrs=NULL, sfWf=NULL, minZeroArea=0.0001){
sfInt<-sfList[[1]] %>% select(columns[1])
if (is.null(refCrs)){refCrs<-st_crs(sfInt)}
for (i in 2:length(sfList)){
Expand All @@ -19,8 +19,10 @@ createIntersect<-function(sfList, columns, refCrs=NULL, sfWf=NULL){
sfInt<-st_intersection(sfInt,sfProv)
}
if (!is.null(sfWf) & length(sfWf) == length(sfList)){
names(sfInt)[1:(ncol(sfInt)-1)]<-paste0("LCZ",sfWf)
names(sfInt)[1:(ncol(sfInt)-1)]<-sfWf
} else { names(sfInt)[1:(ncol(sfInt)-1)]<-paste0("LCZ",1:length(sfList)) }
sfInt$area<-units::drop_units(st_area(sfInt$geometry))
sfInt<-sfInt[sfInt$area>minZeroArea,]
return(sfInt)
}

Expand Down
55 changes: 33 additions & 22 deletions R/multipleCramer.R
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
#' Computes Cramer's V for all pairs of one hot encoded levels of input LCZ classifications
#' @param sfInt an sf object with several LCZ classifications to compare on the same intersected geometries,
#' typically an output of the createIntersec function
#' Computes Cramer's V for all pairs of onehot encoded levels of input LCZ classifications
#' @param sfInt an sf object with several LCZ classifications on the same (intersected) geometries,
#' typically an output of the createIntersect function
#' @param columns a vector which contains names of LCZ classification columns
#' @param nbOutAssociations the number of significant associations we want to extract, ie pair of levels
#' from two different workflows whose Cramer's V is high (association between LCZ 101 from workflow 1
#' and LCZ 101 from workflow 2 are exclude, for instance, but can be read from the cramerLong output)
#' @importFrom ggplot2 geom_sf guides ggtitle aes
#' @importFrom caret dummyVars
#' @import sf dplyr cowplot forcats units tidyr RColorBrewer utils grDevices rlang
#' @return an sf file with values of LCZ from all the input
#' are assigned to geometries resulting from intersection of all input geometries
#' @return Cramer's V between pairs of levels, in a matrix (cramerMatrix) or long form (cramerLong),
#' and a dataframe with the nbOutAssociation most significant association
#' @export
#' @examples
multipleCramer<-function(sfInt, columns, nbOutAssociations ){
Expand All @@ -27,29 +30,37 @@ multipleCramer<-function(sfInt, columns, nbOutAssociations ){
# print(table(sfDummy[,c(i,j)]))
}
}

rownames(Vs)<-names(sfDummy)
colnames(Vs)<-names(sfDummy)

threshold <- Vs %>% as.vector %>% unique %>% sort(decreasing=TRUE) %>% head(n = (nbOutAssociations+1)) %>% min
print(paste0("threshold = ", threshold))
VsLong<-data.frame(
LCZ_type_1 = rownames(Vs),
LCZ_type_2 = rep(colnames(Vs), each = nrow(Vs)),
cramerVs = as.vector(Vs)) %>% arrange(desc(cramerVs))%>% na.omit()

Mask<-Vs
Mask<-(Mask>=threshold)
Mask[Vs<threshold]<-NA
Mask[Vs==1]<- NA
# Remove association between modalities of a same LCZ classification
VsLong$wf1<-gsub(x = VsLong$LCZ_type_1,
pattern = "(\\w[0-9]*)\\.([0-9]+)", replacement = "\\1")
VsLong$wf2<-gsub(x = VsLong$LCZ_type_2,
pattern="(\\w[0-9]*)\\.([0-9]+)", replacement = "\\1")
VsLong$LCZ1<-gsub(x = VsLong$LCZ_type_1,
pattern = "([A-Za-z]*\\d*)(\\.)(\\d+)", replacement = "\\3")
VsLong$LCZ2<-gsub(x = VsLong$LCZ_type_2,
pattern = "([A-Za-z]*\\d*)(\\.)([0-9]+)", replacement = "\\3")
VsLong<-VsLong[,c("wf1", "LCZ1", "wf2", "LCZ2", "cramerVs", "LCZ_type_1","LCZ_type_2")]
VsLong<-VsLong[VsLong$wf1!=VsLong$wf2,]


keptRows<-apply(
X = apply(X = Mask, MARGIN = 1, is.na),
MARGIN = 1, sum)<ncol(Vs)
keptCols<-apply(
X = apply(X = Mask, MARGIN = 2, is.na ),
MARGIN = 2, sum)<nrow(Vs)
print("keptRows = ") ; print(keptRows)
print("keptCols = ") ; print(keptCols)
signifAssoc<-Vs[keptRows, keptCols]
signifAssoc[(signifAssoc>=1 | signifAssoc<threshold)]<-NA

VsLong<-VsLong[VsLong$LCZ_type_1!=VsLong$LCZ_type_2,]
threshold <-
VsLong$cramerVs[VsLong$LCZ1!=VsLong$LCZ2] %>% as.vector %>%
unique %>% sort(decreasing=TRUE) %>% head(n = (nbOutAssociations+1)) %>% min
signifAssoc<-VsLong[VsLong$cramerVs>=threshold & VsLong$LCZ1!=VsLong$LCZ2,]
print(paste0("threshold = ", threshold))

output<-list(cramerMatrix = Vs, signifAssoc = signifAssoc)
output<-list(cramerMatrix = Vs, cramerLong = VsLong, threshold = threshold, signifAssoc = signifAssoc)

return(output)
}
10 changes: 6 additions & 4 deletions inst/tinytest/test_compareMultiple.R
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,11 @@ prov2<-apply(X = test, MARGIN = 1, function(x) max(table(x)) )
head(prov1)
head(prov2)

plot1<-showLCZ(sf = multicompare_test$echInt, column="LCZBDT22", wf="22")
plot2<-showLCZ(sf = multicompare_test$echInt, column="LCZBDT11", wf="11")

ggplot(data=multicompare_test$echInt) +
plot1<-showLCZ(sf = multicompare_test$echInt, column="BDT22", wf="BDT22")
plot2<-showLCZ(sf = multicompare_test$echInt, column="BDT11", wf="BDT1111")
plot3<-showLCZ(sf = multicompare_test$echInt, column="OSM22", wf="OSM22")
plot4<-showLCZ(sf = multicompare_test$echInt, column="WUDAPT", wf="WUDAPT")
plot5<-ggplot(data=multicompare_test$echInt) +
geom_sf(aes(fill=nbAgree, color=after_scale(fill)))+
scale_fill_gradient(low = "red" , high = "green", na.value = NA)
plot_grid(plot1, plot2, plot3, plot4, plot5)
59 changes: 35 additions & 24 deletions inst/tinytest/test_multipleCramer.R
Original file line number Diff line number Diff line change
Expand Up @@ -22,30 +22,41 @@ sfList<-list(BDT11 = sfBDT_11_78030, BDT22 = sfBDT_22_78030, OSM11= sf_OSM_11_Au


intersected<-createIntersect(sfList = sfList, columns = c(rep("LCZ_PRIMARY",4),"lcz_primary"),
sfWf = c("BDT11","BDT22","OSM11","OSM22","WUDAPT"))
intersectedSample
sfWf = c("BDT11","BDT22","OSM11","OSM22","WUDAPT"), minZeroArea=0.1)
summary(intersected$area)
min(intersected$area)
intersected[intersected$area==0,]


testVs<-multipleCramer(intersected, columns = names(intersected)[names(intersected)!="geometry"], nbOutAssociations = 5)
testVs<-multipleCramer(intersected, columns = names(intersected)[names(intersected)!="geometry" &names(intersected)!="area"],
nbOutAssociations = 30)
testVs$signifAssoc
str(testVs)

names6<-grep(x = rownames(testVs), pattern = "6$", value = TRUE)

testVsSampled<-testVs[1:6,1:6]

testMask<-testVsSampled
ncol(testVsSampled)
nrow(testVsSampled)
testMask<-(testMask>0.004 & testMask<1)
testMask[!testMask]<-NA

keptRows<-apply(
X = apply(X = testMask, MARGIN = 1, is.na),
MARGIN = 1, sum)<ncol(testVs)
keptCols<-apply(
X = apply(X = testMask, MARGIN = 2, is.na ),
MARGIN = 2, sum)<nrow(testVs)
signifAssoc<-testVsSampled[keptRows, keptCols]
signifAssoc[(signifAssoc>=1 | signifAssoc<0.004)]<-NA
testVs$cramerLong %>% head(10)

intersectedDf<-st_drop_geometry(intersected)
str(intersectedDf)
summary(intersectedDf$area)
min(intersectedDf$area)
dataTest<-intersectedDf[ ,-6]
dataTest <- as.data.frame(lapply(X = dataTest, factor))
summary(dataTest)
str(dataTest)
weights<-(intersectedDf$area/sum(intersectedDf$area))
length(weights)
auffargisMCA<-MCA(X = dataTest[, names(dataTest)!="area"], ncp = 5, row.w = weights)
plot.MCA(auffargisMCA, invisible = c("ind"))

dataTestNo107<-dataTest[apply(dataTest, 1, function(x) all(x!="107")),]
nrow(dataTestNo107)
weightsNo107<-(intersectedDf$area/sum(intersectedDf$area))[
apply(dataTest, 1, function(x) all(x!="107"))]
length(weightsNo107)

auffargisMCANo107<-MCA(X = dataTestNo107[,names(dataTest)!="area"], ncp = 5) #, row.w = weightsNo107)
plot.MCA(auffargisMCANo107, invisible= c("ind"))
auffargisMCANo107Weights<-MCA(X = dataTestNo107[,names(dataTest)!="area"], ncp = 5, row.w = weightsNo107)
plot.MCA(auffargisMCANo107Weights, invisible= c("ind"))


str(auffargisMCANo107)


0 comments on commit b99be38

Please sign in to comment.