Skip to content

Commit

Permalink
multiple cramer Pb with significative associations
Browse files Browse the repository at this point in the history
  • Loading branch information
MGousseff committed Dec 2, 2024
1 parent 872ecef commit 95c9802
Show file tree
Hide file tree
Showing 6 changed files with 200 additions and 56 deletions.
51 changes: 2 additions & 49 deletions R/compareMultipleLCZ.R
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#' @examples
#'
compareMultipleLCZ<-function(sfList, LCZcolumns, refCrs=NULL, sfWf=NULL, trimPerc=0.05){
echInt<-createIntersec(sfList = sfList, LCZcolumns = LCZcolumns , refCrs= refCrs, sfWf = sfWf)
echInt<-createIntersect(sfList = sfList, columns = LCZcolumns , refCrs= refCrs, sfWf = sfWf)
print(nrow(echInt))
echInt$area<-st_area(echInt)
echInt <- echInt %>% subset(area>quantile(echInt$area, probs=trimPerc) & !is.na(area))
Expand All @@ -32,7 +32,7 @@ compareMultipleLCZ<-function(sfList, LCZcolumns, refCrs=NULL, sfWf=NULL, trimPer
echIntnogeom[,compName]<-echIntnogeom[,i] == echIntnogeom[,j]
}
}
rangeCol<-(length(listSfs)+3):ncol(echIntnogeom)
rangeCol<-(length(sfList)+3):ncol(echIntnogeom)
print(rangeCol)
# print(names(echIntnogeom[,rangeCol]))
echIntnogeom$nbAgree<-apply(echIntnogeom[,rangeCol],MARGIN=1,sum)
Expand All @@ -53,50 +53,3 @@ compareMultipleLCZ<-function(sfList, LCZcolumns, refCrs=NULL, sfWf=NULL, trimPer
}


sfBDT_11_78030<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2011/bdtopo_2_78030/",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
class(sfBDT_11_78030)
sfBDT_22_78030<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2022/bdtopo_3_78030/",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
sf_OSM_11_Auffargis<-importLCZvect(dirPath="//home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2011/osm_Auffargis/",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
sf_OSM_22_Auffargis<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2022/osm_Auffargis/",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
sf_WUDAPT_78030<-importLCZvect("/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/WUDAPT/",
file ="wudapt_Auffargis.fgb", column="lcz_primary")

sfList<-list(BDT11 = sfBDT_11_78030, BDT22 = sfBDT_22_78030, OSM11= sf_OSM_11_Auffargis, OSM22 = sf_OSM_22_Auffargis,
WUDAPT = sf_WUDAPT_78030)
showLCZ(sfList[[1]])



intersected<-createIntersec(sfList = sfList, LCZcolumns = c(rep("LCZ_PRIMARY",4),"lcz_primary"),
sfWf = c("BDT11","BDT22","OSM11","OSM22","WUDAPT"))


# test_list<-list(a=c(1,2),b="top",c=TRUE)
# length(test_list)
# for (i in test_list[2:3]) print(str(i))

multicompare_test<-compareMultipleLCZ(sfList = sfList, LCZcolumns = c(rep("LCZ_PRIMARY",4),"lcz_primary"),
sfWf = c("BDT11","BDT22","OSM11","OSM22","WUDAPT"),trimPerc = 0.5)
multicompare_test

test<-multicompare_test$echIntLong
test2<-test %>% subset(agree==TRUE) %>% group_by(LCZvalue) %>% summarize(agreementArea=sum(area)) %>% mutate(percAgreementArea=agreementArea/sum(agreementArea))

test<-multicompare_test$echInt[,1:5] %>% st_drop_geometry()
prov1<-apply(X = test, MARGIN = 1, table )
prov2<-apply(X = test, MARGIN = 1, function(x) max(table(x)) )

head(prov1)
head(prov2)

plot1<-showLCZ(sf = multicompare_test$echInt, column="LCZBDT22", wf="22")
plot2<-showLCZ(sf = multicompare_test$echInt, column="LCZBDT11", wf="11")

ggplot(data=multicompare_test$echInt) +
geom_sf(aes(fill=maxAgree, color=after_scale(fill)))+
scale_fill_gradient(low = "red" , high = "green", na.value = NA)

26 changes: 19 additions & 7 deletions R/shinyGC/createIntersect.R → R/createIntersect.R
Original file line number Diff line number Diff line change
@@ -1,15 +1,27 @@
createIntersec<-function(sfList, columns, refCrs=NULL, sfWf=NULL){
echInt<-sfList[[1]] %>% select(columns[1])
if (is.null(refCrs)){refCrs<-st_crs(echInt)}
#' Intersect multiple sf files in which Local Climate Zones are set for polygon geometries
#' @param sfList a list which contains the classifications to compare, as sf objects
#' @param columns a vector which contains, for each sf of sfList, the name of the column of the classification to compare
#' @param refCrs a number which indicates which sf object from sfList will provide the CRS in which all the sf objects will be projected before comparison
#' By defautl it is set to an empty string and no ID is loaded.
#' @param sfWf a vector of strings which contains the names of the workflows used to produce the sf objects
#' @importFrom ggplot2 geom_sf guides ggtitle aes
#' @import sf dplyr cowplot forcats units tidyr RColorBrewer utils grDevices rlang
#' @return an sf file with values of LCZ from all the input
#' are assigned to geometries resulting from intersection of all input geometries
#' @export
#' @examples
createIntersect<-function(sfList, columns, refCrs=NULL, sfWf=NULL){
sfInt<-sfList[[1]] %>% select(columns[1])
if (is.null(refCrs)){refCrs<-st_crs(sfInt)}
for (i in 2:length(sfList)){
sfProv<-sfList[[i]] %>% select(columns[i])
if (st_crs(sfProv) != refCrs ) {sfProv<-st_transform(sfProv, crs=refCrs)}
echInt<-st_intersection(echInt,sfProv)
sfInt<-st_intersection(sfInt,sfProv)
}
if (!is.null(sfWf) & length(sfWf) == length(sfList)){
names(echInt)[1:(ncol(echInt)-1)]<-paste0("LCZ",sfWf)
} else { names(echInt)[1:(ncol(echInt)-1)]<-paste0("LCZ",1:length(sfList)) }
echInt
names(sfInt)[1:(ncol(sfInt)-1)]<-paste0("LCZ",sfWf)
} else { names(sfInt)[1:(ncol(sfInt)-1)]<-paste0("LCZ",1:length(sfList)) }
return(sfInt)
}

# sfBDT_11_78030<-importLCZvect(dirPath="/home/gousseff/Documents/0_DocBiblioTutosPublis/0_ArticlesScientEtThèses/ArticleComparaisonLCZGCWUDAPTEXPERTS/BDT/2011/bdtopo_2_78030",
Expand Down
55 changes: 55 additions & 0 deletions R/multipleCramer.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
#' Computes Cramer's V for all pairs of one hot encoded levels of input LCZ classifications
#' @param sfInt an sf object with several LCZ classifications to compare on the same intersected geometries,
#' typically an output of the createIntersec function
#' @param columns a vector which contains names of LCZ classification columns
#' @importFrom ggplot2 geom_sf guides ggtitle aes
#' @importFrom caret dummyVars
#' @import sf dplyr cowplot forcats units tidyr RColorBrewer utils grDevices rlang
#' @return an sf file with values of LCZ from all the input
#' are assigned to geometries resulting from intersection of all input geometries
#' @export
#' @examples
multipleCramer<-function(sfInt, columns, nbOutAssociations ){
if("sf"%in%class(sfInt)){sfInt<-as.data.frame(st_drop_geometry(sfInt))}
sfInt<-sfInt[,columns]
formula_dummy<-reformulate(columns)
sfDummy<-caret::dummyVars(formula = formula_dummy, data = sfInt) %>% predict(newdata = sfInt) %>% as.data.frame
notAlwaysZero<-apply(X = sfDummy, MARGIN = 2, sum)!=0
sfDummy<-sfDummy[,notAlwaysZero]
# print(summary(sfDummy))


dimVs<-ncol(sfDummy)
Vs<-matrix(ncol = dimVs, nrow = dimVs)
for (i in 1:length(names(sfDummy))){
for (j in i:length(names(sfDummy))){
Vs[i,j]<-confintr::cramersv(chisq.test(table(sfDummy[,c(i,j)]), correct = FALSE))
# print(table(sfDummy[,c(i,j)]))
}
}
rownames(Vs)<-names(sfDummy)
colnames(Vs)<-names(sfDummy)

threshold <- Vs %>% as.vector %>% unique %>% sort(decreasing=TRUE) %>% head(n = (nbOutAssociations+1)) %>% min
print(paste0("threshold = ", threshold))

Mask<-Vs
Mask<-(Mask>=threshold)
Mask[Vs<threshold]<-NA
Mask[Vs==1]<- NA

keptRows<-apply(
X = apply(X = Mask, MARGIN = 1, is.na),
MARGIN = 1, sum)<ncol(Vs)
keptCols<-apply(
X = apply(X = Mask, MARGIN = 2, is.na ),
MARGIN = 2, sum)<nrow(Vs)
print("keptRows = ") ; print(keptRows)
print("keptCols = ") ; print(keptCols)
signifAssoc<-Vs[keptRows, keptCols]
signifAssoc[(signifAssoc>=1 | signifAssoc<threshold)]<-NA

output<-list(cramerMatrix = Vs, signifAssoc = signifAssoc)

return(output)
}
52 changes: 52 additions & 0 deletions inst/tinytest/test_compareMultiple.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
# This tests the function createIntersect
# library(tinytest)
#
# library(sf)

sfBDT_11_78030<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2011/bdtopo_2_78030/",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
class(sfBDT_11_78030)
sfBDT_22_78030<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2022/bdtopo_3_78030/",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
sf_OSM_11_Auffargis<-importLCZvect(dirPath="//home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2011/osm_Auffargis/",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
sf_OSM_22_Auffargis<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2022/osm_Auffargis/",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
sf_WUDAPT_78030<-importLCZvect("/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/WUDAPT/",
file ="wudapt_Auffargis.fgb", column="lcz_primary")

sfList<-list(BDT11 = sfBDT_11_78030, BDT22 = sfBDT_22_78030, OSM11= sf_OSM_11_Auffargis, OSM22 = sf_OSM_22_Auffargis,
WUDAPT = sf_WUDAPT_78030)
# showLCZ(sfList[[1]])



intersected<-createIntersect(sfList = sfList, columns = c(rep("LCZ_PRIMARY",4),"lcz_primary"),
sfWf = c("BDT11","BDT22","OSM11","OSM22","WUDAPT"))
names(intersected) %>% reformulate()


# test_list<-list(a=c(1,2),b="top",c=TRUE)
# length(test_list)
# for (i in test_list[2:3]) print(str(i))

multicompare_test<-compareMultipleLCZ(sfList = sfList, LCZcolumns = c(rep("LCZ_PRIMARY",4),"lcz_primary"),
sfWf = c("BDT11","BDT22","OSM11","OSM22","WUDAPT"),trimPerc = 0.5)
multicompare_test

test<-multicompare_test$echIntLong
test2<-test %>% subset(agree==TRUE) %>% group_by(LCZvalue) %>% summarize(agreementArea=sum(area)) %>% mutate(percAgreementArea=agreementArea/sum(agreementArea))

test<-multicompare_test$echInt[,1:5] %>% st_drop_geometry()
prov1<-apply(X = test, MARGIN = 1, table )
prov2<-apply(X = test, MARGIN = 1, function(x) max(table(x)) )

head(prov1)
head(prov2)

plot1<-showLCZ(sf = multicompare_test$echInt, column="LCZBDT22", wf="22")
plot2<-showLCZ(sf = multicompare_test$echInt, column="LCZBDT11", wf="11")

ggplot(data=multicompare_test$echInt) +
geom_sf(aes(fill=nbAgree, color=after_scale(fill)))+
scale_fill_gradient(low = "red" , high = "green", na.value = NA)
21 changes: 21 additions & 0 deletions inst/tinytest/test_createIntersect.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# This tests the function createIntersect
# library(tinytest)
#
# library(sf)

sfBDT_11_78030<-importLCZvect(dirPath="/home/gousseff/Documents/0_DocBiblioTutosPublis/0_ArticlesScientEtThèses/ArticleComparaisonLCZGCWUDAPTEXPERTS/BDT/2011/bdtopo_2_78030",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
class(sfBDT_11_78030)
sfBDT_22_78030<-importLCZvect(dirPath="/home/gousseff/Documents/0_DocBiblioTutosPublis/0_ArticlesScientEtThèses/ArticleComparaisonLCZGCWUDAPTEXPERTS/BDT/2022/bdtopo_3_78030",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
sf_OSM_11_Auffargis<-importLCZvect(dirPath="/home/gousseff/Documents/0_DocBiblioTutosPublis/0_ArticlesScientEtThèses/ArticleComparaisonLCZGCWUDAPTEXPERTS/OSM/2011/osm_Auffargis/",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
sf_OSM_22_Auffargis<-importLCZvect(dirPath="/home/gousseff/Documents/0_DocBiblioTutosPublis/0_ArticlesScientEtThèses/ArticleComparaisonLCZGCWUDAPTEXPERTS/OSM/2022/osm_Auffargis/",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
sf_WUDAPT_78030<-importLCZvect("/home/gousseff/Documents/0_DocBiblioTutosPublis/0_ArticlesScientEtThèses/ArticleComparaisonLCZGCWUDAPTEXPERTS/WUDAPT",
file ="wudapt_78030.geojson", column="lcz_primary")

sfList<-list(BDT11 = sfBDT_11_78030, BDT22 = sfBDT_22_78030, OSM11= sf_OSM_11_Auffargis, OSM22 = sf_OSM_22_Auffargis,
WUDAPT = sf_WUDAPT_78030)
intersected<-createIntersec(sfList = sfList, columns = c(rep("LCZ_PRIMARY",4),"lcz_primary"),
sfWf = c("BDT11","BDT22","OSM11","OSM22","WUDAPT"))
51 changes: 51 additions & 0 deletions inst/tinytest/test_multipleCramer.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
# This tests the function multipleCramer
# library(tinytest)
#
# library(sf)

sfBDT_11_78030<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2011/bdtopo_2_78030/",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
class(sfBDT_11_78030)
sfBDT_22_78030<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2022/bdtopo_3_78030/",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
sf_OSM_11_Auffargis<-importLCZvect(dirPath="//home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2011/osm_Auffargis/",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
sf_OSM_22_Auffargis<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2022/osm_Auffargis/",
file="rsu_lcz.fgb", column="LCZ_PRIMARY")
sf_WUDAPT_78030<-importLCZvect("/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/WUDAPT/",
file ="wudapt_Auffargis.fgb", column="lcz_primary")

sfList<-list(BDT11 = sfBDT_11_78030, BDT22 = sfBDT_22_78030, OSM11= sf_OSM_11_Auffargis, OSM22 = sf_OSM_22_Auffargis,
WUDAPT = sf_WUDAPT_78030)
# showLCZ(sfList[[1]])



intersected<-createIntersect(sfList = sfList, columns = c(rep("LCZ_PRIMARY",4),"lcz_primary"),
sfWf = c("BDT11","BDT22","OSM11","OSM22","WUDAPT"))
intersectedSample


testVs<-multipleCramer(intersected, columns = names(intersected)[names(intersected)!="geometry"], nbOutAssociations = 5)
testVs$signifAssoc
str(testVs)

names6<-grep(x = rownames(testVs), pattern = "6$", value = TRUE)

testVsSampled<-testVs[1:6,1:6]

testMask<-testVsSampled
ncol(testVsSampled)
nrow(testVsSampled)
testMask<-(testMask>0.004 & testMask<1)
testMask[!testMask]<-NA

keptRows<-apply(
X = apply(X = testMask, MARGIN = 1, is.na),
MARGIN = 1, sum)<ncol(testVs)
keptCols<-apply(
X = apply(X = testMask, MARGIN = 2, is.na ),
MARGIN = 2, sum)<nrow(testVs)
signifAssoc<-testVsSampled[keptRows, keptCols]
signifAssoc[(signifAssoc>=1 | signifAssoc<0.004)]<-NA

0 comments on commit 95c9802

Please sign in to comment.