multiple cramer Pb with significative associations

orbisgis · Dec 2, 2024 · 95c9802 · 95c9802
1 parent 872ecef
commit 95c9802
Show file tree

Hide file tree

Showing 6 changed files with 200 additions and 56 deletions.
diff --git a/R/compareMultipleLCZ.R b/R/compareMultipleLCZ.R
@@ -19,7 +19,7 @@
 #' @examples
 #' 
 compareMultipleLCZ<-function(sfList, LCZcolumns, refCrs=NULL, sfWf=NULL, trimPerc=0.05){
-  echInt<-createIntersec(sfList = sfList, LCZcolumns = LCZcolumns , refCrs= refCrs, sfWf = sfWf)
+  echInt<-createIntersect(sfList = sfList, columns = LCZcolumns , refCrs= refCrs, sfWf = sfWf)
   print(nrow(echInt))
   echInt$area<-st_area(echInt)
   echInt <- echInt %>% subset(area>quantile(echInt$area, probs=trimPerc) & !is.na(area))
@@ -32,7 +32,7 @@ compareMultipleLCZ<-function(sfList, LCZcolumns, refCrs=NULL, sfWf=NULL, trimPer
       echIntnogeom[,compName]<-echIntnogeom[,i] == echIntnogeom[,j]
     }
   }
-  rangeCol<-(length(listSfs)+3):ncol(echIntnogeom)
+  rangeCol<-(length(sfList)+3):ncol(echIntnogeom)
   print(rangeCol)
   # print(names(echIntnogeom[,rangeCol]))
   echIntnogeom$nbAgree<-apply(echIntnogeom[,rangeCol],MARGIN=1,sum)
@@ -53,50 +53,3 @@ compareMultipleLCZ<-function(sfList, LCZcolumns, refCrs=NULL, sfWf=NULL, trimPer
 }
 
 
-sfBDT_11_78030<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2011/bdtopo_2_78030/",
-                   file="rsu_lcz.fgb", column="LCZ_PRIMARY")
-class(sfBDT_11_78030)
-sfBDT_22_78030<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2022/bdtopo_3_78030/",
-                              file="rsu_lcz.fgb", column="LCZ_PRIMARY")
-sf_OSM_11_Auffargis<-importLCZvect(dirPath="//home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2011/osm_Auffargis/",
-                                   file="rsu_lcz.fgb", column="LCZ_PRIMARY")
-sf_OSM_22_Auffargis<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2022/osm_Auffargis/",
-                                   file="rsu_lcz.fgb", column="LCZ_PRIMARY")
-sf_WUDAPT_78030<-importLCZvect("/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/WUDAPT/",
-                               file ="wudapt_Auffargis.fgb", column="lcz_primary")
-
-sfList<-list(BDT11 = sfBDT_11_78030, BDT22 = sfBDT_22_78030, OSM11= sf_OSM_11_Auffargis, OSM22 = sf_OSM_22_Auffargis,
-             WUDAPT = sf_WUDAPT_78030)
-showLCZ(sfList[[1]])
-
-
-
-intersected<-createIntersec(sfList = sfList, LCZcolumns = c(rep("LCZ_PRIMARY",4),"lcz_primary"),
-                            sfWf = c("BDT11","BDT22","OSM11","OSM22","WUDAPT"))
-
-
-# test_list<-list(a=c(1,2),b="top",c=TRUE)
-# length(test_list)
-# for (i in test_list[2:3]) print(str(i))
-
-multicompare_test<-compareMultipleLCZ(sfList = sfList, LCZcolumns = c(rep("LCZ_PRIMARY",4),"lcz_primary"),
-                                      sfWf = c("BDT11","BDT22","OSM11","OSM22","WUDAPT"),trimPerc = 0.5)
-multicompare_test
-
-test<-multicompare_test$echIntLong
-test2<-test %>% subset(agree==TRUE) %>% group_by(LCZvalue) %>% summarize(agreementArea=sum(area)) %>% mutate(percAgreementArea=agreementArea/sum(agreementArea))
-
-test<-multicompare_test$echInt[,1:5] %>% st_drop_geometry()
-prov1<-apply(X = test, MARGIN = 1, table )
-prov2<-apply(X = test, MARGIN = 1, function(x) max(table(x)) )
-
-head(prov1)
-head(prov2)
-
-plot1<-showLCZ(sf = multicompare_test$echInt, column="LCZBDT22", wf="22")
-plot2<-showLCZ(sf = multicompare_test$echInt, column="LCZBDT11", wf="11")
-
-ggplot(data=multicompare_test$echInt) +
-  geom_sf(aes(fill=maxAgree, color=after_scale(fill)))+
-  scale_fill_gradient(low = "red" , high = "green", na.value = NA)
-
diff --git a/R/shinyGC/createIntersect.R → R/createIntersect.R b/R/shinyGC/createIntersect.R → R/createIntersect.R
@@ -1,15 +1,27 @@
-createIntersec<-function(sfList, columns, refCrs=NULL, sfWf=NULL){
-  echInt<-sfList[[1]] %>% select(columns[1])
-  if (is.null(refCrs)){refCrs<-st_crs(echInt)}
+#' Intersect multiple sf files in which Local Climate Zones are set for polygon geometries 
+#' @param sfList a list which contains the classifications to compare, as sf objects
+#' @param columns a vector which contains, for each sf of sfList, the name of the column of the classification to compare
+#' @param refCrs a number which indicates which sf object from sfList will provide the CRS in which all the sf objects will be projected before comparison
+#' By defautl it is set to an empty string and no ID is loaded.
+#' @param sfWf a vector of strings which contains the names of the workflows used to produce the sf objects 
+#' @importFrom ggplot2 geom_sf guides ggtitle aes
+#' @import sf dplyr cowplot forcats units tidyr RColorBrewer utils grDevices rlang
+#' @return an sf file with values of LCZ from all the input 
+#' are assigned to geometries resulting from intersection of all input geometries
+#' @export
+#' @examples
+createIntersect<-function(sfList, columns, refCrs=NULL, sfWf=NULL){
+  sfInt<-sfList[[1]] %>% select(columns[1])
+  if (is.null(refCrs)){refCrs<-st_crs(sfInt)}
   for (i in 2:length(sfList)){
     sfProv<-sfList[[i]] %>% select(columns[i])
     if (st_crs(sfProv) != refCrs ) {sfProv<-st_transform(sfProv, crs=refCrs)}
-    echInt<-st_intersection(echInt,sfProv)
+    sfInt<-st_intersection(sfInt,sfProv)
   }
   if (!is.null(sfWf) & length(sfWf) == length(sfList)){
-    names(echInt)[1:(ncol(echInt)-1)]<-paste0("LCZ",sfWf)
-  } else { names(echInt)[1:(ncol(echInt)-1)]<-paste0("LCZ",1:length(sfList)) }
-  echInt 
+    names(sfInt)[1:(ncol(sfInt)-1)]<-paste0("LCZ",sfWf)
+  } else { names(sfInt)[1:(ncol(sfInt)-1)]<-paste0("LCZ",1:length(sfList)) }
+  return(sfInt) 
 }
 
 # sfBDT_11_78030<-importLCZvect(dirPath="/home/gousseff/Documents/0_DocBiblioTutosPublis/0_ArticlesScientEtThèses/ArticleComparaisonLCZGCWUDAPTEXPERTS/BDT/2011/bdtopo_2_78030",

diff --git a/R/multipleCramer.R b/R/multipleCramer.R
@@ -0,0 +1,55 @@
+#' Computes Cramer's V for all pairs of one hot encoded levels of input LCZ classifications
+#' @param sfInt an sf object with several LCZ classifications to compare on the same intersected geometries, 
+#' typically an output of the createIntersec function
+#' @param columns a vector which contains names of LCZ classification columns
+#' @importFrom ggplot2 geom_sf guides ggtitle aes
+#' @importFrom caret dummyVars
+#' @import sf dplyr cowplot forcats units tidyr RColorBrewer utils grDevices rlang
+#' @return an sf file with values of LCZ from all the input 
+#' are assigned to geometries resulting from intersection of all input geometries
+#' @export
+#' @examples
+multipleCramer<-function(sfInt, columns, nbOutAssociations ){ 
+   if("sf"%in%class(sfInt)){sfInt<-as.data.frame(st_drop_geometry(sfInt))}
+  sfInt<-sfInt[,columns]
+  formula_dummy<-reformulate(columns)
+  sfDummy<-caret::dummyVars(formula = formula_dummy, data = sfInt) %>% predict(newdata = sfInt) %>% as.data.frame
+  notAlwaysZero<-apply(X = sfDummy, MARGIN = 2, sum)!=0
+  sfDummy<-sfDummy[,notAlwaysZero]
+  # print(summary(sfDummy))
+
+
+  dimVs<-ncol(sfDummy)
+  Vs<-matrix(ncol = dimVs, nrow = dimVs)
+  for (i in 1:length(names(sfDummy))){
+    for (j in i:length(names(sfDummy))){
+      Vs[i,j]<-confintr::cramersv(chisq.test(table(sfDummy[,c(i,j)]), correct = FALSE))
+      # print(table(sfDummy[,c(i,j)]))
+    }
+  }
+  rownames(Vs)<-names(sfDummy)
+  colnames(Vs)<-names(sfDummy)
+
+  threshold <- Vs %>% as.vector %>% unique %>% sort(decreasing=TRUE) %>% head(n = (nbOutAssociations+1)) %>% min
+  print(paste0("threshold = ", threshold))
+
+  Mask<-Vs
+  Mask<-(Mask>=threshold)
+  Mask[Vs<threshold]<-NA
+  Mask[Vs==1]<- NA
+
+  keptRows<-apply(
+    X = apply(X = Mask, MARGIN = 1, is.na),
+    MARGIN = 1, sum)<ncol(Vs)
+  keptCols<-apply(
+    X = apply(X = Mask, MARGIN = 2, is.na ),
+    MARGIN = 2, sum)<nrow(Vs)
+  print("keptRows = ") ; print(keptRows)
+  print("keptCols = ") ; print(keptCols)
+  signifAssoc<-Vs[keptRows, keptCols]
+  signifAssoc[(signifAssoc>=1 | signifAssoc<threshold)]<-NA
+
+  output<-list(cramerMatrix = Vs, signifAssoc = signifAssoc)
+
+  return(output)
+}
diff --git a/inst/tinytest/test_compareMultiple.R b/inst/tinytest/test_compareMultiple.R
@@ -0,0 +1,52 @@
+# This tests the function createIntersect
+# library(tinytest)
+#
+# library(sf)
+
+sfBDT_11_78030<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2011/bdtopo_2_78030/",
+                              file="rsu_lcz.fgb", column="LCZ_PRIMARY")
+class(sfBDT_11_78030)
+sfBDT_22_78030<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2022/bdtopo_3_78030/",
+                              file="rsu_lcz.fgb", column="LCZ_PRIMARY")
+sf_OSM_11_Auffargis<-importLCZvect(dirPath="//home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2011/osm_Auffargis/",
+                                   file="rsu_lcz.fgb", column="LCZ_PRIMARY")
+sf_OSM_22_Auffargis<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2022/osm_Auffargis/",
+                                   file="rsu_lcz.fgb", column="LCZ_PRIMARY")
+sf_WUDAPT_78030<-importLCZvect("/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/WUDAPT/",
+                               file ="wudapt_Auffargis.fgb", column="lcz_primary")
+
+sfList<-list(BDT11 = sfBDT_11_78030, BDT22 = sfBDT_22_78030, OSM11= sf_OSM_11_Auffargis, OSM22 = sf_OSM_22_Auffargis,
+             WUDAPT = sf_WUDAPT_78030)
+# showLCZ(sfList[[1]])
+
+
+
+intersected<-createIntersect(sfList = sfList, columns = c(rep("LCZ_PRIMARY",4),"lcz_primary"),
+                             sfWf = c("BDT11","BDT22","OSM11","OSM22","WUDAPT"))
+names(intersected) %>% reformulate()
+
+
+# test_list<-list(a=c(1,2),b="top",c=TRUE)
+# length(test_list)
+# for (i in test_list[2:3]) print(str(i))
+
+multicompare_test<-compareMultipleLCZ(sfList = sfList, LCZcolumns = c(rep("LCZ_PRIMARY",4),"lcz_primary"),
+                                      sfWf = c("BDT11","BDT22","OSM11","OSM22","WUDAPT"),trimPerc = 0.5)
+multicompare_test
+
+test<-multicompare_test$echIntLong
+test2<-test %>% subset(agree==TRUE) %>% group_by(LCZvalue) %>% summarize(agreementArea=sum(area)) %>% mutate(percAgreementArea=agreementArea/sum(agreementArea))
+
+test<-multicompare_test$echInt[,1:5] %>% st_drop_geometry()
+prov1<-apply(X = test, MARGIN = 1, table )
+prov2<-apply(X = test, MARGIN = 1, function(x) max(table(x)) )
+
+head(prov1)
+head(prov2)
+
+plot1<-showLCZ(sf = multicompare_test$echInt, column="LCZBDT22", wf="22")
+plot2<-showLCZ(sf = multicompare_test$echInt, column="LCZBDT11", wf="11")
+
+ggplot(data=multicompare_test$echInt) +
+  geom_sf(aes(fill=nbAgree, color=after_scale(fill)))+
+  scale_fill_gradient(low = "red" , high = "green", na.value = NA)
diff --git a/inst/tinytest/test_createIntersect.R b/inst/tinytest/test_createIntersect.R
@@ -0,0 +1,21 @@
+# This tests the function createIntersect
+# library(tinytest)
+#
+# library(sf)
+
+sfBDT_11_78030<-importLCZvect(dirPath="/home/gousseff/Documents/0_DocBiblioTutosPublis/0_ArticlesScientEtThèses/ArticleComparaisonLCZGCWUDAPTEXPERTS/BDT/2011/bdtopo_2_78030",
+                   file="rsu_lcz.fgb", column="LCZ_PRIMARY")
+class(sfBDT_11_78030)
+sfBDT_22_78030<-importLCZvect(dirPath="/home/gousseff/Documents/0_DocBiblioTutosPublis/0_ArticlesScientEtThèses/ArticleComparaisonLCZGCWUDAPTEXPERTS/BDT/2022/bdtopo_3_78030",
+                              file="rsu_lcz.fgb", column="LCZ_PRIMARY")
+sf_OSM_11_Auffargis<-importLCZvect(dirPath="/home/gousseff/Documents/0_DocBiblioTutosPublis/0_ArticlesScientEtThèses/ArticleComparaisonLCZGCWUDAPTEXPERTS/OSM/2011/osm_Auffargis/",
+                                   file="rsu_lcz.fgb", column="LCZ_PRIMARY")
+sf_OSM_22_Auffargis<-importLCZvect(dirPath="/home/gousseff/Documents/0_DocBiblioTutosPublis/0_ArticlesScientEtThèses/ArticleComparaisonLCZGCWUDAPTEXPERTS/OSM/2022/osm_Auffargis/",
+                                   file="rsu_lcz.fgb", column="LCZ_PRIMARY")
+sf_WUDAPT_78030<-importLCZvect("/home/gousseff/Documents/0_DocBiblioTutosPublis/0_ArticlesScientEtThèses/ArticleComparaisonLCZGCWUDAPTEXPERTS/WUDAPT", 
+                               file ="wudapt_78030.geojson", column="lcz_primary")
+
+sfList<-list(BDT11 = sfBDT_11_78030, BDT22 = sfBDT_22_78030, OSM11= sf_OSM_11_Auffargis, OSM22 = sf_OSM_22_Auffargis, 
+             WUDAPT = sf_WUDAPT_78030)
+intersected<-createIntersec(sfList = sfList, columns = c(rep("LCZ_PRIMARY",4),"lcz_primary"), 
+                            sfWf = c("BDT11","BDT22","OSM11","OSM22","WUDAPT"))
diff --git a/inst/tinytest/test_multipleCramer.R b/inst/tinytest/test_multipleCramer.R
@@ -0,0 +1,51 @@
+# This tests the function multipleCramer
+# library(tinytest)
+#
+# library(sf)
+
+sfBDT_11_78030<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2011/bdtopo_2_78030/",
+                              file="rsu_lcz.fgb", column="LCZ_PRIMARY")
+class(sfBDT_11_78030)
+sfBDT_22_78030<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2022/bdtopo_3_78030/",
+                              file="rsu_lcz.fgb", column="LCZ_PRIMARY")
+sf_OSM_11_Auffargis<-importLCZvect(dirPath="//home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2011/osm_Auffargis/",
+                                   file="rsu_lcz.fgb", column="LCZ_PRIMARY")
+sf_OSM_22_Auffargis<-importLCZvect(dirPath="/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/GeoClimate/2022/osm_Auffargis/",
+                                   file="rsu_lcz.fgb", column="LCZ_PRIMARY")
+sf_WUDAPT_78030<-importLCZvect("/home/gousseff/Documents/3_data/data_article_LCZ_diff_algos/WUDAPT/",
+                               file ="wudapt_Auffargis.fgb", column="lcz_primary")
+
+sfList<-list(BDT11 = sfBDT_11_78030, BDT22 = sfBDT_22_78030, OSM11= sf_OSM_11_Auffargis, OSM22 = sf_OSM_22_Auffargis,
+             WUDAPT = sf_WUDAPT_78030)
+# showLCZ(sfList[[1]])
+
+
+
+intersected<-createIntersect(sfList = sfList, columns = c(rep("LCZ_PRIMARY",4),"lcz_primary"),
+                             sfWf = c("BDT11","BDT22","OSM11","OSM22","WUDAPT"))
+intersectedSample
+
+
+testVs<-multipleCramer(intersected, columns = names(intersected)[names(intersected)!="geometry"], nbOutAssociations = 5)
+testVs$signifAssoc
+str(testVs)
+
+names6<-grep(x = rownames(testVs), pattern = "6$", value = TRUE)
+
+testVsSampled<-testVs[1:6,1:6]
+
+testMask<-testVsSampled
+ncol(testVsSampled)
+nrow(testVsSampled)
+testMask<-(testMask>0.004 & testMask<1)
+testMask[!testMask]<-NA
+
+keptRows<-apply(
+  X = apply(X = testMask, MARGIN = 1, is.na), 
+  MARGIN = 1, sum)<ncol(testVs)
+keptCols<-apply(
+  X = apply(X = testMask, MARGIN = 2, is.na ),
+  MARGIN = 2, sum)<nrow(testVs)
+signifAssoc<-testVsSampled[keptRows, keptCols]
+signifAssoc[(signifAssoc>=1 | signifAssoc<0.004)]<-NA
+