Skip to content

Commit

Permalink
.
Browse files Browse the repository at this point in the history
  • Loading branch information
thornoe committed Feb 19, 2024
1 parent 80ff9ac commit 63e81a1
Show file tree
Hide file tree
Showing 12 changed files with 115 additions and 44 deletions.
4 changes: 2 additions & 2 deletions gis/coastal_CV.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,10 +36,10 @@
"blue": "#4477AA",
"cyan": "#66CCEE",
"green": "#228833",
"grey": "#BBBBBB", # moved up to be used for eco status of observed coastal waters
"yellow": "#CCBB44",
"red": "#EE6677",
"purple": "#AA3377",
"grey": "#BBBBBB", # moved up to be used for eco status of observed coastal waters
}

# Set the default property-cycle and figure size for pyplots
Expand Down Expand Up @@ -300,7 +300,7 @@ def stepwise_selection(subset, dummies, data, dfDummies, years):


########################################################################################
# 2. Multivariate feature imputation (note: Forward Stepwise Selection takes ~6 hours)
# 2. Multivariate feature imputation (note: Forward Stepwise Selection takes ~1 day)
########################################################################################
# Forward stepwise selection of dummies - CV over all observed values in coastal waters
kwargs = {
Expand Down
38 changes: 38 additions & 0 deletions gis/output/coastal_eco_imp_LessThanGood.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
,n,Obs,No dummies,Sediment,"Sediment, Deep"
1988,37.0,1.0,1.0,1.0,1.0
1989,59.0,0.9152542372881356,0.9166666666666666,0.9259259259259259,0.9259259259259259
1990,58.0,0.8793103448275862,0.8981481481481481,0.8888888888888888,0.8888888888888888
1991,61.0,0.9508196721311475,0.9537037037037037,0.9537037037037037,0.9537037037037037
1992,63.0,0.9206349206349206,0.9537037037037037,0.9537037037037037,0.9537037037037037
1993,61.0,0.8688524590163934,0.9166666666666666,0.9074074074074074,0.9074074074074074
1994,63.0,0.9365079365079365,0.9537037037037037,0.9537037037037037,0.9537037037037037
1995,60.0,0.9,0.9259259259259259,0.9259259259259259,0.9351851851851852
1996,61.0,0.8360655737704918,0.8796296296296297,0.8796296296296297,0.8888888888888888
1997,60.0,0.8833333333333333,0.9351851851851852,0.9259259259259259,0.9259259259259259
1998,53.0,0.9056603773584906,0.9166666666666666,0.9074074074074074,0.9074074074074074
1999,56.0,0.8928571428571429,0.9259259259259259,0.8796296296296297,0.8981481481481481
2000,55.0,0.8727272727272727,0.8981481481481481,0.8981481481481481,0.8981481481481481
2001,55.0,0.8363636363636363,0.8888888888888888,0.8796296296296297,0.8888888888888888
2002,58.0,0.8793103448275862,0.8888888888888888,0.8888888888888888,0.8796296296296297
2003,57.0,0.8947368421052632,0.9074074074074074,0.8981481481481481,0.9166666666666666
2004,53.0,0.8490566037735849,0.9074074074074074,0.9074074074074074,0.9074074074074074
2005,56.0,0.8214285714285714,0.8703703703703703,0.8703703703703703,0.8703703703703703
2006,57.0,0.9122807017543859,0.9351851851851852,0.9351851851851852,0.9351851851851852
2007,52.0,0.8846153846153846,0.8981481481481481,0.9074074074074074,0.8981481481481481
2008,36.0,0.8333333333333334,0.8981481481481481,0.8888888888888888,0.8888888888888888
2009,40.0,0.725,0.8333333333333334,0.8333333333333334,0.8425925925925926
2010,35.0,0.8,0.8703703703703703,0.8611111111111112,0.8703703703703703
2011,55.0,0.8727272727272727,0.8981481481481481,0.8981481481481481,0.8981481481481481
2012,62.0,0.6129032258064516,0.7037037037037037,0.6944444444444444,0.7037037037037037
2013,55.0,0.7636363636363637,0.8240740740740741,0.8055555555555556,0.7962962962962963
2014,57.0,0.7192982456140351,0.7777777777777778,0.7777777777777778,0.7870370370370371
2015,55.0,0.6727272727272727,0.75,0.75,0.7685185185185185
2016,73.0,0.863013698630137,0.9074074074074074,0.9074074074074074,0.8981481481481481
2017,67.0,0.7313432835820896,0.7870370370370371,0.7962962962962963,0.7870370370370371
2018,65.0,0.8461538461538461,0.8518518518518519,0.8518518518518519,0.8518518518518519
2019,69.0,0.7971014492753623,0.7870370370370371,0.7962962962962963,0.7870370370370371
2020,67.0,0.8208955223880597,0.8425925925925926,0.8425925925925926,0.8518518518518519
2021,66.0,0.8636363636363636,0.8888888888888888,0.8888888888888888,0.8981481481481481
2022,47.0,0.8723404255319149,0.8796296296296297,0.8703703703703703,0.8796296296296297
2023,62.0,0.8709677419354839,0.8611111111111112,0.8796296296296297,0.8796296296296297
Total,2046.0,0.8470185728250245,0.8798468556533073,0.8773894862604539,0.8797065638463487
Binary file modified gis/output/coastal_eco_imp_LessThanGood.pdf
Binary file not shown.
69 changes: 34 additions & 35 deletions gis/output/coastal_eco_imp_accuracy.csv
Original file line number Diff line number Diff line change
@@ -1,35 +1,34 @@
,n,No dummies,No,K,B,Ø,Fj,Vf,Vu,F,D,L,Se,Sa,T,DK2
1989,59,0.593220339,0.610169492,0.610169492,0.593220339,0.576271186,0.627118644,0.627118644,0.677966102,0.644067797,0.593220339,0.644067797,0.593220339,0.610169492,0.610169492,0.627118644
1990,58,0.568965517,0.534482759,0.568965517,0.568965517,0.551724138,0.5,0.586206897,0.586206897,0.448275862,0.568965517,0.534482759,0.517241379,0.482758621,0.534482759,0.517241379
1991,61,0.590163934,0.62295082,0.590163934,0.606557377,0.590163934,0.62295082,0.606557377,0.590163934,0.573770492,0.573770492,0.62295082,0.590163934,0.540983607,0.62295082,0.557377049
1992,63,0.587301587,0.619047619,0.603174603,0.650793651,0.619047619,0.619047619,0.603174603,0.587301587,0.603174603,0.619047619,0.53968254,0.619047619,0.571428571,0.619047619,0.587301587
1993,61,0.540983607,0.606557377,0.573770492,0.540983607,0.573770492,0.508196721,0.590163934,0.62295082,0.62295082,0.557377049,0.606557377,0.606557377,0.557377049,0.606557377,0.524590164
1994,63,0.587301587,0.587301587,0.571428571,0.634920635,0.603174603,0.634920635,0.619047619,0.603174603,0.587301587,0.571428571,0.603174603,0.571428571,0.555555556,0.587301587,0.555555556
1995,60,0.5,0.516666667,0.45,0.433333333,0.483333333,0.4,0.533333333,0.45,0.433333333,0.466666667,0.5,0.533333333,0.466666667,0.516666667,0.5
1996,61,0.540983607,0.62295082,0.590163934,0.573770492,0.606557377,0.508196721,0.540983607,0.573770492,0.508196721,0.557377049,0.639344262,0.573770492,0.590163934,0.62295082,0.590163934
1997,60,0.5,0.516666667,0.516666667,0.5,0.516666667,0.516666667,0.533333333,0.5,0.483333333,0.583333333,0.45,0.483333333,0.516666667,0.516666667,0.483333333
1998,53,0.566037736,0.603773585,0.603773585,0.622641509,0.622641509,0.509433962,0.603773585,0.58490566,0.603773585,0.641509434,0.603773585,0.622641509,0.547169811,0.603773585,0.547169811
1999,56,0.428571429,0.5,0.464285714,0.482142857,0.428571429,0.517857143,0.464285714,0.482142857,0.446428571,0.5,0.410714286,0.553571429,0.517857143,0.5,0.410714286
2000,55,0.636363636,0.654545455,0.636363636,0.672727273,0.654545455,0.672727273,0.672727273,0.618181818,0.672727273,0.6,0.654545455,0.618181818,0.636363636,0.654545455,0.636363636
2001,55,0.763636364,0.781818182,0.763636364,0.8,0.763636364,0.763636364,0.8,0.836363636,0.745454545,0.745454545,0.818181818,0.818181818,0.763636364,0.781818182,0.8
2002,58,0.775862069,0.793103448,0.793103448,0.793103448,0.810344828,0.775862069,0.775862069,0.793103448,0.724137931,0.810344828,0.775862069,0.810344828,0.75862069,0.793103448,0.75862069
2003,57,0.456140351,0.596491228,0.526315789,0.561403509,0.456140351,0.526315789,0.526315789,0.543859649,0.543859649,0.543859649,0.561403509,0.561403509,0.438596491,0.596491228,0.543859649
2004,53,0.547169811,0.528301887,0.528301887,0.58490566,0.509433962,0.452830189,0.528301887,0.547169811,0.547169811,0.509433962,0.528301887,0.566037736,0.547169811,0.528301887,0.528301887
2005,56,0.517857143,0.553571429,0.535714286,0.625,0.482142857,0.607142857,0.5,0.553571429,0.553571429,0.589285714,0.607142857,0.660714286,0.482142857,0.553571429,0.464285714
2006,57,0.473684211,0.49122807,0.438596491,0.526315789,0.456140351,0.526315789,0.50877193,0.49122807,0.456140351,0.526315789,0.438596491,0.50877193,0.49122807,0.49122807,0.50877193
2007,52,0.480769231,0.557692308,0.5,0.596153846,0.538461538,0.519230769,0.557692308,0.576923077,0.519230769,0.5,0.480769231,0.557692308,0.538461538,0.557692308,0.5
2008,36,0.75,0.75,0.75,0.777777778,0.75,0.777777778,0.75,0.75,0.777777778,0.722222222,0.777777778,0.805555556,0.777777778,0.75,0.694444444
2009,40,0.525,0.5,0.475,0.45,0.475,0.45,0.525,0.5,0.425,0.475,0.425,0.55,0.5,0.5,0.45
2010,35,0.657142857,0.657142857,0.628571429,0.657142857,0.628571429,0.628571429,0.628571429,0.657142857,0.657142857,0.542857143,0.6,0.657142857,0.657142857,0.657142857,0.6
2011,55,0.418181818,0.509090909,0.509090909,0.454545455,0.472727273,0.454545455,0.418181818,0.454545455,0.381818182,0.490909091,0.490909091,0.436363636,0.436363636,0.509090909,0.436363636
2012,62,0.516129032,0.532258065,0.5,0.548387097,0.532258065,0.532258065,0.5,0.5,0.5,0.5,0.580645161,0.516129032,0.5,0.532258065,0.516129032
2013,55,0.545454545,0.527272727,0.563636364,0.581818182,0.545454545,0.527272727,0.581818182,0.581818182,0.672727273,0.545454545,0.581818182,0.636363636,0.581818182,0.527272727,0.581818182
2014,57,0.649122807,0.631578947,0.701754386,0.736842105,0.649122807,0.649122807,0.649122807,0.649122807,0.614035088,0.736842105,0.631578947,0.701754386,0.666666667,0.631578947,0.596491228
2015,55,0.4,0.490909091,0.436363636,0.454545455,0.490909091,0.490909091,0.454545455,0.490909091,0.509090909,0.490909091,0.509090909,0.509090909,0.418181818,0.490909091,0.545454545
2016,73,0.424657534,0.410958904,0.438356164,0.369863014,0.410958904,0.397260274,0.397260274,0.452054795,0.410958904,0.397260274,0.424657534,0.383561644,0.369863014,0.410958904,0.424657534
2017,67,0.626865672,0.626865672,0.626865672,0.641791045,0.597014925,0.641791045,0.656716418,0.626865672,0.611940299,0.671641791,0.671641791,0.626865672,0.597014925,0.626865672,0.626865672
2018,65,0.569230769,0.615384615,0.538461538,0.6,0.569230769,0.523076923,0.553846154,0.553846154,0.523076923,0.584615385,0.6,0.646153846,0.553846154,0.615384615,0.538461538
2019,69,0.623188406,0.594202899,0.637681159,0.608695652,0.652173913,0.608695652,0.594202899,0.652173913,0.579710145,0.565217391,0.637681159,0.666666667,0.608695652,0.594202899,0.608695652
2020,67,0.641791045,0.701492537,0.582089552,0.731343284,0.686567164,0.671641791,0.626865672,0.656716418,0.626865672,0.671641791,0.582089552,0.671641791,0.537313433,0.701492537,0.626865672
Total,1834,0.55997819,0.586695747,0.568157034,0.590512541,0.570338059,0.565430752,0.575790622,0.583424209,0.559432933,0.575790622,0.577971647,0.595419847,0.552344602,0.586695747,0.557797165
Change,,0,0.026717557,0.008178844,0.030534351,0.010359869,0.005452563,0.015812432,0.02344602,-0.000545256,0.015812432,0.017993457,0.035441658,-0.007633588,0.026717557,-0.002181025
,n,No dummies,Sediment,"Sediment, Deep"
1989,59.0,0.5932203389830508,0.5932203389830508,0.5932203389830508
1990,58.0,0.5689655172413793,0.5172413793103449,0.5344827586206896
1991,61.0,0.5901639344262295,0.5901639344262295,0.6065573770491803
1992,63.0,0.5873015873015873,0.6190476190476191,0.6349206349206349
1993,61.0,0.5409836065573771,0.6065573770491803,0.5901639344262295
1994,63.0,0.5873015873015873,0.5714285714285714,0.6190476190476191
1995,60.0,0.5,0.5333333333333333,0.5
1996,61.0,0.5409836065573771,0.5737704918032787,0.639344262295082
1997,60.0,0.5,0.48333333333333334,0.55
1998,53.0,0.5660377358490566,0.6226415094339622,0.660377358490566
1999,56.0,0.42857142857142855,0.5535714285714286,0.5535714285714286
2000,55.0,0.6363636363636364,0.6181818181818182,0.6727272727272727
2001,55.0,0.7636363636363637,0.8181818181818182,0.7818181818181819
2002,58.0,0.7758620689655172,0.8103448275862069,0.7931034482758621
2003,57.0,0.45614035087719296,0.5614035087719298,0.631578947368421
2004,53.0,0.5471698113207547,0.5660377358490566,0.6037735849056604
2005,56.0,0.5178571428571429,0.6607142857142857,0.6607142857142857
2006,57.0,0.47368421052631576,0.5087719298245614,0.5614035087719298
2007,52.0,0.4807692307692308,0.5576923076923077,0.5769230769230769
2008,36.0,0.75,0.8055555555555556,0.8055555555555556
2009,40.0,0.525,0.55,0.45
2010,35.0,0.6571428571428571,0.6571428571428571,0.6
2011,55.0,0.41818181818181815,0.43636363636363634,0.45454545454545453
2012,62.0,0.5161290322580645,0.5161290322580645,0.46774193548387094
2013,55.0,0.5454545454545454,0.6363636363636364,0.6181818181818182
2014,57.0,0.6491228070175439,0.7017543859649122,0.7192982456140351
2015,55.0,0.4,0.509090909090909,0.5454545454545454
2016,73.0,0.4246575342465753,0.3835616438356164,0.4657534246575342
2017,67.0,0.6268656716417911,0.6268656716417911,0.6417910447761194
2018,65.0,0.5692307692307692,0.6461538461538462,0.6307692307692307
2019,69.0,0.6231884057971014,0.6666666666666666,0.6231884057971014
2020,67.0,0.6417910447761194,0.6716417910447762,0.6716417910447762
Total,1834.0,0.5599781897491821,0.5954198473282443,0.6063249727371864
Binary file modified gis/output/coastal_eco_imp_accuracy.pdf
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
,n,Obs,"Soft bottom, Natural","Soft bottom, Natural, Small"
1988,5.0,0.8,0.6666666666666666,0.8333333333333334
1989,5.0,0.6,0.6666666666666666,0.6666666666666666
1990,5.0,0.6,0.6666666666666666,0.6666666666666666
1991,5.0,0.6,0.6666666666666666,0.6666666666666666
1992,5.0,0.6,0.6666666666666666,0.6666666666666666
1993,6.0,0.6666666666666666,0.6666666666666666,0.6666666666666666
Total,,0.6451612903225806,0.6666666666666666,0.6935483870967741
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
,n,Obs,"Soft bottom, Natural"
1988,5.0,0.8,0.6666666666666666
1989,5.0,0.6,0.6666666666666666
1990,5.0,0.6,0.6666666666666666
1991,5.0,0.6,0.6666666666666666
1992,5.0,0.6,0.6666666666666666
1993,6.0,0.6666666666666666,0.6666666666666666
Total,,0.6451612903225806,0.6666666666666666
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
,n,"Soft bottom, Natural","Soft bottom, Natural, Small"
1989,5.0,1.0,1.0
1990,5.0,1.0,1.0
1991,5.0,1.0,1.0
1992,5.0,1.0,1.0
1993,6.0,1.0,1.0
Total,,1.0,1.0
7 changes: 7 additions & 0 deletions gis/output/streams_eco_imp_accuracy_Soft bottom, Natural.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
,n,"Soft bottom, Natural"
1989,5.0,1.0
1990,5.0,1.0
1991,5.0,1.0
1992,5.0,1.0
1993,6.0,1.0
Total,,1.0
3 changes: 2 additions & 1 deletion gis/script_line-by-line.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
linkage = {
"coastal": ["coastal_stations_VP3.csv", "coastal_chlorophyll_limits.csv"],
"lakes": ["lakes_stations_VP3.csv", "lakes_stations_XY.csv"],
"streams": ["streams_stations_VP3.csv", "streams_stations_XY.csv"],
"streams": ["streams_stations_VP3.csv"],
}

# WFS service URL for the current water body plan (VP2 is for 2015-2021)
Expand Down Expand Up @@ -219,6 +219,7 @@
df[["x", "y"]] = df[["x", "y"]].replace(0, np.nan)
# Set up a longitudinal df with every station and its last non-null entry
long = df[cols].groupby(level="station").last()

# For each year t, add a column with observations for the indicator
for t in df["year"].unique():
# Subset to year t
Expand Down
4 changes: 2 additions & 2 deletions gis/script_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -1008,7 +1008,7 @@ def indicator_to_status(self, j, dfIndicator, dfVP):
# Merge df for biophysical indicator with df for typology
df = dfIndicator.merge(dfVP[["ov_typ"]], on="wb")

def SetThreshold(row):
def set_threshold(row):
if row["ov_typ"] in ["LWTYPE9", "LWTYPE11", "LWTYPE13", "LWTYPE15"]:
return pd.Series(
{
Expand All @@ -1029,7 +1029,7 @@ def SetThreshold(row):
)

# For df, add the series of thresholds relative to High ecological status
df[cols] = df.apply(SetThreshold, axis=1)
df[cols] = df.apply(set_threshold, axis=1)
df = df.drop(columns=["ov_typ"]) # drop typology column

else: # coastal waters
Expand Down
11 changes: 7 additions & 4 deletions gis/streams_CV.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,6 +361,7 @@ def stepwise_selection(subset, dummies, data, dfDummies, years, select_all=False
scores
status


### temporary correction of a mistake in the original code
# DataFrame for storing accuracy scores by year and calculating weighted average
scores = pd.DataFrame(dfIndObs.count(), index=years, columns=["n"]).astype(int)
Expand All @@ -371,11 +372,13 @@ def stepwise_selection(subset, dummies, data, dfDummies, years, select_all=False
# Mean ecological status by year for the n observations that don't have missing values
status["Obs"] = (dfIndObs < 4.5).sum() / status["n"] # ecological status < good
status.loc["Total", "Obs"] = (status["Obs"] * status["n"]).sum() / status["n"].sum()

# Leave-one-out cross-validation (LOO-CV) loop over every observed stream and year
dfTypology.drop(columns=["Medium", "Large"], inplace=True)
predictors_used = ["Soft bottom", "Small"]
dfTypology.name = ", ".join(predictors_used) # name model after predictors used
for df in dfDistrict: # LOO-CV with different dummies
df1 = dfNatural.drop(columns=["Small", "Medium", "Large"])
df2 = dfNatural.drop(columns=["Medium", "Large"])
df1.name = ", ".join(["Soft bottom", "Natural"]) # name model
df2.name = ", ".join(["Soft bottom", "Natural", "Small"]) # name model
for df in (df1, df2): # LOO-CV with different dummies
# Impute missing values based on all observations (without cross-validation)
df_imp = pd.DataFrame(
imputer.fit_transform(np.array(df)), index=df.index, columns=df.columns
Expand Down

0 comments on commit 63e81a1

Please sign in to comment.