-
Notifications
You must be signed in to change notification settings - Fork 4
/
insectIndicators.Rmd
1933 lines (1458 loc) · 82.8 KB
/
insectIndicators.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Insect indicators
<br />
_Author and date:_
Jens Åström
Marie Davey
`r Sys.Date()`
<br />
<!-- Load all you dependencies here -->
```{r pkg_check, include=FALSE}
#Check if Norimon and bombLepiSurv is installed
if (!"Norimon" %in% installed.packages()[,"Package"]){
devtools::install_github("jenast/Norimon")
}
if (!"bombLepiSurv" %in% installed.packages()[,"Package"]){
devtools::install_github("jenast/bombLepiSurv")
}
#Alternatively, load below with pacman, requires pacman :)
#pacman::p_load_gh("NINAnor/Norimon")
#pacman::p_load_gh("jenast/bombLepiSurv")
```
```{r setup, include=FALSE}
library(knitr)
knitr::opts_chunk$set(echo = TRUE)
suppressPackageStartupMessages({
require(Norimon)
require(bombLepiSurv)
require(tmap)
require(sf)
require(tidyverse)
require(flextable)
})
```
<!-- Fill in which ecosystem the indicator belongs to, as well as the ecosystem characteristic it should be linked to. It's OK to use some Norwegian here -->
```{r, echo=F}
Ecosystem <- rep("Semi-naturlig mark", 3)
Egenskap <- c("Biologisk mangfold",
"Funksjonelle grupper innen trofiske nivåer",
"Funksjonelt viktige arter og biofysiske strukturer")
ECT <- rep("Functional state characteristics", 3)
Contact <- rep("Jens Åström", 3)
```
```{r, echo=F}
metaData <- data.frame(Ecosystem,
"Økologisk egenskap" = Egenskap,
"ECT class" = ECT,
check.names = F)
flextable(metaData) %>%
bg(bg = "lightblue", part="header") %>%
theme_vanilla() %>%
set_table_properties(layout = "autofit")
```
<!-- Don't remove these three html lines -->
<br />
<br />
<hr />
## Introduction
We here show how to calculate a set of indicators for terrestrial insects in Norway. Two of them are adapted from earlier work on the Nature Index for Norway and use data from the Norwegian monitoring program on bumblebees and butterflies (**NMBB**). The others are developed during 2023, and use data from the Norwegian insect monitoring program (**NorIns**).
This workflow is shortened by putting most of the underlying code in separate R-packages, which are freely available through Github. The relevant packages are `bombLepiSurv` https://github.com/jenast/bombLepiSurv for the bumblebee and butterfly indicators, and `Norimon` https://github.com/jenast/Norimon for the rest of the indicators.
For the indicators from the NMBB program, community reference values has been elicited from experts, and is explained further below. For the indicators from the NorIns program, this is significantly harder to do, because of the size of the communities, lack of historical timeseries and changes in sampling and identification techniques. We therefore currently lack reference values for many of these indicators.
Below is a list of the indicators calculated in this document, and their state of development. Some details is further expanded on below.
```{r, echo=F}
indDataset <- c(rep("NMBB", 2),
rep("NorIns", 7))
Indicator <- c("Bumblebees in semi-natural grasslands",
"Butterflies in semi-natural grasslands",
"Biomass of flying insects",
"Species richness of flying insects",
"Species richness of pollinating insects",
"Species richness of dung associated insects",
"Quotent of Apocrita vs Symphyta species richness",
"Number of alien species",
"Intraspesific genetic variation")
nor_cat <- c("Funksjonelt viktige arter og biofysiske stukturer",
"Biologisk mangfold",
"Funksjonelt viktige arter og biofysiske stukturer",
"Biologisk mangfold",
"Biologisk mangfold",
"Biologisk mangfold",
"Funksjonelt viktige arter og biofysiske strukturer",
"Biologisk mangfold",
"Biologisk mangfold"
)
ect_cat <- c("B2",
"B1",
"B2",
"B1",
"B1",
"B1",
"B2",
"B1",
"B1"
)
Geo_scope <- c(rep("Sørlandet, Østfold-Vestfold, Vestlandet, Trøndelag", 2),
rep("Sørlandet, Østlandet, Trøndelag, Nord-Norge", 7))
Ind_data <- c(rep("Present", 2),
rep("Present", 7))
Ref_val <- c(rep("Present", 2),
rep("Lacking", 7))
```
```{r, echo=F}
indList <- tibble("Dataset" = indDataset,
"Indicator" = Indicator,
"Norwegian type" = nor_cat,
"ECT" = ect_cat,
"Current geographical scope" = Geo_scope,
"Indicator data" = Ind_data,
"Reference values" = Ref_val)
knitr::kable(indList)
```
Many of these indicators are self-explanatory and don't require much justification. **Bumblebees in semi-natural grasslands** and **Butterflies in semi-natural grasslands** are functionally important pollinators, and are as a community dependent on a varied herbaceous flora. They therefore both represent key functionality, as well as indicators of habitat quality. **Biomass of flying insects** represent a key functional characteristic in ecosystem functioning as food for insectivores, and as a response to habitat quality and management. **Species richness of flying insects** represents the total breadth of the insect community, and **Species richness of pollinating insects** complements the butterfly and bumblebee indicators in taxonomic breadth and spatial resolution.
**Species richness of dung associated insects** represent a key functional group in decomposition, and function as an indicator of grazing intensity and diversity. **Quotent of Apocrita vs Symphyta species richness** tries to go beyond simple amounts, and capture compositional changes in the insect community. Apocrita covers predatory and parasitic wasps, while Symphyta is plant-feeding wasps. A relative decrease in predatory/parasitic wasps could indicate a degradation in insect prey communities, and a decrease in the potential for natural pest control. This indicator is optionally given as just the species richness of Apocrita. **Number of alien species** takes into account the number of alien species found in a region and optionally, the frequency of sampling events they are observed in. This is based on the alien species list from the Norwegian Biodiversity Information Center, which lists known alien species and an increasing list of door-knocker species. Note that due to large historical knowledge gaps of insect species distributions, there could potentially be many more alien species in Norway, which are not picked up on this alien species list. Lastly, **Intraspecific genetic variation** captures biodiversity that until now has been hidden, due to lack of practical technologies to measure it. The need to conserve genetic diversity is identified in Norwegian law, and decreases in intraspecific diversity could signal population decreases, and a decreased potential to adapt to environmental changes, before whole species are lost to an area.
## About the underlying data
The data used here comes from the ongoing monitoring programs "Norwegian insect monitoring" ([NorIns](https://www.nina.no/Naturmangfold/Insekter/Overv%C3%A5king-av-insekter)) and "Norwegian monitoring of bumblebees and butterflies" ([NMBB](https://www.nina.no/V%C3%A5re-fagomr%C3%A5der/Milj%C3%B8overv%C3%A5king-p%C3%A5-land/Humler-og-dagsommerfugler)). Both programs are initiated and financed by the Environmental agency, with the aim to produce continuous area representative time-series of insect community data. The **NorIns** program use malaise and window traps to sample a broad community of insect species, which are identified through metabarcoding of DNA. The **NMBB** program use sweep netting along transect walks where the species are identified manually in the field. More information is available in the links above.
### Representativity in time and space
The **NorIns** program started in 2020 with monitoring in semi-natural land and forests in the south-eastern region of Norway (Østlandet), with the long-term ambition to cover the entire country. In 2021, the program was expanded to semi-natural land in central Norway (Trøndelag), in 2022 to semi-natural land in south-western Norway (Sørlandet), and in 2023 to semi-natural land in northern Norway (Nord-Norge), with a possible expansion to western Norway (Vestlandet) in 2024. So far, forest habitats are only monitored in south-eastern Norway.
The monitoring program is designed to produce estimates with a spatial resolution of these 5 large regions. Finer resolution could be evaluated in the future, although it is unlikely to be feasible without more intense sampling. The program uses a staggered sampling scheme where we visit different locations each year, and return to each location every 5 years. This sampling scheme, coupled with the strong yearly variation in insect abundances makes it reasonable to use a temporal resolution of 5 years. The indicator values from **NorIns** are therefore calculated on 5 year long rolling windows.
The **NMBB** program started in 2009 in a subset of south-eastern Norway (counties of Vestfold and Østfold), and was expanded to central Norway (Trøndelag) in 2010, south-western Norway in 2013, and to western Norway (Vestland and Møre og Romsdal fylke) in 2022. The spatial resolution aligns with these (as of now) 4 regions, and it is not recommended to downscale the predictions due to the limited number of sampling locations in each region. Since the same locations are visited each year, yearly estimates of the condition could be calculated, but should be interpreted by caution due to natural yearly variability.
Both programs are designed to be area representative, within their respective habitat types and regions. Both use stratified randomized selection of localities. The **NorIns** program selects localities by random draws within habitat strata, and the **NMBB** by a predefined continuous network of survey squares, within which individual transects are then manually placed. In both programs, some subjective decisions are made in the definition of habitat criteria and in discarding unpractical survey locations.
### Original units
The original units from **NorIns** include number of species, total biomass as wet weight, and inter-specific genetic diversity (unit to be determined). For the **NMBB** program, original units are number of individuals of each species within a transect, although only presences in transects (as presence frequencies) are used for the indicator calculations.
### Temporal coverage
2009-present for the **NMBB** data, and 2020-present for the **NorIns** data.
## Ecosystem characteristic
### Norwegian standard
Three of the indicators are tied to the *Økologisk egenskap* (Ecological characteristic) of **Funksjonelt viktige arter og biofysiske stukturer** (Funktionally important species and structures), and the other six to **Biologisk mangfold** (Biodiversity).
### UN standard
Six of the indicators are tied to the UN standard (ECT-classes) **B1 Compositional state characteristics**, and the two remaining to **B2 Structural state characteristics**.
## Collinearities with other indicators
None measured so far, but it is reasonable to assume that some of these indicators will share some causal factors, and therefore correlate to some degree. They might also correlate to other indicators as well, not described here.
## Reference condition and values
The reference condition is meant to be one with minimal negative human impact, but this has a special interpretation for semi-natural land. These habitats are formed and maintained through human activities. Thus, "minimal negative human impact" is replaced by a state of "good maintenance", here understood as resembling the traditional agricultural maintenance regime, existing for several hundred years up until the late 1800s. This is characterized by extensive grazing, meadows and lays, crop rotation with legumes to bind nitrogen in addition to fertilization from manure, relatively small field sizes and abundant field margins, and a lack of artificial fertilization and mechanized tilling.
### Reference condition
Assessing the current state of insect communities is a complicated task, due to their taxonomic and functional breadth and high temporal and spatial variability. Still, even more challenging is establishing reference values, when the reference states are extinct or prohibitively difficult to measure empirically. We currently lack reference values for all indicators from the **NorIns** program, except for the alien species indicators where the reference value is zero by definition. It is unlikely that this can be solved by "simply" surveying a state in a reference area, since intact reference areas of sufficient size and numbers likely no longer exist. The question of how to handle these reference values is currently unresolved, and we present these indicator value calculations with made up reference values as placeholders in the code.
For the **NMBB** indicators, the communities are small enough and well-known enough to identify reference communities by expert opinion. These communities specify the expected rate of observing each species. The current rates of observation are then compared to the expected rates. The calculations are further explained below.
### Reference values, thresholds for defining _good ecological condition_, minimum and/or maximum values
<!-- Text here -->
The specific reference communities for the **NMBB** indicators won't be spelled out here, see instead the scripts below. We have not specified a value for _good ecological condition_ as of yet, since we lack a straightforward empirical basis to do so. By default then, the values is set to 0.6 , following the general framework.
## Uncertainties
The uncertainties for the indicator values is calculated by bootstrapping the values for each locality within a year. This takes the variability between the localities into account. For the **NorIns** indicators, these uncertainties are given as standard deviation, as well as confidence intervals. The uncertainties for the **NMBB** indicators are constituted by a distribution of discrete values, and therefore we only provide confidence intervals and not a standard deviation.
## References
* French, C. M., Bertola, L. D., Carnaval, A. C., Economo, E. P., Kass, J. M., Lohman, D. J., … Hickerson, M. J. 2023, juli 17. Global determinants of insect mitochondrial genetic diversity. bioRxiv. bioRxiv. https://doi.org/10.1101/2022.02.09.479762
* Miraldo, A., Li, S., Borregaard, M. K., Flórez-Rodríguez, A., Gopalakrishnan, S., Rizvanovic, M., … Nogués-Bravo, D. 2016. An Anthropocene map of genetic diversity. Science 353(6307): 1532–1535. https://doi.org/10.1126/science.aaf4381
* Rohde, K. 1992. Latitudinal Gradients in Species Diversity: The Search for the Primary Cause. Oikos 65(3): 514. https://doi.org/10.2307/3545569
* Theodoridis, S., Fordham, D. A., Brown, S. C., Li, S., Rahbek, C., & Nogues-Bravo, D. 2020. Evolutionary history and past climate change shape the distribution of genetic diversity in terrestrial mammals. Nature Communications 11(1): 2557. https://doi.org/10.1038/s41467-020-16449-5
* Åström, Jens; Birkemoe, Tone; Brandsegg, Hege; Dahle, Sondre; Davey, Marie Louise; Ekrem, Torbjørn; Fossøy, Frode; Hanssen, Oddvar; Laugsand, Arne Endre; Majaneva, Markus; Staverløkk, Arnstein; Sverdrup-Thygeson, Anne; Ødegaard, Frode.
Insektovervåking på Østlandet, Sørlandet og i Trøndelag. Rapport fra feltsesong 2022. Trondheim: Norsk institutt for naturforskning (NINA) 2023 (ISBN 978‐82‐426‐5037‐5) 99 s. NINA rapport(2241)
* Åström, Sandra Charlotte Helene; Åström, Jens; Bøhn, Kristoffer; Gjershaug, Jan Ove; Staverløkk, Arnstein; Dahle, Sondre; Ødegaard, Frode. Nasjonal overvåking av dagsommerfugler og humler i Norge. Oppsummering av aktiviteten i 2022. Trondheim: Norsk institutt for naturforskning (NINA) 2023 (ISBN 978-82-426-5009-2) 54 s. NINA rapport(2214)
* Öberg, S., Gjershaug, J. O., Diserud, O. & Ødegaard, F. 2011. Videreutvikling av metodikk for
arealrepresentativ overvåking av dagsommerfugler og humler. Naturindeks for Norge. – NINA
Rapport 663.
## Analyses
### Calculation principles for NorIns indicators
The indicators from the Norwegian insect monitoring are calculated with functions in the `Norimon` R-package [link](https://github.com/NINAnor/Norimon). This methodology is meant to facilitate the calculation of a broad variety of insect indicators using data from **NorIns**.
**Main steps of workflow:**
1. Fetch data of biomass or community diversity from a centralized database. Diversity data can be filtered on several taxonomic levels, but biomass is only available for whole samples.
2. Bootstrap observations and calculate mean and uncertainty of point estimates.
3. Compare observations to reference points.
4. Display and plot the results
```{r flow_chart, out.width = '400pt', dev='png', echo=F}
knitr::include_graphics("images/NorIns_flow_chart.png")
```
Most of the ecology comes into step 1, in choosing what data to use to describe a quality. This can be a custom selection of species, or a set of higher taxa such as specific genuses, families or even order. The goal is to choose a set of taxa that represent specific qualities of the community that can indicate the ecological status of the ecosystem. In addition to the selection of taxa, we have to decide on the level of spatial and temporal aggregation, e.g. if we should aggregate the data spatially on the region scale, habitat scale or locality scale, and if we should aggregate it temporally on yearly or even the individual sample occasions within years. For the ecological indicators, we will aggregate the raw observation values to the year locality scale, where the data summarize all catches in a locality in a year.
The second step is to get an estimate of the uncertainty of the data, were we use the bootstrap method. This method is also flexible, as we could bootstrap the samples on different sampling levels (e.g. samples within localities, localities within a region, or regions within country). When working with raw data on the year locality level, the most granular bootstrap would be to summarize the variables to the yearly, habitat, and regional level, for example expressing the mean values of insect biomass in semi-natural land and forests in Østlandet in 2021, with uncertainty.
Ecological knowledge also comes into step 3, comparing the values to a reference state. Here we have several options. We could for example use a single defined value as the reference state. But we can also use a point in time as a reference point (e.g. the start of a time series).
Taken together, this framework is meant to facilitate the calculation of an arbitrary set of insect indicators, based on the combination of choices in data to fetch, aggregation level, and reference comparison. We showcase the framework by working through an example with pollinators.
In the example we start with connecting to the database, in order to fetch the data.
```{r, eval=FALSE}
connect_to_insect_db()
```
```{r, echo=FALSE}
connect_to_insect_db(user=Sys.getenv("INSECT_USER"), password=Sys.getenv("INSECT_PASSWORD"))
```
For convenience, the pollinator families can be retrieved by the `get_pollinators()` function.
```{r}
pollinators <- get_pollinators()
pollinators_fam <- pollinators %>%
select(family_latin) %>%
pull()
```
```{r, echo = FALSE}
kable(pollinators)
```
<br/>
We then fetch the community data for these families through the `get_observations` function. We additionally subset the habitat type that we are interested in, here semi-natural land. The result is a tibble of the aggregated number of species, shannon diversity, and the mean number of genetic variants per species. The default aggregation level is "year_locality", meaning the total observations for a locality within a year.
Note that the output currently contains some experimental values, awaiting a more robust methodology. "Shannon diversity" doesn't really make sense without counts or amounts, etc. This will be replaced by measurements of genetic diversity that are based on published peer-reviewed methodology (French et al. 2023), in the (hopefully near) future.
```{r}
poll_loc_year <- get_observations(subset_families = pollinators_fam,
subset_habitat = "Semi-nat")
```
```{r}
poll_loc_year %>%
slice(1:5) %>%
kableExtra::kbl()
```
#### Bootstrap observations
The localities in the NorIns program are (semi-)randomly selected and can be viewed as samples of a larger population. Individually, they represent a single measurement of an insect community in a habitat type in a region. To get the sampling uncertainty for this representation, we can bootstrap the values by this simple process: we choose a random set of localities within a year and region (with replacement) and calculate the average value. We then repeat this process a large number of times to get a bootstrap sample of values, which can be used to express the uncertainty in the dataset.
We typically have 10 localities for a given habitat type and region each year, which is not very much to base our uncertainty estimates on. But bootstrapping works fairly well on small samples and has the advantage that is doesn't make assumptions of the statistical distribution of the errors. We will use this level to showcase the functionality and to visualize some variation between years. However, since the sampling scheme of the monitoring program semi-randomly selects 10 new localities for each of the 5 years the rotating scheme, each years estimate will include some randomness that is caused by variability between localities. This means that the values between individual years might appear more random than the underlying overall trend. To get robust estimates that averages over this variability, we will summarize the actual indicator values to 5 year periods, further described below.
When **NorIns** is fully scaled up, it will have 50 localities for each covered habitat type in every country region, with 10 localities every year, in a rotating survey scheme over 5 years. Since the program is rolled out sequentially starting in 2020, for semi-natural land it currently covers 4/5 of the country, with varying amounts of localities within each region. The map below displays the localities surveyed so far.
```{r}
semi_loc <- get_localities(dataset = "NasIns",
habitat_type = "Semi-nat")
```
```{r}
norway_regions <- get_map()
```
```{r location_map}
tm_shape(norway_regions) +
tm_polygons("region") +
tm_shape(semi_loc) +
tm_symbols(col = "red",
shape = 22,
size = 0.8)
```
The bootstrap routine is implemented in the function `bootstrap_value()`, which takes a community (from `get_observation`) or weight dataset (from `get_biomass`) as its first input. It also needs to know what measurement in the dataset to bootstrap, and what, if any, grouping structure to aggregate the results on. In this example, we bootstrap the number of pollinator species, and aggregate the results on the year and regional scale.
```{r}
poll_richness_boot <- bootstrap_value(poll_loc_year,
value = no_species,
groups = c("year",
"region_name"),
rolling_year_window = FALSE
)
```
This creates an object of type `boot_stat`. Calling it prints a simple summary of the bootstrap values.
```{r}
poll_richness_boot
```
But the `boot_stat` object also stores the individual bootstrap values for later computation. By default, we use 999 bootstrap samples, which here results in 999 samples * 6 groups = 5994 rows of bootstrap values.
```{r}
poll_richness_boot[2]
```
#### Comparing bootstrap values to a reference point
Setting aside the practical difficulties in establishing robust reference values, the next step in the methodology is to compare the observed values (with bootstrapped uncertainty) to a chosen reference value. This can be made in several ways. Most simply, if we have a single numeric value as a reference value, we can simply subtract that from the observed values. For example, if we observe 13 species of pollinators in 2022 at a location, and the reference point is 10, the 2022 value has increased by 13 - 10 = 3 species. Such subtractions should be made on the set of bootstrapped values, followed by new summary statistics being calculated, preserving the uncertainty from the bootstrap. The `boot_stat` object class has its own subtraction method `-` do to just that. In this example, we set the reference value arbitrarily to 30 species.
```{r}
poll_richness_boot
```
```{r}
diff_poll_richness_boot <- poll_richness_boot - 30
```
```{r}
diff_poll_richness_boot
```
Alternatively, we could use a reference point in the time series itself. Say for example that we want to use the values for species richness of pollinators in semi-natural land i Østlandet 2020 as a reference point. We can then calculate the difference (the contrast) between this level and all the other levels. We do this by the function `boot_contrast()`
**NB! This functionality is in development. It currently works for single rows as reference points, but needs updating to allow for referencing several values simultaneously, e.g using the start values for all regions and habitat types as their own reference points.**
```{r}
diff_poll_richness_boot2 <- poll_richness_boot %>%
boot_contrast(year == 2020 & region_name == 'Østlandet')
```
```{r}
diff_poll_richness_boot2
```
#### Normalizing the values
After the comparison to a reference value, we need to normalize the indicator values so that they lie between 0 and 1. This can be done in several ways (se e.g. [eaTools](https://github.com/NINAnor/eaTools)). The simplest case is to use a linear scaling, with a natural zero, which e.g. can be done by dividing the indicator values by the highest value state. A `boot_stat` class has a `/` function that divides each bootstrap value by a given value, truncates the highest values to 1, and recalculates the summary values. We can similarly divide by a predefined reference state. Here, we exemplify the method by dividing the value by 30.
```{r poll_richness_scaling}
diff_poll_richness_boot3 <- poll_richness_boot / 30
```
```{r}
diff_poll_richness_boot3
```
#### Display and plot bootstrap values
The boot_stat class also has its own plot function. It tries to plot a comparison of the bootstrap distributions over years, for each group. For example, if we plot the object `diff_poll_richness_boot`, we can look at the yearly differences in beetles species richness in the two geographic regions: (Note that we have only 1 year of data from Sørlandet so far)
```{r poll_richness_boot}
plot(diff_poll_richness_boot)
```
In the cases where we have used a single row as a reference point, this shows up as a sharp spike at 0.
```{r diff_beetle_richness_boot2}
plot(diff_poll_richness_boot2)
```
#### Time-series plots
The default plots are good for showing the uncertainty distributions, but can be a bit difficult to track the time series trends this way. We can instad use the `ts_plot` function for a simple time-series plot.
```{r}
ts_plot(poll_richness_boot)
```
#### Map plots
In addition to these plots, we can also display the values geographically. The `map_plot()` function takes a `boot_stat` object and plots the values according to its region names.
```{r map_plot_cutout}
map_plot(poll_richness_boot)
```
By default, it only shows the regions with data, but this can be overridden manually:
```{r map_plot_whole}
map_plot(poll_richness_boot,
whole_country = TRUE)
```
We can also choose a different palette, for example from the NinaR package, and visualize the uncertainty by setting the transparency of the colors from the bootstrap standard deviations. Most of these functions can be piped as well:
```{r diff_beetle_richness_boot_map}
diff_poll_richness_boot %>%
map_plot(palette = "orange-green",
whole_country = FALSE,
alpha_from_sd = TRUE)
```
More plotting options are available, see the `Norimon` indicator workflow vignette.
#### 5 year rolling window summaries
As described above, to take the staggered 5 year survey scheme into account and to average over random yearly variation, it makes sense to summarize the state over a whole 5 year period. This does not need to adhere to predefined periods, since any 5 year long period will result in a complete data set. We can therefore calculate 5 year long summaries in rolling windows, where each focal year is surrounded by +- 2 years. The bootstrap_value does this by default, if we don't specify `rolling_year_average = FALSE`
```{r}
poll_richness_boot_5_year <- bootstrap_value(poll_loc_year,
value = no_species,
groups = c("year",
"region_name"),
rolling_year_window = TRUE
)
```
Since we are only on the third year in the survey scheme however, the rolling windows are too large to show any differences between years. This will change, starting with the 2023 year data.
```{r}
plot(poll_richness_boot_5_year)
```
The averaging over 5-years also shows up in the time-series plots. Note the very wide confidence intervals of the Sørlandet region, with only 1-year worth of data.
```{r}
ts_plot(poll_richness_boot_5_year)
```
This concludes the tour of the `Norimon` functionality.
### Calculation principles for NBBM indicators
These indicators were developed for the Nature Index of Norway, back in 2010 by Ola Diserud and Sandra Öberg (Öberg et. al 2011), and are calculated routinely on a yearly basis (see Åström et. al 2022 for the latest report).
These indicators summarize the state of a bumblebee or butterfly community by estimating the difference from a reference community. Formally, the community indicator (CI) is expressed as the relative change of the community from a state of reference (SR), were the change is calculated by a state of change (SC).
$$CI = \frac{SR-SC}{SR}$$
The state of reference (SR) represents a community of species which can be expected to be observed in a given habitat type and region. It is calculated by assigning each species to class of expected commonality: common (C), sporadic (S) and rare (R) species. This classification is done by expert opinion, informed by known present and past species distributions. Note that this state of reference only contains a subset of highlighted species, making up a historical reference community. Potential observations of "new" species therefore don't inform the indicator value at all. Common species are expected to be observed in at least 5 % of the surveyed transects in a habitat type and region. Sporadic species similarly are defined as having a presence below 5 % but above 1 %. Rare species are seen in no more than 1 % of the transects, and lastly, species not seen in any transect are assumed as lost (L) for the purpose of this calculation.
Each commonality class gets its own weight, so that common species inform the reference state more than sporadic species, followed by rare species. The state of reference (SR) is thus a value where each species adds to the state according to its commonality. Formally, it is defined as:
$$SR = n_C * w_{C, SR} + n_S * w_{S, SR} + n_R * w_{R, SR} = \sum_{i = (C, S, R)} n_i * w_{i, SR}, $$
where $n_i$ is the number of species in a commonality class (C, S, R), and the weights $[w_{C, SR}, w_{S, SR}, w_{R, SR}]$ specify their respective contribution to the reference state (SR). The weights used are $[w_{C, SR}, w_{S, SR}, w_{R, SR}] = [1.0, 0.75, 0.50]$, i.e. a sporadic species has 75 % the weight of a common species, and a rare species has 50 % the weight of a common species.
The state of change (SC) is calculated as
$$SC = n_{CS} * w_{CS} + n_{CR} * w_{CR} + n_{CL} * w_{CL} + n_{SR} * w_{SR} + n_{SL} * w_{SL} + n_{RL} * w_{RL}, $$
where $n_{CS}$ is the number of expected common species (C) that is observed sporadically (S), and $w_{CS}$ is the weight of this change. Similarly, $n_{CR}$ is the number of expected common species that is observed rarely, $n_{RL}$ is the number of rare species that is lost, and so on. Potential increases in observation rates do not inform the indicator. Changes for common species are weighted more heavily than changes for less common species, and larger decreases are weighted more heavily than smaller decreases, such that $[w_{CS}, w_{CR}, w_{CL}, w_{SR}, w_{SL}, w_{RL}]= [0.50, 0.75, 1.00, 0.50, 0.75, 0.50]$.
The indicator values are estimated with uncertainty, by bootstrapping the set of localities within a region, and calculating the indicator for each of 9999 such samples.
#### Calculation example
These calculations with bootstrap sampling are implemented in the R-package `bombLepiSurv` ([Bombus and Lepidoptera Survey](https://github.com/jenast/bombLepiSurv)). We briefly show this functionality with bumblebees in semi-natural lands as an example.
We connect to the internal database with a convenience function.
```{r, eval=FALSE}
bombLepiSurv::humlesommerfConnect()
```
```{r, echo=FALSE}
bombLepiSurv::humlesommerfConnect(username = Sys.getenv("BOMB_USER"), Sys.getenv("BOMB_PASSWORD"))
```
We can fetch all observations of bumblebees in semi-natural land with the `getAllData` function. Species names are here shown in Norwegian.
```{r, fig.cap = "Bumblebees in grasslands 2021"}
allBombusGrassland2022 <- getAllData(type = "bumblebees",
habitat = "gressmark",
year = 2022,
language = "norsk")
```
A summary of the records is most easily plotted by `surveyBarPlot`:
```{r}
surveyBarplot(allBombusGrassland2022)
```
Note that for calculating the indicator values, we only consider the transects that were surveyed for the standard 3 times per year, to cover the phenology of the entire season without bias. Data of these complete survey rounds can be fetched from the database by the function `getComplData`, which fetches data from one habitat type and region at a time.
```{r}
bombus_trond_2022 <- getComplData(type = "Humler",
region_short = "Trond",
habitat = "Gressmark",
year = 2022)
```
The observed relative to the expected occurrences can be visualized through the `plotArt` function. This requires a reference community, which can be fetched through the `getExpValues` function.
```{r}
exp_bombus_trond <- getExpValues(type = "Humler",
region_short = "Trond",
habitat = "Gressmark")
```
Species in green bars are species that are expected to be common, who need to reach the green areas in the plot not to decrease the indicator value (from 1). Similarly with the blue (sporadic species), and red (rare species). We see in this case that _Bombus soroeensis_ is expected to be a sporadic species, but that it was only observed rarely.
```{r}
plotArt(bombus_trond_2022,
exp_bombus_trond)
```
The actual calculation and bootstrapping of the indicator is done in the function `calcInd`. This function calls the `getComplData` and `getExpectedValues` functions internally. It also requires the weights for the commonality classes in the reference community (SR), and the weights for the changes in state (CS). This is fetched internally via the functions `getAmountWeights` and `getDiffWeights`, respectively. Here the classes are coded in Norwegian (v = common, m = sporadic, s = rare).
```{r}
getDiffWeights()
```
```{r}
getAmountWeights()
```
Lastly, we specify the number of samples for the bootstrap. To speed up, only 999 samples are used here.
```{r}
nIter = 999
```
```{r}
hInd2022TrondGress <- calcInd(type = "Humler",
region_short = "Trond",
habitat = "Gressmark",
year = 2022,
nIter = nIter,
save.draws = T)
```
The result is an object of class "comm_index" (community index). It comes with some (still rudimentary) print and plotting functions, showing the point estimate and the limits of a 95% confidence interval. Due to the limited number of species and the fixed "weights" of each species and state of change, the indicator calculation returns a distribution of discrete values. Therefore it can happen that the 95% and the 90% confidence intervals are the same, as in this case.
```{r}
hInd2022TrondGress
```
```{r}
plot(hInd2022TrondGress)
```
Plotting functions for a series of indicator values is shown below in the indicator calculations. This concludes the tour of the `bombLepiSurv` package.
### Data sets
There are a few different ways to access the data required for these indicators. Both the NMBB and the NorIns project store their data in an internal database at NINA. They both also export most of their data to GBIF, but those exports need to be restructured before they can be processed in the following scripts.
#### NorIns data
The `Norimon` package has convenience functions to fetch data from the database. This database is currently not available outside NINA, but we will implement a solution for this in the future. Either we will make the database externally available, or we will provide an alternative route to the data from the GBIF export. For now, we will fetch the data through the `Norimon` functions.
#### NMBB data
The data for the bumblebee and butterfly indicators can similarly be accessed through the R-package `bombLepiSurv`.
#### Regions
<!-- In case you need to map the indicator value to regions, you can do that here. Remove this chapter if not relevant. -->
We here show how to import a shape file with the regional delineation. The indicators associated with the NorIns project can be attributed to the 5 country regions of Norway. Currently however, the data program only covers 4 of the 5 regions.
```{r, eval=FALSE}
connect_to_insect_db()
norway_regions <- Norimon::get_map()
```
```{r, echo=FALSE}
connect_to_insect_db(user=Sys.getenv("INSECT_USER"), password=Sys.getenv("INSECT_PASSWORD"))
norway_regions <- Norimon::get_map()
```
```{r}
norway_reg_NorIns <- norway_regions %>%
filter(region != "Vestlandet") %>%
select(region) %>%
group_by(region) %>%
summarize(geom = st_union(geom))
```
```{r}
tm_shape(norway_reg_NorIns) +
tm_polygons(col = "region")
```
The indicators from the NMBB program are similarly connected to regions, with the exception that the south-east region only covers the old counties of Vestfold and Østfold, and that Nord-Norge isn't covered yet.
```{r, eval=FALSE}
bombLepiSurv::humlesommerfConnect()
```
```{r, echo=FALSE}
bombLepiSurv::humlesommerfConnect(username = Sys.getenv("BOMB_USER"), Sys.getenv("BOMB_PASSWORD"))
```
```{r}
nbbm_norway_regions <- bombLepiSurv::get_map()
```
```{r}
norway_reg_NBBM <- nbbm_norway_regions %>%
filter(region %in% c("Øst", "Sørlandet", "Trøndelag", "Vestlandet")) %>%
select(region) %>%
group_by(region) %>%
summarize(geom = st_union(geom))
```
```{r}
tm_shape(norway_reg_NBBM) +
tm_polygons(col = "region")
```
### Calculation of NorIns indicators
Here we calculate the indicators in abbreviated form, following the general framework outlined above.
#### Biomass of flying insects in semi-natural land
```{r, eval=FALSE}
connect_to_insect_db()
```
```{r, echo=FALSE}
connect_to_insect_db(user=Sys.getenv("INSECT_USER"), password=Sys.getenv("INSECT_PASSWORD"))
```
Fetch the data.
```{r}
biomass_sn <- get_biomass(subset_year = 2020:2022,
subset_region = NULL,
subset_habitat = "Semi-nat")
```
Calculate the indicator values for each region and year.
```{r}
biomass_sn_boot <- bootstrap_value(df = biomass_sn,
value = avg_wet_weight,
groups = c("year",
"region_name")
)
```
Compare values to a reference state. The reference state is here set uniformly for all regions. Alternatively, we could calculate the indicator values for each region separately, compare to individual reference states, and then put it all together again.
To exemplify, we here use a single "made-up" value as a reference state and normalize the values at the same time, using the `/` function.
```{r}
biomass_ref <- 50
biomass_sn_diff <- biomass_sn_boot / biomass_ref
```
Plot the results and display uncertainty.
```{r}
biomass_sn_diff
```
```{r biomass_diff}
plot(biomass_sn_diff)
```
Prepare export format.
```{r}
biomass_sn_to_exp <- biomass_sn_boot$bootstrap_summary
biomass_sn_to_exp <- biomass_sn_to_exp %>%
mutate(reference_high = NA,
reference_low = 0,
thr = 0.6,
i = NA) %>%
select(year,
region = region_name,
v = boot_value,
sd = boot_sd,
reference_high,
reference_low,
thr,
i
)
biomass_sn_to_exp <- norway_reg_NorIns %>%
inner_join(biomass_sn_to_exp,
by = c("region" = "region"),
multiple = "all")
insect_biomass_semi_nat <- biomass_sn_to_exp
```
#### Species richness of flying insects in semi-natural land
Fetch the data.
```{r}
richness_sn <- get_observations(subset_year = 2020:2022,
subset_region = NULL,
subset_habitat = "Semi-nat")
```
Calculate the indicator values for each region and year.
```{r}
richness_sn_boot <- bootstrap_value(df = richness_sn,
value = no_species,
groups = c("year",
"region_name")
)
```
Compare to arbitrary reference value and normalize.
```{r}
richness_ref <- 4000
richness_sn_diff <- richness_sn_boot / richness_ref
```
Plot the results and display uncertainty.
```{r}
richness_sn_diff
```
```{r}
plot(richness_sn_diff,
limit = c(0, 1))
```
```{r richness}
map_plot(richness_sn_diff,
whole_country = T,
limit = c(0, 1))
```
Prepare export format.
```{r}
richness_sn_to_exp <- richness_sn_boot$bootstrap_summary
richness_sn_to_exp <- richness_sn_to_exp %>%
mutate(reference_high = NA,
reference_low = 0,
thr = 0.6,
i = NA) %>%
select(year,
region = region_name,
v = boot_value,
sd = boot_sd,
reference_high,
reference_low,
thr,
i
)
richness_sn_to_exp <- norway_reg_NorIns %>%
inner_join(richness_sn_to_exp,
by = c("region" = "region"),
multiple = "all")
insect_richness_semi_nat <- richness_sn_to_exp
```
#### Species richness of pollinators in semi-natural land
Fetch the data.
```{r}
pollinator_fam <- get_pollinators() %>%
select(family_latin) %>%
pull()
```
```{r}
pollinators_sn <- get_observations(subset_year = 2020:2022,
subset_families = pollinator_fam,
subset_habitat = "Semi-nat")
```
Calculate the indicator values for each region and year.
```{r}
pollinators_sn_boot <- bootstrap_value(df = pollinators_sn,
value = no_species,
groups = c("year",
"region_name")
)
```
Compare to arbitrary reference value and normalize.
```{r}
pollinators_ref <- 50
pollinators_sn_diff <- pollinators_sn_boot / pollinators_ref
```
Plot the results and display uncertainty.
```{r}
pollinators_sn_diff
```
```{r}
plot(pollinators_sn_diff,
limit = c(0, 1))
```
Prepare export format.
```{r}
pollinators_sn_to_exp <- pollinators_sn_boot$bootstrap_summary
pollinators_sn_to_exp <- pollinators_sn_to_exp %>%
mutate(reference_high = NA,
reference_low = 0,
thr = 0.6,
i = NA) %>%
select(year,
region = region_name,
v = boot_value,
sd = boot_sd,
reference_high,
reference_low,
thr,
i
)
pollinators_sn_to_exp <- norway_reg_NorIns %>%
inner_join(pollinators_sn_to_exp,
by = c("region" = "region"),
multiple = "all")
insect_pollinators_semi_nat <- pollinators_sn_to_exp
```
#### Species richness of dung associated insects
We here note all observations of a set of species, known to be associated with dung. This species list can be expanded after a further taxonomic review. Here we take all observed species in the house fly/stable fly family Muscidae, together with a selection of 70 beetle species known to be associated with dung.
```{r}
muscidae_species <- tbl(con,
Id(schema = "views",
table = "observed_muscidae_species")) %>%
pull()
coleoptera_dung_species <- tbl(con,
Id(schema = "lookup",
table = "coleoptera_dung_species")) %>%
select(species_latin) %>%
pull()
dung_species <- c(coleoptera_dung_species, muscidae_species)
```
```{r}
dung_sn <- get_observations(subset_year = 2020:2022,
subset_species = dung_species,
subset_habitat = "Semi-nat")
```
Calculate the indicator values for each region and year.
```{r}
dung_sn_boot <- bootstrap_value(df = dung_sn,
value = no_species,
groups = c("year",
"region_name")
)
```
Compare to arbitrary reference value and normalize.
```{r}
dung_ref <- 100
dung_sn_diff <- dung_sn_boot / dung_ref
```
Plot the results and display uncertainty.
```{r}
dung_sn_diff
```
```{r}
plot(dung_sn_diff,
limit = c(0, 1))
```
Prepare export format.
```{r}
dung_sn_to_exp <- dung_sn_boot$bootstrap_summary
dung_sn_to_exp <- dung_sn_to_exp %>%
mutate(reference_high = NA,
reference_low = 0,
thr = 0.6,
i = NA) %>%
select(year,
region = region_name,
v = boot_value,
sd = boot_sd,
reference_high,
reference_low,
thr,
i
)
dung_sn_to_exp <- norway_reg_NorIns %>%
inner_join(dung_sn_to_exp,
by = c("region" = "region"),
multiple = "all")
insect_dung_assoc_semi_nat <- dung_sn_to_exp
```
#### Relationship between Symphyta and Parasitica in semi-natural land
Parasitica and Symphyta wasps are here defined as a set of families.
```{r}
symphyta_fam <- c("Argidae", "Cephidae", "Cimbicidae",
"Diprionidae", "Orussidae", "Pamphiliidae",
"Pergidae", "Siricidae", "Anaxyelidae",
"Tenthredinidae", "Xiphydriidae", "Xyelidae")
symphyta_sn <- get_observations(subset_year = 2020:2022,
subset_families = symphyta_fam,
subset_habitat = "Semi-nat")
symphyta_sn
```
```{r}
parasitica_fam <- c("Braconidae", "Ichneumonidae", "Chalcididae",
"Eulophidae", "Pteromalidae", "Aphelinidae",
"Scelionidae", "Eupelmidae", "Encyrtidae",
"Mymaridae", "Diapriidae", "Bethylidae",
"Evaniidae", "Ceraphronidae", "Torymidae",
"Dryinidae", "Eucharitidae", "Mymarommatidae",
"Orussidae", "Megaspilidae", "Stephanidae",
"Trigonalidae", "Platygastridae", "Aulacidae",
"Gasteruptiidae", "Rhopalosomatidae", "Larridae",
"Agaonidae", "Pompilidae", "Bradynobaenidae"
)
parasitica_sn <- get_observations(subset_year = 2020:2022,
subset_families = parasitica_fam,
subset_habitat = "Semi-nat")
parasitica_sn
```
Divide the richness of Parasitica by Symphyta. [Could make a function for this later on.]
```{r}
par_sym <- symphyta_sn %>%
full_join(parasitica_sn,
by = c("year" ="year",
"locality" = "locality",
"habitat_type" = "habitat_type",
"region_name" = "region_name"),
suffix = c("_sym", "_par")
) %>%
mutate(par_per_sym_richn = no_species_par / no_species_sym)
```
Bootstrap the fraction of Parasitica to Symphyta.
```{r}
par_sym_sn_boot <- bootstrap_value(par_sym,
value = par_per_sym_richn,
groups = c("year",
"region_name"))
```
Compare to arbitrary reference value and normalize.