generated from NINAnor/NINA-ebook-template
-
Notifications
You must be signed in to change notification settings - Fork 0
/
setup.qmd
203 lines (154 loc) · 5.54 KB
/
setup.qmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# Setup {#sec-setup}
In this chapter we set up the database schemas.
We start with the SSB500 dataset
## SSB500
### Setup schema
Setting up a new schema called ssb_grids.
Here we can store SSB500, but also SSB10km which may become relevant.
```{r}
new_schemas <- "CREATE SCHEMA ssb_grids"
dbSendQuery(con, new_schemas)
```
Write queries to grant read only access to all.
```{r settingupPriveleges}
priv <- "ALTER DEFAULT PRIVILEGES IN SCHEMA ssb_grids GRANT SELECT ON TABLES TO ag_pgsql_ano_moduler_ro"
priv2 <- "ALTER DEFAULT PRIVILEGES IN SCHEMA ssb_grids GRANT SELECT ON TABLES TO ag_pgsql_ano_moduler_rw"
priv3 <- "ALTER DEFAULT PRIVILEGES IN SCHEMA ssb_grids GRANT SELECT ON TABLES TO ag_pgsql_ano_moduler_admin"
priv4 <- "GRANT USAGE ON SCHEMA ssb_grids TO ag_pgsql_ano_moduler_admin"
priv5 <- "GRANT USAGE ON SCHEMA ssb_grids TO ag_pgsql_ano_moduler_rw"
priv6 <- "GRANT USAGE ON SCHEMA ssb_grids TO ag_pgsql_ano_moduler_ro"
dbSendStatement(con, priv)
dbSendStatement(con, priv2)
dbSendStatement(con, priv3)
dbSendStatement(con, priv4)
dbSendStatement(con, priv5)
dbSendStatement(con, priv6)
```
### Read data into R
We use RStudio as the interface when adding new data to the database.
We start by bringing the data into our environment.
First we can get the entire SSB500 dataset.
```{r}
SSBpath <- "/data/P-Prosjekter2/412421_okologisk_tilstand_2024/Data/SSB0500M_L/ruter500m_Norge.shp"
SSB500 <- read_sf(SSBpath) |>
st_transform(25833)
```
Strip down the number of columns
```{r}
SSB500 <- SSB500 |>
select(ssbid = SSBid) # postgre doesnt like capital letters
# the geometry column needs to be named 'geom'
st_geometry(SSB500) <- "geom"
```
This data consists of perfect 500x500 grid cells arranged on rounded coordinates in the UTM sone 33 CRS.
```{r}
#| include: false
SSB500 |>
slice_head(n=10)|>
ggplot()+
geom_sf()
```
### Define table properties
First we define the table properties
```{r}
q1 <- "create table ssb_grids.ssb_500 (
ssbid character varying(50) primary key,
geom geometry(polygon,25833)
);"
# indices makes the database work faster. It should be added to all tables that are looked up frequently
q2 <- "create index on ssb_grids.ssb_500 using btree(ssbid);"
q3 <- "create index on ssb_grids.ssb_500 using gist(geom);"
```
```{r}
# sending the queries:
dbSendStatement(con, q1)
dbSendStatement(con, q2)
dbSendStatement(con, q3)
```
We defined geom to be polygon. Now let's just check that that is trua, and there are no multi-polygons for example.
```{r}
st_geometry_type(SSB500, by_geometry = F)
```
Yes, they are all polygons.
### Write to db
Then we write data to the ssb_500 table.
```{r}
write_sf(SSB500, dsn = con,
layer = Id(schema = "ssb_grids", table = "ssb_500"),
append = T)
```
## Sampling frames
We will have different sampling frames for the different ANO-modules.
For ANO Havstrand, SSB500 has been masked to only include grid celle that overlap with the Norwegian coastline.
The script for doing that is written in python ad can be found here: */data/P-Prosjekter2/412421_okologisk_tilstand_2024/Jan/*
### Setup schema
```{r}
schema_ur <- "CREATE SCHEMA sampling_frames"
dbSendQuery(con, schema_ur)
```
Write queries to grant read only access to all.
```{r}
priv <- "ALTER DEFAULT PRIVILEGES IN SCHEMA sampling_frames GRANT SELECT ON TABLES TO ag_pgsql_ano_moduler_ro"
priv2 <- "ALTER DEFAULT PRIVILEGES IN SCHEMA sampling_frames GRANT SELECT ON TABLES TO ag_pgsql_ano_moduler_rw"
priv3 <- "ALTER DEFAULT PRIVILEGES IN SCHEMA sampling_frames GRANT SELECT ON TABLES TO ag_pgsql_ano_moduler_admin"
priv4 <- "GRANT USAGE ON SCHEMA sampling_frames TO ag_pgsql_ano_moduler_admin"
priv5 <- "GRANT USAGE ON SCHEMA sampling_frames TO ag_pgsql_ano_moduler_rw"
priv6 <- "GRANT USAGE ON SCHEMA sampling_frames TO ag_pgsql_ano_moduler_ro"
dbSendStatement(con, priv)
dbSendStatement(con, priv2)
dbSendStatement(con, priv3)
dbSendStatement(con, priv4)
dbSendStatement(con, priv5)
dbSendStatement(con, priv6)
```
### Read data into R
```{r}
#| eval: true
coast_path <- "/data/P-Prosjekter2/412421_okologisk_tilstand_2024/Jan/CoastalSampling3.gdb"
sf::st_layers(coast_path)
```
SSB500_Coast4 contains the SSB500 grid cells that overlap with the coastline.
```{r}
coastSSB <- read_sf(coast_path, layer = "SSB500_Coast4")
```
The SSBid will be the foregn key, linking to ssb_500.
We also need a primary key, and therefore I add a unique identifier.
```{r}
ids <- UUIDgenerate(n = nrow(coastSSB))
anyDuplicated(ids)
coastSSB <- coastSSB |>
select(ssbid = SSBid)|>
mutate(havstrand_id = ids)
```
We don't need the geometry
```{r}
coastSSB <- as_tibble(coastSSB) |>
select(ssbid, havstrand_id)
```
### Define table properties
We will name the table *samplingframe_havstrand_2024*.
```{r}
q1 <- "create table sampling_frames.samplingframe_havstrand_2024 (
havstrand_id character varying(50) primary key,
ssbid character varying(50),
CONSTRAINT fk_ssb_500
FOREIGN KEY (ssbid)
REFERENCES ssb_grids.ssb_500 (ssbid)
);"
# indices makes the database work faster. It should be added to all tables that are looked up frequently
q2 <- "create index on sampling_frames.samplingframe_havstrand_2024 using btree(ssbid);"
q3 <- "create index on sampling_frames.samplingframe_havstrand_2024 using btree(havstrand_id);"
```
```{r}
# sending the queries:
dbSendStatement(con, q1)
dbSendStatement(con, q2)
dbSendStatement(con, q3)
```
### Write to db
Then we write data to the samplingframe_havstrand_2024 table.
```{r}
write_sf(coastSSB, dsn = con,
layer = Id(schema = "sampling_frames", table = "samplingframe_havstrand_2024"),
append = T)
```