-
Notifications
You must be signed in to change notification settings - Fork 0
/
kelp_metrics.py
339 lines (265 loc) · 13 KB
/
kelp_metrics.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
import joblib
import numpy as np
import xarray as xr
from tqdm import tqdm
from statsmodels.regression.linear_model import OLS
import os
import zipfile
import datetime
import argparse
import astropy.units as u
from astropy.coordinates import EarthLocation, AltAz, get_sun
from astropy.time import Time
import numpy as np
def calculate_day_length(latitude, longitude, dates):
    """
    Calculate the length of the day at a given latitude and longitude for each date.

    Parameters
    ----------
    latitude : float
        N. latitude of location [deg]
    longitude : float
        E. longitude of location [deg]
    dates : list
        list of dates in numpy.datetime64 format

    Returns
    -------
    durations : numpy.ndarray
        daylight duration for each date, in days
        (0.0 when the Sun never rises, e.g. polar night)
    """
    durations = np.zeros(len(dates))
    location = EarthLocation(lat=latitude*u.deg, lon=longitude*u.deg)
    # one-minute sampling over a 24-hour window centered on each date;
    # loop-invariant, so build it once instead of per-date
    offsets = np.linspace(-12, 12, 24*60)*u.hour
    for i, date in enumerate(dates):
        times_span = Time(date) + offsets
        # altitude of the Sun at each sampled time
        altaz_frame = AltAz(obstime=times_span, location=location)
        sun_altitudes = get_sun(times_span).transform_to(altaz_frame).alt
        # find sunrise and sunset times by looking for altitude crossings;
        # compute the above-horizon indices once and reuse for both ends
        above = np.where(sun_altitudes > 0)[0]
        if len(above) == 0:
            # Sun never rises within the window -> zero daylight
            # (original code raised IndexError here)
            durations[i] = 0.0
            continue
        sunrise_time = times_span[above[0]]
        sunset_time = times_span[above[-1]]
        durations[i] = (sunset_time - sunrise_time).to(u.day).value
    return durations
# make function to extract kelp data and temperatures
def extract_kelp_metrics(data, bathymetry, sunlight, lowerBound, upperBound):
    """
    Extract various metrics like correlation coefficients and slopes from kelp
    data and differences in temperature.

    Parameters
    ----------
    data : list
        list of dictionaries containing kelp data
        (keys used: 'lat', 'long', 'kelp_area', 'kelp_temp', 'kelp_time')
    bathymetry : xarray.Dataset or None
        bathymetry data (currently unused; kept for interface compatibility)
    sunlight : xarray.Dataset
        precomputed daylight-duration grid with dims (time, lat, lon) and a
        'daylight_duration' variable, with all times mapped to the year 2023
    lowerBound : float
        lower bound of latitude
    upperBound : float
        upper bound of latitude

    Returns
    -------
    kelp_data : dict
        dictionary of numpy arrays of per-sample metrics, plus scalar fit
        results ('temp_char', 'average_temp', 'slope_dkelp_temp_char',
        'slope_dkelp_temp_char_err')
    """
    # initialize dictionary of data
    kelp_data = {
        'dkelp':[],          # one quarter difference in kelp area
        'dkelp_kelp':[],     # kelp area at same time as difference (average of neighboring quarters)
        'dtemp':[],          # one quarter difference in temperature
        'dtemp_temp':[],     # temperature at same time as difference (average of neighboring quarters)
        'dtemp_temp_lag':[], # temperature one quarter before
        'dtemp_temp_lag2':[],# temperature two quarters before
        'kelp':[],           # total kelp area
        'temp':[],           # temperature [K]
        'temp_lag':[],       # temperature one quarter before [K]
        'temp_lag2':[],      # temperature two quarters before [K]
        'sunlight':[],       # daylight duration [day]
        'time':[],           # time
        'dtime':[],          # time of difference
        'lat':[],            # latitude
        'lon':[],            # longitude
        'dlat':[],           # latitude corresponding to difference measurements
        'dlon':[],           # longitude corresponding to difference measurements
        #'elevation':[],     # elevation [m]
        #'delevation':[],    # elevation corresponding to difference measurements [m]
    }

    # loop over all locations
    for d in tqdm(data):

        # skip data that is not between the upper and lower bounds
        if (d['lat'] >= upperBound) or (d['lat'] < lowerBound):
            continue

        # filter out areas with no measurements and nans
        bad_mask = (d['kelp_area'] == 0) | (np.isnan(d['kelp_area'])) | (np.isnan(d['kelp_temp']))

        # compute temperature one quarter before (first entry wraps -> invalid)
        d['kelp_temp_lag'] = np.roll(d['kelp_temp'],1)
        d['kelp_temp_lag'][0] = np.nan

        # compute temperature two quarters before (first two entries wrap -> invalid)
        d['kelp_temp_lag2'] = np.roll(d['kelp_temp'],2)
        d['kelp_temp_lag2'][0] = np.nan
        d['kelp_temp_lag2'][1] = np.nan

        # save data
        kelp_data['kelp'].extend(d['kelp_area'][~bad_mask])
        kelp_data['temp'].extend(d['kelp_temp'][~bad_mask])
        kelp_data['temp_lag'].extend(d['kelp_temp_lag'][~bad_mask])
        kelp_data['temp_lag2'].extend(d['kelp_temp_lag2'][~bad_mask])
        kelp_data['time'].extend(d['kelp_time'][~bad_mask])

        # save latitude and longitude
        kelp_data['lat'].extend(d['lat']*np.ones(len(d['kelp_area'][~bad_mask])))
        kelp_data['lon'].extend(d['long']*np.ones(len(d['kelp_area'][~bad_mask])))

        # too slow to sample daylight duration for every data point
        #daylight_duration = calculate_day_length(d['lat'], d['long'], d['kelp_time'][~bad_mask])

        # change year on every date to 2023 to match sunlight grid for interpolation
        kelp_time = d['kelp_time'][~bad_mask]
        kelp_time = np.array([np.datetime64(str(d)[:10]) for d in kelp_time])
        kelp_time = np.array([f'2023-{str(d)[5:]}' for d in kelp_time])
        kelp_time = np.array([np.datetime64(d) for d in kelp_time])
        kelp_time = kelp_time.astype('datetime64[ns]')

        # calculate daylight duration via interpolation on the precomputed grid
        if len(kelp_time) > 0:
            daylight_duration = sunlight.interp(time=kelp_time, lat=d['lat'], lon=d['long']).daylight_duration.values
            kelp_data['sunlight'].extend(daylight_duration)

        # use linear interpolation
        #elevation = bathymetry.interp(lat=d['lat'],lon=d['long']).elevation.values
        #kelp_data['elevation'].extend(elevation*np.ones(len(d['kelp_area'][~bad_mask])))

        # properly estimate the difference data
        nanmask = d['kelp_area'] == 0                    # mask out areas with no kelp
        nandata = d['kelp_area'].copy()
        nandata[nanmask] = np.nan                        # set areas with no kelp to nan
        nandiff = np.diff(nandata)                       # take quarterly difference
        nonnanmask_kelp = ~np.isnan(nandiff)             # mask out nans to ensure difference between sequential quarters
        nandiff_temp = np.diff(d['kelp_temp'])           # take quarterly difference of temperature
        nonnanmask_temp = ~np.isnan(nandiff_temp)        # mask out nans to ensure difference between sequential quarters
        nonnanmask = nonnanmask_kelp & nonnanmask_temp   # mask out nans in both kelp and temperature data
        kelp_data['dkelp'].extend(nandiff[nonnanmask])   # save kelp difference data
        kelp_data['dkelp_kelp'].extend((d['kelp_area'][1:][nonnanmask] + d['kelp_area'][:-1][nonnanmask])/2) # average kelp area at same time as difference

        # average temperature at same time as difference
        kelp_data['dtemp_temp'].extend((d['kelp_temp'][1:][nonnanmask] + d['kelp_temp'][:-1][nonnanmask])/2)
        kelp_data['dtemp_temp_lag'].extend((d['kelp_temp_lag'][1:][nonnanmask] + d['kelp_temp_lag'][:-1][nonnanmask])/2)
        kelp_data['dtemp_temp_lag2'].extend((d['kelp_temp_lag2'][1:][nonnanmask] + d['kelp_temp_lag2'][:-1][nonnanmask])/2)
        kelp_data['dtemp'].extend(nandiff_temp[nonnanmask]) # save temperature difference data

        # save time data for differences by averaging time of sequential quarters
        # (midpoint formula is symmetric, so the order of the two endpoints is irrelevant)
        times_prev = d['kelp_time'][1:][nonnanmask].astype('datetime64[ns]')
        times_next = d['kelp_time'][:-1][nonnanmask].astype('datetime64[ns]')
        delta = times_next - times_prev
        times_mid = times_prev + delta/2
        kelp_data['dtime'].extend(times_mid.astype(str))

        # save latitude and longitude
        kelp_data['dlat'].extend(d['lat']*np.ones(len(nandiff[nonnanmask])))
        kelp_data['dlon'].extend(d['long']*np.ones(len(nandiff[nonnanmask])))

        # add delevation
        #kelp_data['delevation'].extend(elevation*np.ones(len(nandiff[nonnanmask])))

        # free up memory
        del nanmask, nandata, nandiff, nonnanmask, nandiff_temp, bad_mask

    # convert to numpy arrays (single pass; the original nested this loop
    # inside itself, converting every key len(keys) times)
    for k in kelp_data.keys():
        try:
            kelp_data[k] = np.array(kelp_data[k])
        except Exception:
            # ragged lists cannot form a regular ndarray; fall back to an
            # object array (the original retried the identical failing call)
            print("could not convert", k)
            kelp_data[k] = np.array(kelp_data[k], dtype=object)

    # convert time to datetime
    kelp_data['time'] = kelp_data['time'].astype('datetime64[ns]')
    kelp_data['dtime'] = kelp_data['dtime'].astype('datetime64[ns]')

    # measure correlation and trend line: dkelp ~ m*(temp [C]) + b
    A = np.vstack([kelp_data['dtemp_temp']-273.15, np.ones(len(kelp_data['dtemp_temp']))]).T
    res = OLS(kelp_data['dkelp'], A).fit()
    m,b = res.params[0], res.params[1]

    # define characteristic temperature where change in kelp is 0
    # just solve for x above to get the characteristic temperature at each location
    kelp_data['temp_char'] = -b/m
    kelp_data['average_temp'] = np.mean(kelp_data['dtemp_temp'])-273

    # saving the slope + error
    kelp_data['slope_dkelp_temp_char'] = m
    kelp_data['slope_dkelp_temp_char_err'] = res.bse[0]

    return kelp_data
def main(lower_lat, upper_lat):
    """
    Extract kelp metrics for a latitude band and save them to a pickle file.

    Parameters
    ----------
    lower_lat : float
        lower latitude limit (swapped with upper_lat if given out of order)
    upper_lat : float
        upper latitude limit

    Returns
    -------
    kelp_data : dict
        dictionary of metrics from extract_kelp_metrics
    """
    # load data
    with open('Data/kelp_averaged_data.pkl', 'rb') as f:
        data = joblib.load(f)

    # check if elevation data file exists or unzip it
    # if not os.path.exists('Data/crm_socal_1as_vers2.nc'):
    #     # if running for first time
    #     with zipfile.ZipFile('Data/crm_socal_1as_vers2.nc.zip', 'r') as zip_ref:
    #         zip_ref.extractall('Data/')

    # load bathymetry data from noaa
    # if using noaa dem: limit: ~31-36
    #bathymetry = xr.open_dataset('Data/crm_socal_1as_vers2.nc')
    #bathymetry = bathymetry.rename({'Band1':'elevation'})

    # load bathymetry data
    #bathymetry = xr.open_dataset('Data/GEBCO_2022_sub_ice_topo.nc')

    # parse lat/lon limits: simultaneous assignment so both min and max see
    # the ORIGINAL values (sequential assignment clobbered lower_lat first,
    # collapsing swapped inputs like (37, 27) to (27, 27))
    lower_lat, upper_lat = min(lower_lat, upper_lat), max(lower_lat, upper_lat)

    # create a grid for computing daylight
    sunlight_file = f'Data/sunlight_27_37.nc' # TODO change later
    if os.path.exists(sunlight_file):
        sunlight = xr.open_dataset(sunlight_file)
    else:
        print("Computing daylight duration...")
        # find all unique dates and lat/lon limit
        dates = data[0]['kelp_time']
        lats = []; lons = []

        # loop over all locations and extract lat/long
        for d in data:
            lats.append(d['lat'])
            lons.append(d['long'])

        # convert to numpy array
        lats = np.array(lats)
        lons = np.array(lons)

        # mask data based on lat limit
        mask = (lats >= lower_lat) & (lats < upper_lat)
        lats = lats[mask]
        lons = lons[mask]

        # create a grid of lat/long
        lat_list = np.linspace(lats.min(), lats.max(), 50)
        lon_list = np.linspace(lons.min(), lons.max(), 50)
        lat_grid, lon_grid = np.meshgrid(lat_list, lon_list)

        # ignore the year and find unique dates
        dates = np.unique(np.array([np.datetime64(str(d)[:10]) for d in dates]))
        # dates = array(['1984-02-15', '1984-05-15', '1984-08-15', '1984-11-15'])

        # ignore year and find unique dates as string
        dates_str = np.unique(np.array([str(d)[5:] for d in dates]))

        # add the year 2023 to each date so all quarters share one grid year
        dates_str = np.array([f'2023-{d}' for d in dates_str])

        # convert to datetime
        dates = np.array([np.datetime64(d) for d in dates_str])

        # create a grid of daylight duration
        sunlight = np.zeros((len(dates), len(lat_list), len(lon_list)))

        # loop over all grid points and compute daylight for every date at once
        for j in tqdm(range(len(lat_list))):
            for k in range(len(lon_list)):
                # calculate daylight duration
                sunlight[:,j,k] = calculate_day_length(lat_list[j], lon_list[k], dates)

        # convert to xarray so we can interpolate; naming the DataArray makes
        # 'daylight_duration' available as a variable after to_netcdf/open_dataset
        sunlight_xr = xr.DataArray(sunlight, dims=['time', 'lat', 'lon'], coords={'time':dates, 'lat':lat_list, 'lon':lon_list}, name='daylight_duration')

        # save grid to netcdf file
        sunlight_xr.to_netcdf(sunlight_file)

        # free up memory
        del lat_list, lon_list, lat_grid, lon_grid, dates, dates_str, sunlight, sunlight_xr

        # load file
        sunlight = xr.open_dataset(sunlight_file)

    # extract kelp metrics
    kelp_data = extract_kelp_metrics(data, None, sunlight, lower_lat, upper_lat)

    # save to pkl file
    with open(f"Data/kelp_metrics_{lower_lat}_{upper_lat}.pkl", "wb") as f:
        joblib.dump(kelp_data, f)

    print(f"Data/kelp_metrics_{lower_lat}_{upper_lat}.pkl saved")
    print(kelp_data.keys())
    del sunlight, data
    return kelp_data
if __name__ == "__main__":
    # command-line interface: choose the latitude band to process
    parser = argparse.ArgumentParser()
    parser.add_argument('-u', '--upper_lat',
                        type=int, default=37,
                        help='upper latitude limit')
    parser.add_argument('-l', '--lower_lat',
                        type=int, default=27,
                        help='lower latitude limit')
    args = parser.parse_args()

    # extract and save kelp metrics for the requested band
    main(args.lower_lat, args.upper_lat)