From 6ae57e8bc6ee3bfe72db5bf0254b7ce7849c7000 Mon Sep 17 00:00:00 2001 From: trchudley Date: Mon, 29 Jul 2024 13:52:04 +0100 Subject: [PATCH 1/3] increase memory efficiency of coreg/geomorph --- README.md | 2 +- batch/batch_download_and_coregister.py | 51 ++++++++++++++++++++------ src/pdemtools/__init__.py | 2 +- src/pdemtools/_coreg.py | 23 +++++++----- src/pdemtools/_geomorphometry.py | 20 +++++----- 5 files changed, 66 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 2eb0977..58370c4 100644 --- a/README.md +++ b/README.md @@ -20,7 +20,7 @@ The second aim is to provide (pre)processing functions _specific_ to the sort of - Simple coregistration for quick elevation change analysis. - Identifying/masking sea level and icebergs. -Rather than introducing custom classes, pDEMtools will always try and return DEM data as an [xarray](https://docs.xarray.dev/en/stable/) DataArray with geospatial metadata via the [rioxarray](https://corteva.github.io/rioxarray/stable/) extension. The aim is to allow the user to quickly move beyond pDEMtools into their own analysis in whatever format they desire, be that `xarray`, `numpy` or `dask` datasets, DEM-specific Python packages such as [`xdem`](https://github.com/GlacioHack/xdem) for advanced coregistration or [`richdem`](https://github.com/r-barnes/richdem) for flow analysis, or exporting to geospatial file formats for analysis beyond Python. +Rather than introducing custom classes, pDEMtools will always try and return DEM data as an [`xarray`](https://docs.xarray.dev/en/stable/) DataArray with geospatial metadata via the [`rioxarray`](https://corteva.github.io/rioxarray/stable/) extension. The aim is to allow the user to quickly move beyond pDEMtools into their own analysis in whatever format they desire, be that `xarray`, `numpy` or `dask` datasets, DEM-specific Python packages such as [`xdem`](https://github.com/GlacioHack/xdem) for advanced coregistration or [`richdem`](https://github.com/r-barnes/richdem) for flow analysis, or exporting to geospatial file formats for analysis beyond Python. Contact: thomas.r.chudley@durham.ac.uk diff --git a/batch/batch_download_and_coregister.py b/batch/batch_download_and_coregister.py index 9577fbb..1bda538 100644 --- a/batch/batch_download_and_coregister.py +++ b/batch/batch_download_and_coregister.py @@ -5,22 +5,31 @@ The script will download the 2 m ArcticDEM or REMA mosaic for coregistration, and then loop through the strip record, downloading and coregistering against stable ground -identified within the BedMachine mask. +identified within the BedMachine mask. If coregistration is succesful, the file will +be saved ending *_coreg.tif. If coregistration fails (most usually because no stable +bedrock is available in the strip), the filename will not have `_coreg` appended. Note: this will only work in regions of Greenland and Antarctica where bare rock -is exposed. If you require coregistration in other regions, please drop me an email - -I would be happy to help. +is exposed. If you require coregistration in regions outside of Greenland or Antarctica, please +drop me an email - I would be happy to help. Tom Chudley | thomas.r.chudley@durham.ac.uk Durham University -November 2023 + +v1 | 2023-11 | Initial script +v2 | 2024-07 | Updated to make *_coreg.tif fname dependent on sucessful coregistration. + """ import os + import rioxarray as rxr import numpy as np import pdemtools as pdt +from glob import glob +from math import isnan + # ----------------------------------------------------------------------------------- # # EDIT THIS SECTION TO SPECIFY DEPENDENT FILES, THE REGION, AND BOUNDS OF INTEREST # ----------------------------------------------------------------------------------- # @@ -68,9 +77,9 @@ reference_dem = pdt.load.mosaic( dataset=dataset, # must be `arcticdem` or `rema` - resolution=2, # must be 2, 10, or 32 - bounds=bounds, # (xmin, ymin, xmax, ymax) or shapely geometry - version="v4.1", # optional: desired version (defaults to most recent) + resolution=2, # must be 2, 10, or 32 + bounds=bounds, # (xmin, ymin, xmax, ymax) or shapely geometry + version="v4.1", # optional: desired version (defaults to most recent) ) reference_dem.rio.to_raster( reference_dem_fpath, compress="ZSTD", predictor=3, zlevel=1 @@ -104,7 +113,7 @@ print(f"{n_strips} strips found") -i = 0 +i = 1 print("\nDownloading DEM strips...") for _, row in gdf.iterrows(): @@ -112,14 +121,34 @@ date_str = date.strftime("%Y%m%d") dem_id = row.dem_id - out_fpath = os.path.join(outdir, f"{date_str}_{dem_id}_coreg.tif") + out_fname = os.path.join(outdir, f"{date_str}_{dem_id}") + + # If the file doesn't yet exist, download. + if len(glob(f'{out_fname}*')) == 0: - if not os.path.exists(out_fpath): print(f"\nDownloading {i}/{n_strips} {os.path.basename(out_fpath)}...") + + # download DEM dem = pdt.load.from_search(row, bounds=bounds, bitmask=True) dem.compute() # rioxarray uses lazy evaluation, so we can force the download using the `.compute()` function. + + # pad to full size of AOI dem = dem.rio.pad_box(*bounds, constant_values=np.nan) - dem = dem.pdt.coregister(reference_dem, bedrock_mask, max_horiz_offset=50) + + # coregister DEM, with return_stats=True. + dem = dem.pdt.coregister(reference_dem, bedrock_mask, max_horiz_offset=50, return_stats=True) + + # return_stats=True, so extract DEM as well as RMSE (which will be NaN if the coregisteration failed) + rmse = dem[-1] + dem = dem[0] + + # check whether coreg worked, and construct filename appropriately + if isnan(rmse) == True: + out_fpath = out_fname + '.tif' + else: + out_fpath = out_fname + '_coreg.tif' + + # Export to geotiff dem.rio.to_raster(out_fpath, compress="ZSTD", predictor=3, zlevel=1) del dem diff --git a/src/pdemtools/__init__.py b/src/pdemtools/__init__.py index 0415759..81d1bb7 100644 --- a/src/pdemtools/__init__.py +++ b/src/pdemtools/__init__.py @@ -6,6 +6,6 @@ from ._accessor import DemAccessor -__version__ = "0.8.3" +__version__ = "0.8.4" __all__ = ["search", "DemAccessor"] diff --git a/src/pdemtools/_coreg.py b/src/pdemtools/_coreg.py index 8344985..e915b07 100644 --- a/src/pdemtools/_coreg.py +++ b/src/pdemtools/_coreg.py @@ -81,9 +81,9 @@ def coregisterdems( # Apply offsets if pn[1] != 0 and pn[2] != 0: - dem2n = shift_dem(dem2, pn.T[0], x, y) + dem2n = shift_dem(dem2, pn.T[0], x, y).astype('float32') else: - dem2n = dem2 - pn[0].astype(np.float32) + dem2n = dem2 - pn[0].astype('float32') # # Calculate slopes - original method from PGC # sy, sx = np.gradient(dem2n, res) @@ -159,6 +159,7 @@ def coregisterdems( # Build design matrix. X = np.column_stack((np.ones(n_count, dtype=np.float32), sx[n], sy[n])) + sx, sy = None, None # release for data amangement # Solve for new adjustment. p1 = np.reshape(np.linalg.lstsq(X, dz[n], rcond=None)[0], (-1, 1)) @@ -171,6 +172,8 @@ def coregisterdems( r = dz[n] - yhat.T[0] # residuals normr = np.linalg.norm(r) + dz = None # release for memory managment + rmse = normr / np.sqrt(nu) tval = stats.t.ppf((1 - 0.32 / 2), nu) @@ -286,18 +289,20 @@ def interp2_gdal(X, Y, Z, Xi, Yi, interp_str, extrapolate=False, oob_val=np.nan) "", interp_gdal, ) - gdal.ReprojectImage( - ds_in, - ds_out, - "", - "", - interp_gdal, - ) + # gdal.ReprojectImage( + # ds_in, + # ds_out, + # "", + # "", + # interp_gdal, + # ) Zi = ds_out.GetRasterBand(1).ReadAsArray() if not extrapolate: interp2_fill_oob(X, Y, Zi, Xi, Yi, oob_val) + + ds_in, ds_out = None, None return Zi diff --git a/src/pdemtools/_geomorphometry.py b/src/pdemtools/_geomorphometry.py index e6df4f1..bf39499 100644 --- a/src/pdemtools/_geomorphometry.py +++ b/src/pdemtools/_geomorphometry.py @@ -18,7 +18,7 @@ def _p_zt(z, w): @numba.njit def p_zt(z, w=2): - return _p_zt(z, w) + return _p_zt(z, w).astype('float32') @numba.stencil(func_or_mode="constant", cval=np.nan) @@ -28,7 +28,7 @@ def _q_zt(z, w): @numba.njit def q_zt(z, w=2): - return _q_zt(z, w) + return _q_zt(z, w).astype('float32') @numba.stencil(func_or_mode="constant", cval=np.nan) @@ -38,7 +38,7 @@ def _r_zt(z, w): @numba.njit def r_zt(z, w=2): - return _r_zt(z, w) + return _r_zt(z, w).astype('float32') @numba.stencil(func_or_mode="constant", cval=np.nan) @@ -48,7 +48,7 @@ def _t_zt(z, w): @numba.njit def t_zt(z, w=2): - return _t_zt(z, w) + return _t_zt(z, w).astype('float32') @numba.stencil(func_or_mode="constant", cval=np.nan) @@ -58,7 +58,7 @@ def _s_zt(z, w): @numba.njit def s_zt(z, w=2): - return _s_zt(z, w) + return _s_zt(z, w).astype('float32') """ @@ -85,7 +85,7 @@ def _p_f(z, w): @numba.njit def p_f(z, w=2): - return _p_f(z, w) + return _p_f(z, w).astype('float32') @numba.stencil(func_or_mode="constant", cval=np.nan) @@ -107,7 +107,7 @@ def _q_f(z, w): @numba.njit def q_f(z, w=2): - return _q_f(z, w) + return _q_f(z, w).astype('float32') @numba.stencil(func_or_mode="constant", cval=np.nan) @@ -142,7 +142,7 @@ def _r_f(z, w): @numba.njit def r_f(z, w=2): - return _r_f(z, w) + return _r_f(z, w).astype('float32') @numba.stencil(func_or_mode="constant", cval=np.nan) @@ -177,7 +177,7 @@ def _t_f(z, w): @numba.njit def t_f(z, w=2): - return _t_f(z, w) + return _t_f(z, w).astype('float32') @numba.stencil(func_or_mode="constant", cval=np.nan) @@ -204,7 +204,7 @@ def _s_f(z, w): @numba.njit def s_f(z, w=2): - return _s_f(z, w) + return _s_f(z, w).astype('float32') """ From b9f43f9dac95fa0a4d7687944e732745350336ae Mon Sep 17 00:00:00 2001 From: trchudley Date: Mon, 29 Jul 2024 14:35:39 +0100 Subject: [PATCH 2/3] update tests to account for newer float32 approach --- tests/test_processing.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/test_processing.py b/tests/test_processing.py index dab19b8..60d201c 100644 --- a/tests/test_processing.py +++ b/tests/test_processing.py @@ -78,7 +78,7 @@ def test_geomorphometry(): variable_min_max = { "slope": (0.0, 24.721811294555664), - "aspect": (0.0, 360.0), + "aspect": (1.3660377589985728e-05, 359.9999694824219), #(0.0, 360.0), "hillshade": (0.0, 1.0), "horizontal_curvature": (-0.0007999989320524037, 0.0007856095326133072), "vertical_curvature": (-0.027519449591636658, 0.022581202909350395), @@ -95,6 +95,7 @@ def test_geomorphometry(): for variables in variable_min_max.keys(): min_val = dem[variables].min().item() max_val = dem[variables].max().item() + print(f'{variables} | min: {min_val} | max: {max_val}') assert_allclose(min_val, variable_min_max[variables][0]) assert_allclose(max_val, variable_min_max[variables][1]) @@ -113,7 +114,7 @@ def test_coregistration(): diff_coreg = (dem_2_coreg - dem_1).mean().item() expected_diff = 15.476494902013904 - expected_diff_coreg = 0.19603339082138396 + expected_diff_coreg = 0.19603341170854155 # print(diff, corrected_diff) From 7c18d50dcef2dca67a631b62918424bcf9aa1c86 Mon Sep 17 00:00:00 2001 From: trchudley Date: Mon, 29 Jul 2024 14:38:38 +0100 Subject: [PATCH 3/3] update tests to account for newer float32 approach --- tests/test_processing.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_processing.py b/tests/test_processing.py index 60d201c..37c5a74 100644 --- a/tests/test_processing.py +++ b/tests/test_processing.py @@ -78,7 +78,7 @@ def test_geomorphometry(): variable_min_max = { "slope": (0.0, 24.721811294555664), - "aspect": (1.3660377589985728e-05, 359.9999694824219), #(0.0, 360.0), + "aspect": (0.0, 360.0), # (1.3660377589985728e-05, 359.9999694824219), "hillshade": (0.0, 1.0), "horizontal_curvature": (-0.0007999989320524037, 0.0007856095326133072), "vertical_curvature": (-0.027519449591636658, 0.022581202909350395),