From 8521a5342e583d451c27c2b6370999a7c7a3c6e2 Mon Sep 17 00:00:00 2001 From: mats-knmi <145579783+mats-knmi@users.noreply.github.com> Date: Mon, 7 Oct 2024 19:24:45 +0200 Subject: [PATCH] allow for significantly less memory usage in steps blending (#435) * allow for significantly less memory usage in steps blending * apply black * use threadpool * only decompose if not already done * fix black * update docstring * fix one more docstring and rename some variables * Apply suggestions from code review Co-authored-by: Ruben Imhoff <31476760+RubenImhoff@users.noreply.github.com> * rename precip_models_pm * move comment about memory reduction to velocity_models * fix zerovalue precip_cascade * use precip_models_cascade if present * set done to true * fix black * revert to old logic, but now with precip_models_cascade using new loop * fix issue with precip_models_cascade * fix tests * Move comment up a little --------- Co-authored-by: Ruben Imhoff <31476760+RubenImhoff@users.noreply.github.com> Co-authored-by: Daniele Nerini --- pysteps/blending/steps.py | 289 ++++++++++++++++++++------------------ 1 file changed, 152 insertions(+), 137 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 7e4535dd..a6ecc09f 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -46,6 +46,8 @@ import math import time from copy import deepcopy +from functools import partial +from multiprocessing.pool import ThreadPool import numpy as np from scipy.linalg import inv @@ -117,20 +119,29 @@ def forecast( ordered by timestamp from oldest to newest. The time steps between the inputs are assumed to be regular. precip_models: array-like - Array of shape (n_models,timesteps+1) containing, per timestep (t=0 to - lead time here) and per (NWP) model or model ensemble member, a - dictionary with a list of cascades obtained by calling a method - implemented in :py:mod:`pysteps.cascade.decomposition`. In case of one - (deterministic) model as input, add an extra dimension to make sure - precip_models is five dimensional prior to calling this function. - It is also possible to supply the original (NWP) model forecasts containing - rainfall fields as an array of shape (n_models,timestep+1,m,n), which will - then be decomposed in this function. Note that for an operational application - or for testing with multiple model runs, it is recommended to decompose - the model forecasts outside beforehand, as this reduces calculation times. + Either raw (NWP) model forecast data or decomposed (NWP) model forecast data. + If you supply decomposed data, it needs to be an array of shape + (n_models,timesteps+1) containing, per timestep (t=0 to lead time here) and + per (NWP) model or model ensemble member, a dictionary with a list of cascades + obtained by calling a method implemented in :py:mod:`pysteps.cascade.decomposition`. + If you supply the original (NWP) model forecast data, it needs to be an array of shape + (n_models,timestep+1,m,n) containing precipitation (or other) fields, which will + then be decomposed in this function. + + Depending on your use case it can be advantageous to decompose the model + forecasts outside beforehand, as this slightly reduces calculation times. This is possible with :py:func:`pysteps.blending.utils.decompose_NWP`, :py:func:`pysteps.blending.utils.compute_store_nwp_motion`, and - :py:func:`pysteps.blending.utils.load_NWP`. + :py:func:`pysteps.blending.utils.load_NWP`. However, if you have a lot of (NWP) model + members (e.g. 1 model member per nowcast member), this can lead to excessive memory + usage. + + To further reduce memory usage, both this array and the ``velocity_models`` array + can be given as float32. They will then be converted to float64 before computations + to minimize loss in precision. + + In case of one (deterministic) model as input, add an extra dimension to make sure + precip_models is four dimensional prior to calling this function. velocity: array-like Array of shape (2,m,n) containing the x- and y-components of the advection field. The velocities are assumed to represent one time step between the @@ -139,6 +150,10 @@ def forecast( Array of shape (n_models,timestep,2,m,n) containing the x- and y-components of the advection field for the (NWP) model field per forecast lead time. All values are required to be finite. + + To reduce memory usage, this array + can be given as float32. They will then be converted to float64 before computations + to minimize loss in precision. timesteps: int or list of floats Number of time steps to forecast or a list of time steps for which the forecasts are computed (relative to the input time step). The elements of @@ -557,22 +572,19 @@ def forecast( fft, ) - # 2.2 If necessary, decompose (NWP) model forecasts and stack cascades - ( - precip_models_cascade, - mu_models, - sigma_models, - precip_models_pm, - ) = _compute_cascade_decomposition_nwp( - precip_models, bp_filter, decompositor, recompositor, fft, domain - ) + # 2.2 If necessary, recompose (NWP) model forecasts + precip_models_cascade = None + + if precip_models.ndim != 4: + precip_models_cascade = precip_models + precip_models = _compute_cascade_recomposition_nwp(precip_models, recompositor) # 2.3 Check for zero input fields in the radar and NWP data. zero_precip_radar = blending.utils.check_norain(precip, precip_thr, norain_thr) # The norain fraction threshold used for nwp is the default value of 0.0, # since nwp does not suffer from clutter. zero_model_fields = blending.utils.check_norain( - precip_models_pm, precip_thr, norain_thr + precip_models, precip_thr, norain_thr ) if isinstance(timesteps, int): @@ -583,7 +595,7 @@ def forecast( timesteps = nowcast_utils.binned_timesteps(original_timesteps) timestep_type = "list" - # 2.3.1 If precip is below the norain threshold and precip_models_pm is zero, + # 2.3.1 If precip is below the norain threshold and precip_models is zero, # we consider it as no rain in the domain. # The forecast will directly return an array filled with the minimum # value present in precip (which equals zero rainfall in the used @@ -633,37 +645,41 @@ def forecast( # 2.3.3 If zero_precip_radar, make sure that precip_cascade does not contain # only nans or infs. If so, fill it with the zero value. if zero_precip_radar: - precip_cascade = np.nan_to_num( - precip_cascade, - copy=True, - nan=np.nanmin(precip_models_cascade), - posinf=np.nanmin(precip_models_cascade), - neginf=np.nanmin(precip_models_cascade), - ) - - # 2.3.4 Check if the NWP fields contain nans or infinite numbers. If so, - # fill these with the minimum value present in precip (corresponding to - # zero rainfall in the radar observations) - ( - precip_models_cascade, - precip_models_pm, - mu_models, - sigma_models, - ) = _fill_nans_infs_nwp_cascade( - precip_models_cascade, - precip_models_pm, - precip_cascade, - precip, - mu_models, - sigma_models, - ) + # Look for a timestep and member with rain so that we have a sensible decomposition + done = False + for t in timesteps: + if done: + break + for j in range(precip_models.shape[0]): + if not blending.utils.check_norain( + precip_models[j, t], precip_thr, norain_thr + ): + if precip_models_cascade is not None: + precip_cascade[~np.isfinite(precip_cascade)] = np.nanmin( + precip_models_cascade[j, t]["cascade_levels"] + ) + continue + precip_models_cascade_temp = decompositor( + precip_models[j, t, :, :], + bp_filter=bp_filter, + fft_method=fft, + output_domain=domain, + normalize=True, + compute_stats=True, + compact_output=True, + )["cascade_levels"] + precip_cascade[~np.isfinite(precip_cascade)] = np.nanmin( + precip_models_cascade_temp + ) + done = True + break # 2.3.5 If zero_precip_radar is True, only use the velocity field of the NWP # forecast. I.e., velocity (radar) equals velocity_model at the first time # step. if zero_precip_radar: # Use the velocity from velocity_models at time step 0 - velocity = velocity_models[:, 0, :, :, :] + velocity = velocity_models[:, 0, :, :, :].astype(np.float64, copy=False) # Take the average over the first axis, which corresponds to n_models # (hence, the model average) velocity = np.mean(velocity, axis=0) @@ -675,7 +691,7 @@ def forecast( # rainfall data if zero_precip_radar: precip_noise_input = _determine_max_nr_rainy_cells_nwp( - precip_models_pm, precip_thr, precip_models_pm.shape[0], timesteps + precip_models, precip_thr, precip_models.shape[0], timesteps ) # Make sure precip_noise_input is three dimensional if len(precip_noise_input.shape) != 3: @@ -809,23 +825,80 @@ def forecast( if measure_time: starttime = time.time() + if precip_models_cascade is not None: + decomp_precip_models = list(precip_models_cascade[:, t]) + else: + if precip_models.shape[0] == 1: + decomp_precip_models = [ + decompositor( + precip_models[0, t, :, :], + bp_filter=bp_filter, + fft_method=fft, + output_domain=domain, + normalize=True, + compute_stats=True, + compact_output=True, + ) + ] + else: + with ThreadPool(num_workers) as pool: + decomp_precip_models = pool.map( + partial( + decompositor, + bp_filter=bp_filter, + fft_method=fft, + output_domain=domain, + normalize=True, + compute_stats=True, + compact_output=True, + ), + list(precip_models[:, t, :, :]), + ) + + precip_models_cascade_temp = np.array( + [decomp["cascade_levels"] for decomp in decomp_precip_models] + ) + mu_models_temp = np.array( + [decomp["means"] for decomp in decomp_precip_models] + ) + sigma_models_temp = np.array( + [decomp["stds"] for decomp in decomp_precip_models] + ) + + # 2.3.4 Check if the NWP fields contain nans or infinite numbers. If so, + # fill these with the minimum value present in precip (corresponding to + # zero rainfall in the radar observations) + ( + precip_models_cascade_temp, + precip_models_temp, + mu_models_temp, + sigma_models_temp, + ) = _fill_nans_infs_nwp_cascade( + precip_models_cascade_temp, + precip_models[:, t, :, :].astype(np.float64, copy=False), + precip_cascade, + precip, + mu_models_temp, + sigma_models_temp, + ) + # 8.1.1 Before calling the worker for the forecast loop, determine which (NWP) # models will be combined with which nowcast ensemble members. With the # way it is implemented at this moment: n_ens_members of the output equals # the maximum number of (ensemble) members in the input (either the nowcasts or NWP). ( precip_models_cascade_temp, - precip_models_pm_temp, + precip_models_temp, velocity_models_temp, mu_models_temp, sigma_models_temp, n_model_indices, ) = _find_nwp_combination( - precip_models_cascade[:, t, :, :, :], - precip_models_pm[:, t, :, :], - velocity_models[:, t, :, :, :], - mu_models[:, t, :], - sigma_models[:, t, :], + precip_models_cascade_temp, + precip_models_temp, + velocity_models[:, t, :, :, :].astype(np.float64, copy=False), + mu_models_temp, + sigma_models_temp, n_ens_members, ar_order, n_cascade_levels, @@ -1421,7 +1494,7 @@ def worker(j): R_pm_stacked = np.concatenate( ( R_pm_ep[None, t_index], - precip_models_pm_temp, + precip_models_temp, ), axis=0, ) @@ -1429,7 +1502,7 @@ def worker(j): R_pm_stacked = np.concatenate( ( R_pm_ep[None, t_index], - precip_models_pm_temp[None, j], + precip_models_temp[None, j], ), axis=0, ) @@ -1446,11 +1519,11 @@ def worker(j): weights_pm_normalized_mod_only.reshape( weights_pm_normalized_mod_only.shape[0], 1, 1 ) - * precip_models_pm_temp, + * precip_models_temp, axis=0, ) else: - R_pm_blended_mod_only = precip_models_pm_temp[j] + R_pm_blended_mod_only = precip_models_temp[j] # The extrapolation components are NaN outside the advected # radar domain. This results in NaN values in the blended @@ -1541,7 +1614,7 @@ def worker(j): # that for the probability matching. if probmatching_method is not None and resample_distribution: arr1 = R_pm_ep[t_index] - arr2 = precip_models_pm_temp[j] + arr2 = precip_models_temp[j] # resample weights based on cascade level 2. # Areas where one of the fields is nan are not included. R_pm_resampled = probmatching.resample_distributions( @@ -2043,78 +2116,24 @@ def _compute_cascade_decomposition_radar( return R_c, mu_extrapolation, sigma_extrapolation -def _compute_cascade_decomposition_nwp( - precip_models, bp_filter, decompositor, recompositor, fft, domain -): - """If necessary, decompose (NWP) model forecasts and stack cascades.""" - if precip_models.ndim == 4: - # Keep the model fields for the probability matching later on - precip_models_pm = precip_models.copy() - # Decompose the (NWP) model forecasts - precip_models_cascade = [] - # Loop through the n_models - for i in range(precip_models.shape[0]): - precip_model_cascade = [] - # Loop through the time steps - for j in range(precip_models.shape[1]): - precip_model_cascade.append( - decompositor( - field=precip_models[i, j, :, :], - bp_filter=bp_filter, - fft_method=fft, - output_domain=domain, - normalize=True, - compute_stats=True, - compact_output=True, - ) - ) - precip_models_cascade.append(precip_model_cascade) - precip_models_cascade = np.array(precip_models_cascade) - - precip_models, precip_model_cascade = None, None - else: - precip_models_cascade = precip_models - precip_models_pm = None +def _compute_cascade_recomposition_nwp(precip_models_cascade, recompositor): + """If necessary, recompose (NWP) model forecasts.""" + precip_models = None - # Stack the (NWP) model cascades in separate normalized cascades and return - # the means and sigmas. - # The normalized model cascade should have the shape: - # [n_models, n_timesteps, n_cascade_levels, m, n] + # Recompose the (NWP) model cascades to have rainfall fields per + # model and time step, which will be used in the probability matching steps. + # Recomposed cascade will have shape: [n_models, n_timesteps, m, n] precip_models = [] - mu_models = [] - sigma_models = [] - # Stack it per model and combine that for i in range(precip_models_cascade.shape[0]): - precip_model, mu_model, sigma_model = blending.utils.stack_cascades( - R_d=precip_models_cascade[i, :], donorm=False - ) + precip_model = [] + for time_step in range(precip_models_cascade.shape[1]): + precip_model.append(recompositor(precip_models_cascade[i, time_step])) precip_models.append(precip_model) - mu_models.append(mu_model) - sigma_models.append(sigma_model) precip_models = np.stack(precip_models) - mu_models = np.stack(mu_models) - sigma_models = np.stack(sigma_models) - - precip_model, mu_model, sigma_model = None, None, None - - if precip_models_pm is None: - # Finally, recompose the (NWP) model cascades to have rainfall fields per - # model and time step, which will be used in the probability matching steps. - # Recomposed cascade will have shape: [n_models, n_timesteps, m, n] - precip_models_pm = [] - for i in range(precip_models_cascade.shape[0]): - precip_model_pm = [] - for time_step in range(precip_models_cascade.shape[1]): - precip_model_pm.append( - recompositor(precip_models_cascade[i, time_step]) - ) - precip_models_pm.append(precip_model_pm) - - precip_models_pm = np.stack(precip_models_pm) - precip_model_pm = None + precip_model = None - return precip_models, mu_models, sigma_models, precip_models_pm + return precip_models def _estimate_ar_parameters_radar( @@ -2434,7 +2453,7 @@ def _init_noise_cascade( def _fill_nans_infs_nwp_cascade( precip_models_cascade, - precip_models_pm, + precip_models, precip_cascade, precip, mu_models, @@ -2446,34 +2465,30 @@ def _fill_nans_infs_nwp_cascade( min_cascade = np.nanmin(precip_cascade) min_precip = np.nanmin(precip) precip_models_cascade[~np.isfinite(precip_models_cascade)] = min_cascade - precip_models_pm[~np.isfinite(precip_models_pm)] = min_precip + precip_models[~np.isfinite(precip_models)] = min_precip # Also set any nans or infs in the mean and sigma of the cascade to # respectively 0.0 and 1.0 mu_models[~np.isfinite(mu_models)] = 0.0 sigma_models[~np.isfinite(sigma_models)] = 0.0 - return precip_models_cascade, precip_models_pm, mu_models, sigma_models + return precip_models_cascade, precip_models, mu_models, sigma_models -def _determine_max_nr_rainy_cells_nwp( - precip_models_pm, precip_thr, n_models, timesteps -): +def _determine_max_nr_rainy_cells_nwp(precip_models, precip_thr, n_models, timesteps): """Initialize noise based on the NWP field time step where the fraction of rainy cells is highest""" if precip_thr is None: - precip_thr = np.nanmin(precip_models_pm) + precip_thr = np.nanmin(precip_models) max_rain_pixels = -1 max_rain_pixels_j = -1 max_rain_pixels_t = -1 for j in range(n_models): for t in timesteps: - rain_pixels = precip_models_pm[j][t][ - precip_models_pm[j][t] > precip_thr - ].size + rain_pixels = precip_models[j][t][precip_models[j][t] > precip_thr].size if rain_pixels > max_rain_pixels: max_rain_pixels = rain_pixels max_rain_pixels_j = j max_rain_pixels_t = t - precip_noise_input = precip_models_pm[max_rain_pixels_j][max_rain_pixels_t] + precip_noise_input = precip_models[max_rain_pixels_j][max_rain_pixels_t] - return precip_noise_input + return precip_noise_input.astype(np.float64, copy=False)