From a6f641aa214b33f860f396eaedd166d831f65a9e Mon Sep 17 00:00:00 2001 From: Arijus Pleska Date: Thu, 28 Sep 2017 13:04:01 +0300 Subject: [PATCH] [MaxVar split, Part 1] Added the general Gaussian noise model. (#233) * [MaxVar, Part 1] Added the general Gaussian noise model. * Removed the covariance matrix duplicates. * Set the priors to be always accustomed to the true parameters' values. --- CHANGELOG.rst | 1 + elfi/examples/bignk.py | 2 +- elfi/examples/gauss.py | 131 ++++++++++++++++++----- elfi/examples/gnk.py | 17 +-- elfi/methods/post_processing.py | 4 +- tests/conftest.py | 1 + tests/functional/test_post_processing.py | 30 +++--- tests/unit/test_examples.py | 20 +++- 8 files changed, 154 insertions(+), 52 deletions(-) diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 976827a5..3ac6b4e3 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -5,6 +5,7 @@ dev --- - Furhther performance improvements for rerunning inference using stored data via caches +- Added the general Gaussian noise example model (fixed covariance) 0.6.2 (2017-09-06) ------------------ diff --git a/elfi/examples/bignk.py b/elfi/examples/bignk.py index 7c1db509..d7feeca7 100644 --- a/elfi/examples/bignk.py +++ b/elfi/examples/bignk.py @@ -98,7 +98,7 @@ def BiGNK(a1, a2, b1, b2, g1, g2, k1, k2, rho, c=.8, n_obs=150, batch_size=1, term_product_misaligned = np.swapaxes(term_product, 1, 0) y_misaligned = np.add(a, term_product_misaligned) y = np.swapaxes(y_misaligned, 1, 0) - # print(y.shape) + return y diff --git a/elfi/examples/gauss.py b/elfi/examples/gauss.py index 2e2b8091..89557cc8 100644 --- a/elfi/examples/gauss.py +++ b/elfi/examples/gauss.py @@ -1,4 +1,4 @@ -"""An example implementation of a Gaussian noise model.""" +"""Example implementations of Gaussian noise models.""" from functools import partial @@ -6,10 +6,11 @@ import scipy.stats as ss import elfi +from elfi.examples.gnk import euclidean_multidim -def Gauss(mu, sigma, n_obs=50, batch_size=1, random_state=None): - """Sample the Gaussian distribution. +def gauss(mu, sigma, n_obs=50, batch_size=1, random_state=None): + """Sample the 1-D Gaussian distribution. Parameters ---------- @@ -17,14 +18,61 @@ def Gauss(mu, sigma, n_obs=50, batch_size=1, random_state=None): sigma : float, array_like n_obs : int, optional batch_size : int, optional - random_state : RandomState, optional + random_state : np.random.RandomState, optional + + Returns + ------- + y_obs : array_like + 1-D observation. + + """ + # Handling batching. + batches_mu = np.asanyarray(mu).reshape((-1, 1)) + batches_sigma = np.asanyarray(sigma).reshape((-1, 1)) + + # Sampling observations. + y_obs = ss.norm.rvs(loc=batches_mu, scale=batches_sigma, + size=(batch_size, n_obs), random_state=random_state) + return y_obs + + +def gauss_nd_mean(*mu, cov_matrix, n_obs=15, batch_size=1, random_state=None): + """Sample an n-D Gaussian distribution. + + Parameters + ---------- + *mu : array_like + Mean parameters. + cov_matrix : array_like + Covariance matrix. + n_obs : int, optional + batch_size : int, optional + random_state : np.random.RandomState, optional + + Returns + ------- + y_obs : array_like + n-D observation. """ - # Standardising the parameter's format. - mu = np.asanyarray(mu).reshape((-1, 1)) - sigma = np.asanyarray(sigma).reshape((-1, 1)) - y = ss.norm.rvs(loc=mu, scale=sigma, size=(batch_size, n_obs), random_state=random_state) - return y + n_dim = len(mu) + + # Handling batching. + batches_mu = np.zeros(shape=(batch_size, n_dim)) + for idx_dim, param_mu in enumerate(mu): + batches_mu[:, idx_dim] = param_mu + + # Sampling the observations. + y_obs = np.zeros(shape=(batch_size, n_obs, n_dim)) + for idx_batch in range(batch_size): + y_batch = ss.multivariate_normal.rvs(mean=batches_mu[idx_batch], + cov=cov_matrix, + size=n_obs, + random_state=random_state) + if n_dim == 1: + y_batch = y_batch[:, np.newaxis] + y_obs[idx_batch, :, :] = y_batch + return y_obs def ss_mean(x): @@ -39,36 +87,71 @@ def ss_var(x): return ss -def get_model(n_obs=50, true_params=None, seed_obs=None): - """Return a complete Gaussian noise model. +def get_model(n_obs=50, true_params=None, seed_obs=None, nd_mean=False, cov_matrix=None): + """Return a Gaussian noise model. Parameters ---------- n_obs : int, optional - the number of observations true_params : list, optional - true_params[0] corresponds to the mean, - true_params[1] corresponds to the standard deviation + Default parameter settings. seed_obs : int, optional - seed for the observed data generation + Seed for the observed data generation. + nd_mean : bool, optional + Option to use an n-D mean Gaussian noise model. + cov_matrix : None, optional + Covariance matrix, a requirement for the nd_mean model. Returns ------- m : elfi.ElfiModel """ + # Defining the default settings. if true_params is None: - true_params = [10, 2] + if nd_mean: + true_params = [4, 4] # 2-D mean. + else: + true_params = [4, .4] # mean and standard deviation. - y_obs = Gauss(*true_params, n_obs=n_obs, random_state=np.random.RandomState(seed_obs)) - sim_fn = partial(Gauss, n_obs=n_obs) + # Choosing the simulator for both observations and simulations. + if nd_mean: + sim_fn = partial(gauss_nd_mean, cov_matrix=cov_matrix, n_obs=n_obs) + else: + sim_fn = partial(gauss, n_obs=n_obs) + + # Obtaining the observations. + y_obs = sim_fn(*true_params, n_obs=n_obs, random_state=np.random.RandomState(seed_obs)) m = elfi.ElfiModel() - elfi.Prior('uniform', -10, 50, model=m, name='mu') - elfi.Prior('truncnorm', 0.01, 5, model=m, name='sigma') - elfi.Simulator(sim_fn, m['mu'], m['sigma'], observed=y_obs, name='Gauss') - elfi.Summary(ss_mean, m['Gauss'], name='S1') - elfi.Summary(ss_var, m['Gauss'], name='S2') - elfi.Distance('euclidean', m['S1'], m['S2'], name='d') + + # Initialising the priors. + eps_prior = 5 # The longest distance from the median of an initialised prior's distribution. + priors = [] + if nd_mean: + n_dim = len(true_params) + for i in range(n_dim): + name_prior = 'mu_{}'.format(i) + prior_mu = elfi.Prior('uniform', true_params[i] - eps_prior, + 2 * eps_prior, model=m, name=name_prior) + priors.append(prior_mu) + else: + priors.append(elfi.Prior('uniform', true_params[0] - eps_prior, + 2 * eps_prior, model=m, name='mu')) + priors.append(elfi.Prior('truncnorm', + np.amax([.01, true_params[1] - eps_prior]), + 2 * eps_prior, model=m, name='sigma')) + elfi.Simulator(sim_fn, *priors, observed=y_obs, name='gauss') + + # Initialising the summary statistics. + sumstats = [] + sumstats.append(elfi.Summary(ss_mean, m['gauss'], name='ss_mean')) + sumstats.append(elfi.Summary(ss_var, m['gauss'], name='ss_var')) + + # Choosing the discrepancy metric. + if nd_mean: + elfi.Discrepancy(euclidean_multidim, *sumstats, name='d') + else: + elfi.Distance('euclidean', *sumstats, name='d') return m diff --git a/elfi/examples/gnk.py b/elfi/examples/gnk.py index eeabb4df..80337c0c 100644 --- a/elfi/examples/gnk.py +++ b/elfi/examples/gnk.py @@ -134,11 +134,11 @@ def euclidean_multidim(*simulated, observed): array_like """ - pts_sim = np.column_stack(simulated) - pts_obs = np.column_stack(observed) - d_multidim = np.sum((pts_sim - pts_obs)**2., axis=1) - d_squared = np.sum(d_multidim, axis=1) - d = np.sqrt(d_squared) + pts_sim = np.stack(simulated, axis=1) + pts_obs = np.stack(observed, axis=1) + d_ss_merged = np.sum((pts_sim - pts_obs)**2., axis=1) + d_dim_merged = np.sum(d_ss_merged, axis=1) + d = np.sqrt(d_dim_merged) return d @@ -185,8 +185,8 @@ def ss_robust(y): ss_g = _get_ss_g(y) ss_k = _get_ss_k(y) - ss_robust = np.stack((ss_a, ss_b, ss_g, ss_k), axis=1) - + # Combining the summary statistics by expanding the dimensionality. + ss_robust = np.hstack((ss_a, ss_b, ss_g, ss_k)) return ss_robust @@ -209,7 +209,8 @@ def ss_octile(y): octiles = np.linspace(12.5, 87.5, 7) E1, E2, E3, E4, E5, E6, E7 = np.percentile(y, octiles, axis=1) - ss_octile = np.stack((E1, E2, E3, E4, E5, E6, E7), axis=1) + # Combining the summary statistics by expanding the dimensionality. + ss_octile = np.hstack((E1, E2, E3, E4, E5, E6, E7)) return ss_octile diff --git a/elfi/methods/post_processing.py b/elfi/methods/post_processing.py index 384c7abc..cc3ba785 100644 --- a/elfi/methods/post_processing.py +++ b/elfi/methods/post_processing.py @@ -242,8 +242,8 @@ def adjust_posterior(sample, model, summary_names, parameter_names=None, adjustm >>> import elfi >>> from elfi.examples import gauss >>> m = gauss.get_model() - >>> res = elfi.Rejection(m['d'], output_names=['S1', 'S2']).sample(1000) - >>> adj = adjust_posterior(res, m, ['S1', 'S2'], ['mu'], LinearAdjustment()) + >>> res = elfi.Rejection(m['d'], output_names=['ss_mean', 'ss_var']).sample(1000) + >>> adj = adjust_posterior(res, m, ['ss_mean', 'ss_var'], ['mu'], LinearAdjustment()) """ adjustment = _get_adjustment(adjustment) diff --git a/tests/conftest.py b/tests/conftest.py index 2f749a3c..7213e864 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -9,6 +9,7 @@ import elfi.clients.ipyparallel as eipp import elfi.clients.multiprocessing as mp import elfi.clients.native as native +import elfi.examples.gauss import elfi.examples.ma2 elfi.clients.native.set_as_default() diff --git a/tests/functional/test_post_processing.py b/tests/functional/test_post_processing.py index bcce3e95..a7e2e0da 100644 --- a/tests/functional/test_post_processing.py +++ b/tests/functional/test_post_processing.py @@ -28,9 +28,9 @@ def test_single_parameter_linear_adjustment(): # Hyperparameters mu0, sigma0 = (10, 100) - y_obs = gauss.Gauss( + y_obs = gauss.gauss( mu, sigma, n_obs=n_obs, batch_size=1, random_state=np.random.RandomState(seed)) - sim_fn = partial(gauss.Gauss, sigma=sigma, n_obs=n_obs) + sim_fn = partial(gauss.gauss, sigma=sigma, n_obs=n_obs) # Posterior n = y_obs.shape[1] @@ -40,12 +40,12 @@ def test_single_parameter_linear_adjustment(): # Model m = elfi.ElfiModel() elfi.Prior('norm', mu0, sigma0, model=m, name='mu') - elfi.Simulator(sim_fn, m['mu'], observed=y_obs, name='Gauss') - elfi.Summary(lambda x: x.mean(axis=1), m['Gauss'], name='S1') - elfi.Distance('euclidean', m['S1'], name='d') + elfi.Simulator(sim_fn, m['mu'], observed=y_obs, name='gauss') + elfi.Summary(lambda x: x.mean(axis=1), m['gauss'], name='ss_mean') + elfi.Distance('euclidean', m['ss_mean'], name='d') - res = elfi.Rejection(m['d'], output_names=['S1'], seed=seed).sample(1000, threshold=1) - adj = elfi.adjust_posterior(model=m, sample=res, parameter_names=['mu'], summary_names=['S1']) + res = elfi.Rejection(m['d'], output_names=['ss_mean'], seed=seed).sample(1000, threshold=1) + adj = elfi.adjust_posterior(model=m, sample=res, parameter_names=['mu'], summary_names=['ss_mean']) assert np.allclose(_statistics(adj.outputs['mu']), (4.9772879640569778, 0.02058680115402544)) @@ -61,9 +61,9 @@ def test_nonfinite_values(): # Hyperparameters mu0, sigma0 = (10, 100) - y_obs = gauss.Gauss( + y_obs = gauss.gauss( mu, sigma, n_obs=n_obs, batch_size=1, random_state=np.random.RandomState(seed)) - sim_fn = partial(gauss.Gauss, sigma=sigma, n_obs=n_obs) + sim_fn = partial(gauss.gauss, sigma=sigma, n_obs=n_obs) # Posterior n = y_obs.shape[1] @@ -73,19 +73,19 @@ def test_nonfinite_values(): # Model m = elfi.ElfiModel() elfi.Prior('norm', mu0, sigma0, model=m, name='mu') - elfi.Simulator(sim_fn, m['mu'], observed=y_obs, name='Gauss') - elfi.Summary(lambda x: x.mean(axis=1), m['Gauss'], name='S1') - elfi.Distance('euclidean', m['S1'], name='d') + elfi.Simulator(sim_fn, m['mu'], observed=y_obs, name='gauss') + elfi.Summary(lambda x: x.mean(axis=1), m['gauss'], name='ss_mean') + elfi.Distance('euclidean', m['ss_mean'], name='d') - res = elfi.Rejection(m['d'], output_names=['S1'], seed=seed).sample(1000, threshold=1) + res = elfi.Rejection(m['d'], output_names=['ss_mean'], seed=seed).sample(1000, threshold=1) # Add some invalid values res.outputs['mu'] = np.append(res.outputs['mu'], np.array([np.inf])) - res.outputs['S1'] = np.append(res.outputs['S1'], np.array([np.inf])) + res.outputs['ss_mean'] = np.append(res.outputs['ss_mean'], np.array([np.inf])) with pytest.warns(UserWarning): adj = elfi.adjust_posterior( - model=m, sample=res, parameter_names=['mu'], summary_names=['S1']) + model=m, sample=res, parameter_names=['mu'], summary_names=['ss_mean']) assert np.allclose(_statistics(adj.outputs['mu']), (4.9772879640569778, 0.02058680115402544)) diff --git a/tests/unit/test_examples.py b/tests/unit/test_examples.py index 92c6feff..6e0f09bb 100644 --- a/tests/unit/test_examples.py +++ b/tests/unit/test_examples.py @@ -41,12 +41,28 @@ def test_bdm(): if do_cleanup: os.system('rm {}/bdm'.format(cpp_path)) - -def test_Gauss(): +def test_gauss(): m = gauss.get_model() rej = elfi.Rejection(m, m['d'], batch_size=10) rej.sample(20) +def test_gauss_1d_mean(): + params_true = [4] + cov_matrix = [1] + + m = gauss.get_model(true_params=params_true, nd_mean=True, cov_matrix=cov_matrix) + rej = elfi.Rejection(m, m['d'], batch_size=10) + rej.sample(20) + + +def test_gauss_2d_mean(): + params_true = [4, 4] + cov_matrix = [[1, .5], [.5, 1]] + + m = gauss.get_model(true_params=params_true, nd_mean=True, cov_matrix=cov_matrix) + rej = elfi.Rejection(m, m['d'], batch_size=10) + rej.sample(20) + def test_Ricker(): m = ricker.get_model()