From a6f641aa214b33f860f396eaedd166d831f65a9e Mon Sep 17 00:00:00 2001
From: Arijus Pleska <perdaug@users.noreply.github.com>
Date: Thu, 28 Sep 2017 13:04:01 +0300
Subject: [PATCH] [MaxVar split, Part 1] Added the general Gaussian noise
 model. (#233)

* [MaxVar, Part 1] Added the general Gaussian noise model.

* Removed the covariance matrix duplicates.

* Set the priors to be always accustomed to the true parameters' values.
---
 CHANGELOG.rst                            |   1 +
 elfi/examples/bignk.py                   |   2 +-
 elfi/examples/gauss.py                   | 131 ++++++++++++++++++-----
 elfi/examples/gnk.py                     |  17 +--
 elfi/methods/post_processing.py          |   4 +-
 tests/conftest.py                        |   1 +
 tests/functional/test_post_processing.py |  30 +++---
 tests/unit/test_examples.py              |  20 +++-
 8 files changed, 154 insertions(+), 52 deletions(-)

diff --git a/CHANGELOG.rst b/CHANGELOG.rst
index 976827a5..3ac6b4e3 100644
--- a/CHANGELOG.rst
+++ b/CHANGELOG.rst
@@ -5,6 +5,7 @@ dev
 ---
 
 - Furhther performance improvements for rerunning inference using stored data via caches
+- Added the general Gaussian noise example model (fixed covariance)
 
 0.6.2 (2017-09-06)
 ------------------
diff --git a/elfi/examples/bignk.py b/elfi/examples/bignk.py
index 7c1db509..d7feeca7 100644
--- a/elfi/examples/bignk.py
+++ b/elfi/examples/bignk.py
@@ -98,7 +98,7 @@ def BiGNK(a1, a2, b1, b2, g1, g2, k1, k2, rho, c=.8, n_obs=150, batch_size=1,
     term_product_misaligned = np.swapaxes(term_product, 1, 0)
     y_misaligned = np.add(a, term_product_misaligned)
     y = np.swapaxes(y_misaligned, 1, 0)
-    # print(y.shape)
+
     return y
 
 
diff --git a/elfi/examples/gauss.py b/elfi/examples/gauss.py
index 2e2b8091..89557cc8 100644
--- a/elfi/examples/gauss.py
+++ b/elfi/examples/gauss.py
@@ -1,4 +1,4 @@
-"""An example implementation of a Gaussian noise model."""
+"""Example implementations of Gaussian noise models."""
 
 from functools import partial
 
@@ -6,10 +6,11 @@
 import scipy.stats as ss
 
 import elfi
+from elfi.examples.gnk import euclidean_multidim
 
 
-def Gauss(mu, sigma, n_obs=50, batch_size=1, random_state=None):
-    """Sample the Gaussian distribution.
+def gauss(mu, sigma, n_obs=50, batch_size=1, random_state=None):
+    """Sample the 1-D Gaussian distribution.
 
     Parameters
     ----------
@@ -17,14 +18,61 @@ def Gauss(mu, sigma, n_obs=50, batch_size=1, random_state=None):
     sigma : float, array_like
     n_obs : int, optional
     batch_size : int, optional
-    random_state : RandomState, optional
+    random_state : np.random.RandomState, optional
+
+    Returns
+    -------
+    y_obs : array_like
+        1-D observation.
+
+    """
+    # Handling batching.
+    batches_mu = np.asanyarray(mu).reshape((-1, 1))
+    batches_sigma = np.asanyarray(sigma).reshape((-1, 1))
+
+    # Sampling observations.
+    y_obs = ss.norm.rvs(loc=batches_mu, scale=batches_sigma,
+                        size=(batch_size, n_obs), random_state=random_state)
+    return y_obs
+
+
+def gauss_nd_mean(*mu, cov_matrix, n_obs=15, batch_size=1, random_state=None):
+    """Sample an n-D Gaussian distribution.
+
+    Parameters
+    ----------
+    *mu : array_like
+        Mean parameters.
+    cov_matrix : array_like
+        Covariance matrix.
+    n_obs : int, optional
+    batch_size : int, optional
+    random_state : np.random.RandomState, optional
+
+    Returns
+    -------
+    y_obs : array_like
+        n-D observation.
 
     """
-    # Standardising the parameter's format.
-    mu = np.asanyarray(mu).reshape((-1, 1))
-    sigma = np.asanyarray(sigma).reshape((-1, 1))
-    y = ss.norm.rvs(loc=mu, scale=sigma, size=(batch_size, n_obs), random_state=random_state)
-    return y
+    n_dim = len(mu)
+
+    # Handling batching.
+    batches_mu = np.zeros(shape=(batch_size, n_dim))
+    for idx_dim, param_mu in enumerate(mu):
+        batches_mu[:, idx_dim] = param_mu
+
+    # Sampling the observations.
+    y_obs = np.zeros(shape=(batch_size, n_obs, n_dim))
+    for idx_batch in range(batch_size):
+        y_batch = ss.multivariate_normal.rvs(mean=batches_mu[idx_batch],
+                                             cov=cov_matrix,
+                                             size=n_obs,
+                                             random_state=random_state)
+        if n_dim == 1:
+            y_batch = y_batch[:, np.newaxis]
+        y_obs[idx_batch, :, :] = y_batch
+    return y_obs
 
 
 def ss_mean(x):
@@ -39,36 +87,71 @@ def ss_var(x):
     return ss
 
 
-def get_model(n_obs=50, true_params=None, seed_obs=None):
-    """Return a complete Gaussian noise model.
+def get_model(n_obs=50, true_params=None, seed_obs=None, nd_mean=False, cov_matrix=None):
+    """Return a Gaussian noise model.
 
     Parameters
     ----------
     n_obs : int, optional
-        the number of observations
     true_params : list, optional
-        true_params[0] corresponds to the mean,
-        true_params[1] corresponds to the standard deviation
+        Default parameter settings.
     seed_obs : int, optional
-        seed for the observed data generation
+        Seed for the observed data generation.
+    nd_mean : bool, optional
+        Option to use an n-D mean Gaussian noise model.
+    cov_matrix : None, optional
+        Covariance matrix, a requirement for the nd_mean model.
 
     Returns
     -------
     m : elfi.ElfiModel
 
     """
+    # Defining the default settings.
     if true_params is None:
-        true_params = [10, 2]
+        if nd_mean:
+            true_params = [4, 4]  # 2-D mean.
+        else:
+            true_params = [4, .4]  # mean and standard deviation.
 
-    y_obs = Gauss(*true_params, n_obs=n_obs, random_state=np.random.RandomState(seed_obs))
-    sim_fn = partial(Gauss, n_obs=n_obs)
+    # Choosing the simulator for both observations and simulations.
+    if nd_mean:
+        sim_fn = partial(gauss_nd_mean, cov_matrix=cov_matrix, n_obs=n_obs)
+    else:
+        sim_fn = partial(gauss, n_obs=n_obs)
+
+    # Obtaining the observations.
+    y_obs = sim_fn(*true_params, n_obs=n_obs, random_state=np.random.RandomState(seed_obs))
 
     m = elfi.ElfiModel()
-    elfi.Prior('uniform', -10, 50, model=m, name='mu')
-    elfi.Prior('truncnorm', 0.01, 5, model=m, name='sigma')
-    elfi.Simulator(sim_fn, m['mu'], m['sigma'], observed=y_obs, name='Gauss')
-    elfi.Summary(ss_mean, m['Gauss'], name='S1')
-    elfi.Summary(ss_var, m['Gauss'], name='S2')
-    elfi.Distance('euclidean', m['S1'], m['S2'], name='d')
+
+    # Initialising the priors.
+    eps_prior = 5  # The longest distance from the median of an initialised prior's distribution.
+    priors = []
+    if nd_mean:
+        n_dim = len(true_params)
+        for i in range(n_dim):
+            name_prior = 'mu_{}'.format(i)
+            prior_mu = elfi.Prior('uniform', true_params[i] - eps_prior,
+                                  2 * eps_prior, model=m, name=name_prior)
+            priors.append(prior_mu)
+    else:
+        priors.append(elfi.Prior('uniform', true_params[0] - eps_prior,
+                                 2 * eps_prior, model=m, name='mu'))
+        priors.append(elfi.Prior('truncnorm',
+                                 np.amax([.01, true_params[1] - eps_prior]),
+                                 2 * eps_prior, model=m, name='sigma'))
+    elfi.Simulator(sim_fn, *priors, observed=y_obs, name='gauss')
+
+    # Initialising the summary statistics.
+    sumstats = []
+    sumstats.append(elfi.Summary(ss_mean, m['gauss'], name='ss_mean'))
+    sumstats.append(elfi.Summary(ss_var, m['gauss'], name='ss_var'))
+
+    # Choosing the discrepancy metric.
+    if nd_mean:
+        elfi.Discrepancy(euclidean_multidim, *sumstats, name='d')
+    else:
+        elfi.Distance('euclidean', *sumstats, name='d')
 
     return m
diff --git a/elfi/examples/gnk.py b/elfi/examples/gnk.py
index eeabb4df..80337c0c 100644
--- a/elfi/examples/gnk.py
+++ b/elfi/examples/gnk.py
@@ -134,11 +134,11 @@ def euclidean_multidim(*simulated, observed):
     array_like
 
     """
-    pts_sim = np.column_stack(simulated)
-    pts_obs = np.column_stack(observed)
-    d_multidim = np.sum((pts_sim - pts_obs)**2., axis=1)
-    d_squared = np.sum(d_multidim, axis=1)
-    d = np.sqrt(d_squared)
+    pts_sim = np.stack(simulated, axis=1)
+    pts_obs = np.stack(observed, axis=1)
+    d_ss_merged = np.sum((pts_sim - pts_obs)**2., axis=1)
+    d_dim_merged = np.sum(d_ss_merged, axis=1)
+    d = np.sqrt(d_dim_merged)
 
     return d
 
@@ -185,8 +185,8 @@ def ss_robust(y):
     ss_g = _get_ss_g(y)
     ss_k = _get_ss_k(y)
 
-    ss_robust = np.stack((ss_a, ss_b, ss_g, ss_k), axis=1)
-
+    # Combining the summary statistics by expanding the dimensionality.
+    ss_robust = np.hstack((ss_a, ss_b, ss_g, ss_k))
     return ss_robust
 
 
@@ -209,7 +209,8 @@ def ss_octile(y):
     octiles = np.linspace(12.5, 87.5, 7)
     E1, E2, E3, E4, E5, E6, E7 = np.percentile(y, octiles, axis=1)
 
-    ss_octile = np.stack((E1, E2, E3, E4, E5, E6, E7), axis=1)
+    # Combining the summary statistics by expanding the dimensionality.
+    ss_octile = np.hstack((E1, E2, E3, E4, E5, E6, E7))
 
     return ss_octile
 
diff --git a/elfi/methods/post_processing.py b/elfi/methods/post_processing.py
index 384c7abc..cc3ba785 100644
--- a/elfi/methods/post_processing.py
+++ b/elfi/methods/post_processing.py
@@ -242,8 +242,8 @@ def adjust_posterior(sample, model, summary_names, parameter_names=None, adjustm
     >>> import elfi
     >>> from elfi.examples import gauss
     >>> m = gauss.get_model()
-    >>> res = elfi.Rejection(m['d'], output_names=['S1', 'S2']).sample(1000)
-    >>> adj = adjust_posterior(res, m, ['S1', 'S2'], ['mu'], LinearAdjustment())
+    >>> res = elfi.Rejection(m['d'], output_names=['ss_mean', 'ss_var']).sample(1000)
+    >>> adj = adjust_posterior(res, m, ['ss_mean', 'ss_var'], ['mu'], LinearAdjustment())
 
     """
     adjustment = _get_adjustment(adjustment)
diff --git a/tests/conftest.py b/tests/conftest.py
index 2f749a3c..7213e864 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -9,6 +9,7 @@
 import elfi.clients.ipyparallel as eipp
 import elfi.clients.multiprocessing as mp
 import elfi.clients.native as native
+import elfi.examples.gauss
 import elfi.examples.ma2
 
 elfi.clients.native.set_as_default()
diff --git a/tests/functional/test_post_processing.py b/tests/functional/test_post_processing.py
index bcce3e95..a7e2e0da 100644
--- a/tests/functional/test_post_processing.py
+++ b/tests/functional/test_post_processing.py
@@ -28,9 +28,9 @@ def test_single_parameter_linear_adjustment():
     # Hyperparameters
     mu0, sigma0 = (10, 100)
 
-    y_obs = gauss.Gauss(
+    y_obs = gauss.gauss(
         mu, sigma, n_obs=n_obs, batch_size=1, random_state=np.random.RandomState(seed))
-    sim_fn = partial(gauss.Gauss, sigma=sigma, n_obs=n_obs)
+    sim_fn = partial(gauss.gauss, sigma=sigma, n_obs=n_obs)
 
     # Posterior
     n = y_obs.shape[1]
@@ -40,12 +40,12 @@ def test_single_parameter_linear_adjustment():
     # Model
     m = elfi.ElfiModel()
     elfi.Prior('norm', mu0, sigma0, model=m, name='mu')
-    elfi.Simulator(sim_fn, m['mu'], observed=y_obs, name='Gauss')
-    elfi.Summary(lambda x: x.mean(axis=1), m['Gauss'], name='S1')
-    elfi.Distance('euclidean', m['S1'], name='d')
+    elfi.Simulator(sim_fn, m['mu'], observed=y_obs, name='gauss')
+    elfi.Summary(lambda x: x.mean(axis=1), m['gauss'], name='ss_mean')
+    elfi.Distance('euclidean', m['ss_mean'], name='d')
 
-    res = elfi.Rejection(m['d'], output_names=['S1'], seed=seed).sample(1000, threshold=1)
-    adj = elfi.adjust_posterior(model=m, sample=res, parameter_names=['mu'], summary_names=['S1'])
+    res = elfi.Rejection(m['d'], output_names=['ss_mean'], seed=seed).sample(1000, threshold=1)
+    adj = elfi.adjust_posterior(model=m, sample=res, parameter_names=['mu'], summary_names=['ss_mean'])
 
     assert np.allclose(_statistics(adj.outputs['mu']), (4.9772879640569778, 0.02058680115402544))
 
@@ -61,9 +61,9 @@ def test_nonfinite_values():
     # Hyperparameters
     mu0, sigma0 = (10, 100)
 
-    y_obs = gauss.Gauss(
+    y_obs = gauss.gauss(
         mu, sigma, n_obs=n_obs, batch_size=1, random_state=np.random.RandomState(seed))
-    sim_fn = partial(gauss.Gauss, sigma=sigma, n_obs=n_obs)
+    sim_fn = partial(gauss.gauss, sigma=sigma, n_obs=n_obs)
 
     # Posterior
     n = y_obs.shape[1]
@@ -73,19 +73,19 @@ def test_nonfinite_values():
     # Model
     m = elfi.ElfiModel()
     elfi.Prior('norm', mu0, sigma0, model=m, name='mu')
-    elfi.Simulator(sim_fn, m['mu'], observed=y_obs, name='Gauss')
-    elfi.Summary(lambda x: x.mean(axis=1), m['Gauss'], name='S1')
-    elfi.Distance('euclidean', m['S1'], name='d')
+    elfi.Simulator(sim_fn, m['mu'], observed=y_obs, name='gauss')
+    elfi.Summary(lambda x: x.mean(axis=1), m['gauss'], name='ss_mean')
+    elfi.Distance('euclidean', m['ss_mean'], name='d')
 
-    res = elfi.Rejection(m['d'], output_names=['S1'], seed=seed).sample(1000, threshold=1)
+    res = elfi.Rejection(m['d'], output_names=['ss_mean'], seed=seed).sample(1000, threshold=1)
 
     # Add some invalid values
     res.outputs['mu'] = np.append(res.outputs['mu'], np.array([np.inf]))
-    res.outputs['S1'] = np.append(res.outputs['S1'], np.array([np.inf]))
+    res.outputs['ss_mean'] = np.append(res.outputs['ss_mean'], np.array([np.inf]))
 
     with pytest.warns(UserWarning):
         adj = elfi.adjust_posterior(
-            model=m, sample=res, parameter_names=['mu'], summary_names=['S1'])
+            model=m, sample=res, parameter_names=['mu'], summary_names=['ss_mean'])
 
     assert np.allclose(_statistics(adj.outputs['mu']), (4.9772879640569778, 0.02058680115402544))
 
diff --git a/tests/unit/test_examples.py b/tests/unit/test_examples.py
index 92c6feff..6e0f09bb 100644
--- a/tests/unit/test_examples.py
+++ b/tests/unit/test_examples.py
@@ -41,12 +41,28 @@ def test_bdm():
     if do_cleanup:
         os.system('rm {}/bdm'.format(cpp_path))
 
-
-def test_Gauss():
+def test_gauss():
     m = gauss.get_model()
     rej = elfi.Rejection(m, m['d'], batch_size=10)
     rej.sample(20)
 
+def test_gauss_1d_mean():
+    params_true = [4]
+    cov_matrix = [1]
+
+    m = gauss.get_model(true_params=params_true, nd_mean=True, cov_matrix=cov_matrix)
+    rej = elfi.Rejection(m, m['d'], batch_size=10)
+    rej.sample(20)
+
+
+def test_gauss_2d_mean():
+    params_true = [4, 4]
+    cov_matrix = [[1, .5], [.5, 1]]
+
+    m = gauss.get_model(true_params=params_true, nd_mean=True, cov_matrix=cov_matrix)
+    rej = elfi.Rejection(m, m['d'], batch_size=10)
+    rej.sample(20)
+
 
 def test_Ricker():
     m = ricker.get_model()