From 51c85180e37cf41bc46a7b22d0b4834667d0336e Mon Sep 17 00:00:00 2001
From: Xuhui Fan <xhfan.ml@gmail.com>
Date: Tue, 21 Aug 2018 15:53:06 +1000
Subject: [PATCH] Add the method of CopulaABC

---
 elfi/methods/CopulaABC.py | 206 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 206 insertions(+)
 create mode 100644 elfi/methods/CopulaABC.py

diff --git a/elfi/methods/CopulaABC.py b/elfi/methods/CopulaABC.py
new file mode 100644
index 00000000..6dd0bb1e
--- /dev/null
+++ b/elfi/methods/CopulaABC.py
@@ -0,0 +1,206 @@
+"""This module contains the CopulaABC methods."""
+
+
+import logging
+from math import ceil
+
+import matplotlib.pyplot as plt
+import numpy as np
+import scipy.stats as ss
+from scipy.stats import norm
+
+
+import elfi.client
+import elfi.methods.mcmc as mcmc
+import elfi.visualization.interactive as visin
+import elfi.visualization.visualization as vis
+from elfi.loader import get_sub_seed
+from elfi.methods.bo.acquisition import LCBSC
+from elfi.methods.bo.gpy_regression import GPyRegression
+from elfi.methods.bo.utils import stochastic_optimization
+from elfi.methods.posteriors import BolfiPosterior
+from elfi.methods.results import BolfiSample, OptimizationResult, Sample, SmcSample
+from elfi.methods.utils import (GMDistribution, ModelPrior, arr2d_to_batch,
+                                batch_to_arr2d, ceil_to_batch_size, weighted_var)
+from elfi.model.elfi_model import ComputationContext, ElfiModel, NodeReference
+from elfi.methods.parameter_inference import Rejection
+from elfi.utils import is_array
+
+logger = logging.getLogger(__name__)
+
+
+
+
+
+
+
+class Copula_ABC(Rejection):
+    """Copula ABC rejection sampler.
+
+    For a description of the rejection sampler and a general introduction to ABC, see e.g.
+    J. Li et al. Compuational Statistics and Data Analysis. 2017.
+
+    """
+
+    def __init__(self, model, discrepancy_name=None, output_names=None, **kwargs):
+        """Initialize the Rejection sampler.
+
+        Parameters
+        ----------
+        model : ElfiModel or NodeReference
+        discrepancy_name : str, NodeReference, optional
+            Only needed if model is an ElfiModel
+        output_names : list, optional
+            Additional outputs from the model to be included in the inference result, e.g.
+            corresponding summaries to the acquired samples
+        kwargs:
+            See InferenceMethod
+
+        """
+        super(Copula_ABC, self).__init__(model, output_names, **kwargs)
+
+
+
+    def update(self, batch, batch_index):
+        """Update the inference state with a new batch.
+
+        Parameters
+        ----------
+        batch : dict
+            dict with `self.outputs` as keys and the corresponding outputs for the batch
+            as values
+        batch_index : int
+
+        """
+        # super(Copula_ABC, self).update(batch, batch_index)
+        self.state['n_batches'] += 1
+        self.state['n_sim'] += self.batch_size
+        if self.state['samples'] is None:
+            # Lazy initialization of the outputs dict
+            self._init_samples_lazy(batch)
+        self._merge_batch(batch)
+
+    def infer(self, *args, vis=None, **kwargs):
+        """Set the objective and start the iterate loop until the inference is finished.
+
+        See the other arguments from the `set_objective` method.
+
+        Returns
+        -------
+        result : Sample
+
+        """
+        vis_opt = vis if isinstance(vis, dict) else {}
+
+        self.set_objective(*args, **kwargs)
+
+        while not self.finished:
+            self.iterate()
+            if vis:
+                self.plot_state(interactive=True, **vis_opt)
+
+        self.batches.cancel_pending()
+
+        self.copula_cal()
+
+        if vis:
+            self.plot_state(close=True, **vis_opt)
+
+        return self.extract_result()
+
+    def extract_result(self):
+        """Extract the result from the current state.
+
+        Returns
+        -------
+        result : Sample
+
+        """
+        if self.state['samples'] is None:
+            raise ValueError('Nothing to extract')
+
+        # Take out the correct number of samples
+        outputs = dict()
+        outputs['Lambda'] = self.Lambda
+        outputs['Marg'] = self.Marg
+
+        return CopulaABC_Sample(outputs=outputs, **self._extract_result_kwargs())
+
+
+    def copula_cal(self):
+        """Based on the generated samples, calculate the Lambda correlation matrix and the Marg distribution.
+
+        Returns
+        -------
+        Lambda : the bi-variate correlation matrix
+        Marg: the Marginal distribution
+
+        """
+
+        PP = self.state['samples'][self.discrepancy_name].shape[1]
+
+        Lambda = np.ones((PP, PP))
+        Marg = np.empty((self.objective['n_samples'], PP))
+
+        quantile_val = self.objective['n_samples']/self.state['n_sim']*100
+        for K1 in range(PP-1):
+            for K2 in np.arange(K1+1, PP):
+                dis_K1K2 = np.sqrt(np.sum(self.state['samples'][self.discrepancy_name][:, [K1, K2]]**2, axis=1))
+                indexx = (dis_K1K2<=np.percentile(dis_K1K2, quantile_val))
+
+                val1 = self.state['samples'][self.parameter_names[0]][indexx, [[K1], [K2]]].T
+
+                ri = ss.rankdata(val1[:, 0])
+                rj = ss.rankdata(val1[:, 1])
+
+                nn = sum(indexx)
+                # nn = len(indexx)
+                etaii = norm.ppf(ri/(nn+1))
+                etajj = norm.ppf(rj/(nn+1))
+
+                Lambda[K1, K2] = np.corrcoef(etaii, etajj)[0, 1]
+                Lambda[K2, K1] = Lambda[K1, K2]
+
+        for kk in range(PP):
+            dis_KK = self.state['samples'][self.discrepancy_name][:, kk]
+            indexx = (dis_KK <= np.percentile(dis_KK, quantile_val))
+
+            val_adjust_ii = self.state['samples'][self.parameter_names[0]][indexx, kk]
+
+            Marg[:, kk] = val_adjust_ii.reshape((-1))
+
+        self.Lambda = Lambda
+        self.Marg = Marg
+
+    def _init_samples_lazy(self, batch):
+        """Initialize the outputs dict based on the received batch."""
+        samples = {}
+        e_noarr = "Node {} output must be in a numpy array of length {} (batch_size)."
+        e_len = "Node {} output has array length {}. It should be equal to the batch size {}."
+
+        for node in self.output_names:
+            # Check the requested outputs
+            if node not in batch:
+                raise KeyError("Did not receive outputs for node {}".format(node))
+
+            nbatch = batch[node]
+            if not is_array(nbatch):
+                raise ValueError(e_noarr.format(node, self.batch_size))
+            elif len(nbatch) != self.batch_size:
+                raise ValueError(e_len.format(node, len(nbatch), self.batch_size))
+
+            # Prepare samples
+            shape = (self.objective['n_samples'] + self.batch_size, ) + nbatch.shape[1:]
+            dtype = nbatch.dtype
+
+            samples[node] = np.array([]).reshape(0, nbatch.shape[1])
+
+        self.state['samples'] = samples
+
+    def _merge_batch(self, batch):
+        # TODO: add index vector so that you can recover the original order
+        samples = self.state['samples']
+        # Put the acquired samples to the end
+        for node, v in samples.items():
+            self.state['samples'][node] = np.vstack((v, batch[node]))
+            # v[self.objective['n_samples']:] = batch[node]