From 6c4de2b08367cc02d060e459d6a58dad9126446f Mon Sep 17 00:00:00 2001
From: BenSeimon
Date: Thu, 7 Nov 2024 15:01:32 +0000
Subject: [PATCH 1/6] Update base to support random_state upon initialization.
 Default is None to allow for users who want variability and don't need
 reproducible results.

---
 pyhhmm/base.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pyhhmm/base.py b/pyhhmm/base.py
index 802641f..52f74fa 100644
--- a/pyhhmm/base.py
+++ b/pyhhmm/base.py
@@ -83,6 +83,7 @@ def __init__(
         A_prior=1.0,
         learning_rate=0.,
         verbose=True,
+        random_state=None,
     ):
         """Constructor method."""
 
@@ -97,6 +98,7 @@ def __init__(
         self.A_prior = A_prior
         self.learning_rate = learning_rate
         self.verbose = verbose
+        self.rng = np.random.default_rng(random_state)
 
     def __str__(self):
         """Function to allow directly printing the object."""
@@ -329,13 +331,13 @@ def sample(self, n_sequences=1, n_samples=1, return_states=False):
         transmat_cdf = np.cumsum(self.A, axis=1)
 
         for _ in range(n_sequences):
-            currstate = (startprob_cdf > np.random.rand()).argmax()
+            currstate = (startprob_cdf > self.rng.random()).argmax()
             state_sequence = [currstate]
             X = [self._generate_sample_from_state(currstate)]
 
             for _ in range(n_samples - 1):
                 currstate = (transmat_cdf[currstate]
-                             > np.random.rand()).argmax()
+                             > self.rng.random()).argmax()
                 state_sequence.append(currstate)
                 X.append(self._generate_sample_from_state(currstate))
             samples.append(np.vstack(X))
@@ -373,12 +375,12 @@ def _init_model_params(self):
             self.A = np.full((self.n_states, self.n_states), init)
         else:
             if 's' in self.init_params:
-                self.pi = np.random.dirichlet(
+                self.pi = self.rng.dirichlet(
                     alpha=self.pi_prior * np.ones(self.n_states), size=1
                 )[0]
 
             if 't' in self.init_params:
-                self.A = np.random.dirichlet(
+                self.A = self.rng.dirichlet(
                     alpha=self.A_prior * np.ones(self.n_states),
                     size=self.n_states
                 )

From caec37170e1b51ba4239e3daac1159b999625306 Mon Sep 17 00:00:00 2001
From: BenSeimon
Date: Thu, 7 Nov 2024 15:03:17 +0000
Subject: [PATCH 2/6] Filtered the random_state parameter through to
 GaussianHMM

---
 pyhhmm/gaussian.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyhhmm/gaussian.py b/pyhhmm/gaussian.py
index 4973733..864a470 100644
--- a/pyhhmm/gaussian.py
+++ b/pyhhmm/gaussian.py
@@ -84,6 +84,7 @@ def __init__(
         min_covar=1e-3,
         learning_rate=0.,
         verbose=False,
+        random_state=None,
     ):
         if covariance_type not in COVARIANCE_TYPES:
             raise ValueError(
@@ -99,6 +100,7 @@ def __init__(
             A_prior=A_prior,
             learning_rate=learning_rate,
             verbose=verbose,
+            random_state=random_state,
         )
 
         self.n_emissions = n_emissions
@@ -481,4 +483,4 @@ def _generate_sample_from_state(self, state):
             from the emission distribution corresponding to a given state
         :rtype: array_like
         """
-        return np.random.multivariate_normal(self.means[state], self.covars[state])
+        return self.rng.multivariate_normal(self.means[state], self.covars[state])

From f6becd5109bcaad5fce13e6a6ff76268d4e8606a Mon Sep 17 00:00:00 2001
From: BenSeimon
Date: Thu, 7 Nov 2024 15:05:30 +0000
Subject: [PATCH 3/6] Now filtered the random_state through to HeterogeneousHMM

---
 pyhhmm/heterogeneous.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pyhhmm/heterogeneous.py b/pyhhmm/heterogeneous.py
index 8832412..a8239ce 100644
--- a/pyhhmm/heterogeneous.py
+++ b/pyhhmm/heterogeneous.py
@@ -98,6 +98,7 @@ def __init__(
         min_covar=1e-3,
         learning_rate=0,
         verbose=False,
+        random_state=None,
     ):
         """Constructor method.
 
@@ -125,6 +126,7 @@ def __init__(
             A_prior=A_prior,
             learning_rate=learning_rate,
             verbose=verbose,
+            random_state=random_state,
         )
 
         self.n_g_emissions = n_g_emissions
@@ -629,20 +631,20 @@ def _pdf(self, x, mean, covar):
         return multivariate_normal.pdf(x, mean=mean, cov=covar, allow_singular=True)
 
     def _generate_sample_from_state(self, state):
-        """ Generates a random sample from fa given component.
+        """ Generates a random sample from a given component.
 
         :param state: index of the component to condition on
         :type state: int
         :return: array of shape (n_g_features+n_d_features, ) containing a random sample
             from the emission distribution corresponding to a given state
         :rtype: array_like
         """
-        gauss_sample = np.random.multivariate_normal(
+        gauss_sample = self.rng.multivariate_normal(
            self.means[state], self.covars[state]
         )
         cat_sample = []
         for e in range(self.n_d_emissions):
             cdf = np.cumsum(self.B[e][state, :])
-            cat_sample.append((cdf > np.random.rand()).argmax())
+            cat_sample.append((cdf > self.rng.random()).argmax())
 
         return np.concatenate([gauss_sample, cat_sample])

From a05b23ebcd3548816d6ab57fd14cde3dd425088e Mon Sep 17 00:00:00 2001
From: BenSeimon
Date: Thu, 7 Nov 2024 15:06:43 +0000
Subject: [PATCH 4/6] Finally filtered random_state through to MultinomialHMM

---
 pyhhmm/multinomial.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pyhhmm/multinomial.py b/pyhhmm/multinomial.py
index 078b9a6..5078d7f 100644
--- a/pyhhmm/multinomial.py
+++ b/pyhhmm/multinomial.py
@@ -71,6 +71,7 @@ def __init__(
         state_no_train_de=None,
         learning_rate=0.1,
         verbose=True,
+        random_state=None,
     ):
         """Constructor method
 
@@ -91,6 +92,7 @@ def __init__(
             A_prior=A_prior,
             verbose=verbose,
             learning_rate=learning_rate,
+            random_state=random_state,
         )
         self.n_emissions = n_emissions
         self.n_features = n_features
@@ -157,7 +159,7 @@ def _init_model_params(self):
         else:
             if self.nr_no_train_de == 0:
                 self.B = [
-                    np.random.rand(self.n_states, self.n_features[i])
+                    self.rng.random((self.n_states, self.n_features[i]))
                     for i in range(self.n_emissions)
                 ]
                 for i in range(self.n_emissions):
@@ -301,5 +303,5 @@ def _generate_sample_from_state(self, state):
         res = []
         for e in range(self.n_emissions):
             cdf = np.cumsum(self.B[e][state, :])
-            res.append((cdf > np.random.rand()).argmax())
+            res.append((cdf > self.rng.random()).argmax())
         return np.asarray(res)

From e8f7b398fe172fc16d288498770e7fe9a9454e0b Mon Sep 17 00:00:00 2001
From: BenSeimon
Date: Thu, 7 Nov 2024 15:15:15 +0000
Subject: [PATCH 5/6] Added commentary to each of the classes.

---
 pyhhmm/base.py | 1 +
 pyhhmm/gaussian.py | 1 +
 pyhhmm/heterogeneous.py | 1 +
 pyhhmm/multinomial.py | 1 +
 4 files changed, 4 insertions(+)

diff --git a/pyhhmm/base.py b/pyhhmm/base.py
index 52f74fa..10f3142 100644
--- a/pyhhmm/base.py
+++ b/pyhhmm/base.py
@@ -71,6 +71,7 @@ class BaseHMM(object):
     :param verbose: flag to be set to True if per-iteration convergence reports
         should be printed. Defaults to True.
     :type verbose: bool, optional
+    :param random_state: seed for the random number generator
     """
 
     def __init__(
diff --git a/pyhhmm/gaussian.py b/pyhhmm/gaussian.py
index 864a470..96ae217 100644
--- a/pyhhmm/gaussian.py
+++ b/pyhhmm/gaussian.py
@@ -66,6 +66,7 @@ class GaussianHMM(BaseHMM):
     :type learning_rate: float, optional
     :param verbose: flag to be set to True if per-iteration convergence reports should be printed. Defaults to True.
     :type verbose: bool, optional
+    :param random_state: seed for the random number generator
     """
 
     def __init__(
diff --git a/pyhhmm/heterogeneous.py b/pyhhmm/heterogeneous.py
index a8239ce..b286548 100644
--- a/pyhhmm/heterogeneous.py
+++ b/pyhhmm/heterogeneous.py
@@ -76,6 +76,7 @@ class HeterogeneousHMM(BaseHMM):
     :type learning_rate: float, optional
     :param verbose: flag to be set to True if per-iteration convergence reports should be printed, defaults to True
     :type verbose: bool, optional
+    :param random_state: seed for the random number generator
     """
 
     def __init__(
diff --git a/pyhhmm/multinomial.py b/pyhhmm/multinomial.py
index 5078d7f..3f8d82c 100644
--- a/pyhhmm/multinomial.py
+++ b/pyhhmm/multinomial.py
@@ -54,6 +54,7 @@ class MultinomialHMM(BaseHMM):
     :type learning_rate: float, optional
     :param verbose: flag to be set to True if per-iteration convergence reports should be printed, defaults to True
     :type verbose: bool, optional
+    :param random_state: seed for the random number generator
     """
 
     def __init__(

From 42a41cd11ed95fa613e84805fb2fe129cb5c804f Mon Sep 17 00:00:00 2001
From: BenSeimon
Date: Thu, 7 Nov 2024 15:27:09 +0000
Subject: [PATCH 6/6] Added random_state directly as an attribute. This is
 required for KMeans clustering reproducibility.

---
 pyhhmm/base.py | 1 +
 pyhhmm/gaussian.py | 2 +-
 pyhhmm/heterogeneous.py | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/pyhhmm/base.py b/pyhhmm/base.py
index 10f3142..c987e22 100644
--- a/pyhhmm/base.py
+++ b/pyhhmm/base.py
@@ -99,6 +99,7 @@ def __init__(
         self.A_prior = A_prior
         self.learning_rate = learning_rate
         self.verbose = verbose
+        self.random_state = random_state
         self.rng = np.random.default_rng(random_state)
 
     def __str__(self):
diff --git a/pyhhmm/gaussian.py b/pyhhmm/gaussian.py
index 96ae217..8d7fced 100644
--- a/pyhhmm/gaussian.py
+++ b/pyhhmm/gaussian.py
@@ -171,7 +171,7 @@ def _init_model_params(self, X):
         X_concat = concatenate_observation_sequences(X)
 
         if 'm' in self.init_params:
-            kmeans = cluster.KMeans(n_clusters=self.n_states)
+            kmeans = cluster.KMeans(n_clusters=self.n_states, random_state=self.random_state)
             kmeans.fit(X_concat)
             self.means = kmeans.cluster_centers_
         if 'c' in self.init_params:
diff --git a/pyhhmm/heterogeneous.py b/pyhhmm/heterogeneous.py
index b286548..8bcdb67 100644
--- a/pyhhmm/heterogeneous.py
+++ b/pyhhmm/heterogeneous.py
@@ -206,7 +206,7 @@ def _init_model_params(self, X):
             X, gidx=self.n_g_emissions)
 
         if 'm' in self.init_params:
-            kmeans = cluster.KMeans(n_clusters=self.n_states, random_state=0)
+            kmeans = cluster.KMeans(n_clusters=self.n_states, random_state=self.random_state)
             kmeans.fit(X_concat)
             self.means = kmeans.cluster_centers_
         if 'c' in self.init_params:
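
Usage sketch (reviewer note, not part of the patch series): a minimal example of the
reproducibility this series aims to provide. It assumes the GaussianHMM constructor
keywords n_states and n_emissions, that the default init_params include 'm', and it
calls the private _init_model_params(X) hook touched in PATCH 6 purely for
illustration; the toy data, seed, and the init_means helper are made up for the demo.

    import numpy as np
    from pyhhmm.gaussian import GaussianHMM

    # Toy data: one sequence of 100 two-dimensional observations.
    data = [np.random.default_rng(0).normal(size=(100, 2))]

    def init_means(seed):
        # random_state seeds the model's own generator (self.rng) and,
        # after PATCH 6, the KMeans call inside _init_model_params.
        model = GaussianHMM(n_states=3, n_emissions=2, random_state=seed)
        model._init_model_params(data)
        return model.means

    # Same seed -> identical KMeans-initialized state means across runs.
    print(np.allclose(init_means(11), init_means(11)))  # expected: True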