From 6c4de2b08367cc02d060e459d6a58dad9126446f Mon Sep 17 00:00:00 2001
From: BenSeimon
Date: Thu, 7 Nov 2024 15:01:32 +0000
Subject: [PATCH 1/6] Update base to support random_state upon initialization.
 Default is None to allow for users who want variability and don't need
 reproducible results.

---
 pyhhmm/base.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/pyhhmm/base.py b/pyhhmm/base.py
index 802641f..52f74fa 100644
--- a/pyhhmm/base.py
+++ b/pyhhmm/base.py
@@ -83,6 +83,7 @@ def __init__(
         A_prior=1.0,
         learning_rate=0.,
         verbose=True,
+        random_state=None,
     ):
         """Constructor method."""
 
@@ -97,6 +98,7 @@ def __init__(
         self.A_prior = A_prior
         self.learning_rate = learning_rate
         self.verbose = verbose
+        self.rng = np.random.default_rng(random_state)
 
     def __str__(self):
         """Function to allow directly printing the object."""
@@ -329,13 +331,13 @@ def sample(self, n_sequences=1, n_samples=1, return_states=False):
         transmat_cdf = np.cumsum(self.A, axis=1)
 
         for _ in range(n_sequences):
-            currstate = (startprob_cdf > np.random.rand()).argmax()
+            currstate = (startprob_cdf > self.rng.random()).argmax()
             state_sequence = [currstate]
             X = [self._generate_sample_from_state(currstate)]
 
             for _ in range(n_samples - 1):
                 currstate = (transmat_cdf[currstate]
-                             > np.random.rand()).argmax()
+                             > self.rng.random()).argmax()
                 state_sequence.append(currstate)
                 X.append(self._generate_sample_from_state(currstate))
             samples.append(np.vstack(X))
@@ -373,12 +375,12 @@ def _init_model_params(self):
             self.A = np.full((self.n_states, self.n_states), init)
         else:
             if 's' in self.init_params:
-                self.pi = np.random.dirichlet(
+                self.pi = self.rng.dirichlet(
                     alpha=self.pi_prior * np.ones(self.n_states), size=1
                 )[0]
 
             if 't' in self.init_params:
-                self.A = np.random.dirichlet(
+                self.A = self.rng.dirichlet(
                     alpha=self.A_prior * np.ones(self.n_states),
                     size=self.n_states
                 )

From caec37170e1b51ba4239e3daac1159b999625306 Mon Sep 17 00:00:00 2001
From: BenSeimon
Date: Thu, 7 Nov 2024 15:03:17 +0000
Subject: [PATCH 2/6] Filtered the random_state parameter through to
 GaussianHMM

---
 pyhhmm/gaussian.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/pyhhmm/gaussian.py b/pyhhmm/gaussian.py
index 4973733..864a470 100644
--- a/pyhhmm/gaussian.py
+++ b/pyhhmm/gaussian.py
@@ -84,6 +84,7 @@ def __init__(
         min_covar=1e-3,
         learning_rate=0.,
         verbose=False,
+        random_state=None,
     ):
         if covariance_type not in COVARIANCE_TYPES:
             raise ValueError(
@@ -99,6 +100,7 @@ def __init__(
             A_prior=A_prior,
             learning_rate=learning_rate,
             verbose=verbose,
+            random_state=random_state,
         )
 
         self.n_emissions = n_emissions
@@ -481,4 +483,4 @@ def _generate_sample_from_state(self, state):
             from the emission distribution corresponding to a given state
         :rtype: array_like
         """
-        return np.random.multivariate_normal(self.means[state], self.covars[state])
+        return self.rng.multivariate_normal(self.means[state], self.covars[state])

From f6becd5109bcaad5fce13e6a6ff76268d4e8606a Mon Sep 17 00:00:00 2001
From: BenSeimon
Date: Thu, 7 Nov 2024 15:05:30 +0000
Subject: [PATCH 3/6] Now filtered the random_state through to HeterogeneousHMM

---
 pyhhmm/heterogeneous.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/pyhhmm/heterogeneous.py b/pyhhmm/heterogeneous.py
index 8832412..a8239ce 100644
--- a/pyhhmm/heterogeneous.py
+++ b/pyhhmm/heterogeneous.py
@@ -98,6 +98,7 @@ def __init__(
         min_covar=1e-3,
         learning_rate=0,
         verbose=False,
+        random_state=None,
     ):
         """Constructor method.
 
@@ -125,6 +126,7 @@ def __init__(
             A_prior=A_prior,
             learning_rate=learning_rate,
             verbose=verbose,
+            random_state=random_state,
         )
 
         self.n_g_emissions = n_g_emissions
@@ -629,20 +631,20 @@ def _pdf(self, x, mean, covar):
         return multivariate_normal.pdf(x, mean=mean, cov=covar, allow_singular=True)
 
     def _generate_sample_from_state(self, state):
-        """ Generates a random sample from fa given component.
+        """ Generates a random sample from a given component.
 
         :param state: index of the component to condition on
         :type state: int
         :return: array of shape (n_g_features+n_d_features, ) containing a random sample
             from the emission distribution corresponding to a given state
         :rtype: array_like
         """
-        gauss_sample = np.random.multivariate_normal(
+        gauss_sample = self.rng.multivariate_normal(
            self.means[state], self.covars[state]
         )
         cat_sample = []
         for e in range(self.n_d_emissions):
             cdf = np.cumsum(self.B[e][state, :])
-            cat_sample.append((cdf > np.random.rand()).argmax())
+            cat_sample.append((cdf > self.rng.random()).argmax())
 
         return np.concatenate([gauss_sample, cat_sample])

From a05b23ebcd3548816d6ab57fd14cde3dd425088e Mon Sep 17 00:00:00 2001
From: BenSeimon
Date: Thu, 7 Nov 2024 15:06:43 +0000
Subject: [PATCH 4/6] Finally filtered random_state through to MultinomialHMM

---
 pyhhmm/multinomial.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/pyhhmm/multinomial.py b/pyhhmm/multinomial.py
index 078b9a6..5078d7f 100644
--- a/pyhhmm/multinomial.py
+++ b/pyhhmm/multinomial.py
@@ -71,6 +71,7 @@ def __init__(
         state_no_train_de=None,
         learning_rate=0.1,
         verbose=True,
+        random_state=None,
     ):
         """Constructor method
 
@@ -91,6 +92,7 @@ def __init__(
             A_prior=A_prior,
             verbose=verbose,
             learning_rate=learning_rate,
+            random_state=random_state,
         )
         self.n_emissions = n_emissions
         self.n_features = n_features
@@ -157,7 +159,7 @@ def _init_model_params(self):
         else:
             if self.nr_no_train_de == 0:
                 self.B = [
-                    np.random.rand(self.n_states, self.n_features[i])
+                    self.rng.random((self.n_states, self.n_features[i]))
                     for i in range(self.n_emissions)
                 ]
                 for i in range(self.n_emissions):
@@ -301,5 +303,5 @@ def _generate_sample_from_state(self, state):
         res = []
         for e in range(self.n_emissions):
             cdf = np.cumsum(self.B[e][state, :])
-            res.append((cdf > np.random.rand()).argmax())
+            res.append((cdf > self.rng.random()).argmax())
         return np.asarray(res)

From e8f7b398fe172fc16d288498770e7fe9a9454e0b Mon Sep 17 00:00:00 2001
From: BenSeimon
Date: Thu, 7 Nov 2024 15:15:15 +0000
Subject: [PATCH 5/6] Added commentary to each of the classes.

---
 pyhhmm/base.py | 1 +
 pyhhmm/gaussian.py | 1 +
 pyhhmm/heterogeneous.py | 1 +
 pyhhmm/multinomial.py | 1 +
 4 files changed, 4 insertions(+)

diff --git a/pyhhmm/base.py b/pyhhmm/base.py
index 52f74fa..10f3142 100644
--- a/pyhhmm/base.py
+++ b/pyhhmm/base.py
@@ -71,6 +71,7 @@ class BaseHMM(object):
     :param verbose: flag to be set to True if per-iteration convergence reports
         should be printed. Defaults to True.
     :type verbose: bool, optional
+    :param random_state: seed for the random number generator
     """
 
     def __init__(
diff --git a/pyhhmm/gaussian.py b/pyhhmm/gaussian.py
index 864a470..96ae217 100644
--- a/pyhhmm/gaussian.py
+++ b/pyhhmm/gaussian.py
@@ -66,6 +66,7 @@ class GaussianHMM(BaseHMM):
     :type learning_rate: float, optional
     :param verbose: flag to be set to True if per-iteration convergence reports should be printed. Defaults to True.
     :type verbose: bool, optional
+    :param random_state: seed for the random number generator
     """
 
     def __init__(
diff --git a/pyhhmm/heterogeneous.py b/pyhhmm/heterogeneous.py
index a8239ce..b286548 100644
--- a/pyhhmm/heterogeneous.py
+++ b/pyhhmm/heterogeneous.py
@@ -76,6 +76,7 @@ class HeterogeneousHMM(BaseHMM):
     :type learning_rate: float, optional
     :param verbose: flag to be set to True if per-iteration convergence reports should be printed, defaults to True
     :type verbose: bool, optional
+    :param random_state: seed for the random number generator
     """
 
     def __init__(
diff --git a/pyhhmm/multinomial.py b/pyhhmm/multinomial.py
index 5078d7f..3f8d82c 100644
--- a/pyhhmm/multinomial.py
+++ b/pyhhmm/multinomial.py
@@ -54,6 +54,7 @@ class MultinomialHMM(BaseHMM):
     :type learning_rate: float, optional
     :param verbose: flag to be set to True if per-iteration convergence reports should be printed, defaults to True
     :type verbose: bool, optional
+    :param random_state: seed for the random number generator
     """
 
     def __init__(

From 42a41cd11ed95fa613e84805fb2fe129cb5c804f Mon Sep 17 00:00:00 2001
From: BenSeimon
Date: Thu, 7 Nov 2024 15:27:09 +0000
Subject: [PATCH 6/6] Added random_state directly as an attribute. This is
 required for KMeans clustering reproducibility.

---
 pyhhmm/base.py | 1 +
 pyhhmm/gaussian.py | 2 +-
 pyhhmm/heterogeneous.py | 2 +-
 3 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/pyhhmm/base.py b/pyhhmm/base.py
index 10f3142..c987e22 100644
--- a/pyhhmm/base.py
+++ b/pyhhmm/base.py
@@ -99,6 +99,7 @@ def __init__(
         self.A_prior = A_prior
         self.learning_rate = learning_rate
         self.verbose = verbose
+        self.random_state = random_state
         self.rng = np.random.default_rng(random_state)
 
     def __str__(self):
diff --git a/pyhhmm/gaussian.py b/pyhhmm/gaussian.py
index 96ae217..8d7fced 100644
--- a/pyhhmm/gaussian.py
+++ b/pyhhmm/gaussian.py
@@ -171,7 +171,7 @@ def _init_model_params(self, X):
         X_concat = concatenate_observation_sequences(X)
 
         if 'm' in self.init_params:
-            kmeans = cluster.KMeans(n_clusters=self.n_states)
+            kmeans = cluster.KMeans(n_clusters=self.n_states, random_state=self.random_state)
             kmeans.fit(X_concat)
             self.means = kmeans.cluster_centers_
         if 'c' in self.init_params:
diff --git a/pyhhmm/heterogeneous.py b/pyhhmm/heterogeneous.py
index b286548..8bcdb67 100644
--- a/pyhhmm/heterogeneous.py
+++ b/pyhhmm/heterogeneous.py
@@ -206,7 +206,7 @@ def _init_model_params(self, X):
             X, gidx=self.n_g_emissions)
 
         if 'm' in self.init_params:
-            kmeans = cluster.KMeans(n_clusters=self.n_states, random_state=0)
+            kmeans = cluster.KMeans(n_clusters=self.n_states, random_state=self.random_state)
             kmeans.fit(X_concat)
             self.means = kmeans.cluster_centers_
         if 'c' in self.init_params:
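
Usage sketch (reviewer note, not part of the patch series): a minimal example of the
reproducibility this series aims to provide. It assumes the GaussianHMM constructor
keywords n_states and n_emissions, that the default init_params include 'm', and it
calls the private _init_model_params(X) hook touched in PATCH 6 purely for
illustration; the toy data, seed, and the init_means helper are made up for the demo.

    import numpy as np
    from pyhhmm.gaussian import GaussianHMM

    # Toy data: one sequence of 100 two-dimensional observations.
    data = [np.random.default_rng(0).normal(size=(100, 2))]

    def init_means(seed):
        # random_state seeds the model's own generator (self.rng) and,
        # after PATCH 6, the KMeans call inside _init_model_params.
        model = GaussianHMM(n_states=3, n_emissions=2, random_state=seed)
        model._init_model_params(data)
        return model.means

    # Same seed -> identical KMeans-initialized state means across runs.
    print(np.allclose(init_means(11), init_means(11)))  # expected: True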