Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Included random_state parameter for reproducibility #11

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions pyhhmm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class BaseHMM(object):
:param verbose: flag to be set to True if per-iteration convergence reports should
be printed. Defaults to True.
:type verbose: bool, optional
:param random_state: seed for the random number generator
"""

def __init__(
Expand All @@ -83,6 +84,7 @@ def __init__(
A_prior=1.0,
learning_rate=0.,
verbose=True,
random_state=None,
):
"""Constructor method."""

Expand All @@ -97,6 +99,8 @@ def __init__(
self.A_prior = A_prior
self.learning_rate = learning_rate
self.verbose = verbose
self.random_state = random_state
self.rng = np.random.default_rng(random_state)

def __str__(self):
"""Function to allow directly printing the object."""
Expand Down Expand Up @@ -329,13 +333,13 @@ def sample(self, n_sequences=1, n_samples=1, return_states=False):
transmat_cdf = np.cumsum(self.A, axis=1)

for _ in range(n_sequences):
currstate = (startprob_cdf > np.random.rand()).argmax()
currstate = (startprob_cdf > self.rng.random()).argmax()
state_sequence = [currstate]
X = [self._generate_sample_from_state(currstate)]

for _ in range(n_samples - 1):
currstate = (transmat_cdf[currstate]
> np.random.rand()).argmax()
> self.rng.random()).argmax()
state_sequence.append(currstate)
X.append(self._generate_sample_from_state(currstate))
samples.append(np.vstack(X))
Expand Down Expand Up @@ -373,12 +377,12 @@ def _init_model_params(self):
self.A = np.full((self.n_states, self.n_states), init)
else:
if 's' in self.init_params:
self.pi = np.random.dirichlet(
self.pi = self.rng.dirichlet(
alpha=self.pi_prior * np.ones(self.n_states), size=1
)[0]

if 't' in self.init_params:
self.A = np.random.dirichlet(
self.A = self.rng.dirichlet(
alpha=self.A_prior * np.ones(self.n_states), size=self.n_states
)

Expand Down
7 changes: 5 additions & 2 deletions pyhhmm/gaussian.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class GaussianHMM(BaseHMM):
:type learning_rate: float, optional
:param verbose: flag to be set to True if per-iteration convergence reports should be printed. Defaults to True.
:type verbose: bool, optional
:param random_state: seed for the random number generator
"""

def __init__(
Expand All @@ -84,6 +85,7 @@ def __init__(
min_covar=1e-3,
learning_rate=0.,
verbose=False,
random_state=None,
):
if covariance_type not in COVARIANCE_TYPES:
raise ValueError(
Expand All @@ -99,6 +101,7 @@ def __init__(
A_prior=A_prior,
learning_rate=learning_rate,
verbose=verbose,
random_state=random_state,
)

self.n_emissions = n_emissions
Expand Down Expand Up @@ -168,7 +171,7 @@ def _init_model_params(self, X):
X_concat = concatenate_observation_sequences(X)

if 'm' in self.init_params:
kmeans = cluster.KMeans(n_clusters=self.n_states)
kmeans = cluster.KMeans(n_clusters=self.n_states, random_state=self.random_state)
kmeans.fit(X_concat)
self.means = kmeans.cluster_centers_
if 'c' in self.init_params:
Expand Down Expand Up @@ -481,4 +484,4 @@ def _generate_sample_from_state(self, state):
from the emission distribution corresponding to a given state
:rtype: array_like
"""
return np.random.multivariate_normal(self.means[state], self.covars[state])
return self.rng.multivariate_normal(self.means[state], self.covars[state])
11 changes: 7 additions & 4 deletions pyhhmm/heterogeneous.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ class HeterogeneousHMM(BaseHMM):
:type learning_rate: float, optional
:param verbose: flag to be set to True if per-iteration convergence reports should be printed, defaults to True
:type verbose: bool, optional
:param random_state: seed for the random number generator
"""

def __init__(
Expand All @@ -98,6 +99,7 @@ def __init__(
min_covar=1e-3,
learning_rate=0,
verbose=False,
random_state=None,
):
"""Constructor method.

Expand Down Expand Up @@ -125,6 +127,7 @@ def __init__(
A_prior=A_prior,
learning_rate=learning_rate,
verbose=verbose,
random_state=random_state,
)

self.n_g_emissions = n_g_emissions
Expand Down Expand Up @@ -203,7 +206,7 @@ def _init_model_params(self, X):
X, gidx=self.n_g_emissions)

if 'm' in self.init_params:
kmeans = cluster.KMeans(n_clusters=self.n_states, random_state=0)
kmeans = cluster.KMeans(n_clusters=self.n_states, random_state=self.random_state)
kmeans.fit(X_concat)
self.means = kmeans.cluster_centers_
if 'c' in self.init_params:
Expand Down Expand Up @@ -629,20 +632,20 @@ def _pdf(self, x, mean, covar):
return multivariate_normal.pdf(x, mean=mean, cov=covar, allow_singular=True)

def _generate_sample_from_state(self, state):
""" Generates a random sample from a given component.
""" Generates a random sample from a given component.
:param state: index of the component to condition on
:type state: int
:return: array of shape (n_g_features+n_d_features, ) containing a random sample
from the emission distribution corresponding to a given state
:rtype: array_like
"""
gauss_sample = np.random.multivariate_normal(
gauss_sample = self.rng.multivariate_normal(
self.means[state], self.covars[state]
)

cat_sample = []
for e in range(self.n_d_emissions):
cdf = np.cumsum(self.B[e][state, :])
cat_sample.append((cdf > np.random.rand()).argmax())
cat_sample.append((cdf > self.rng.random()).argmax())

return np.concatenate([gauss_sample, cat_sample])
7 changes: 5 additions & 2 deletions pyhhmm/multinomial.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ class MultinomialHMM(BaseHMM):
:type learning_rate: float, optional
:param verbose: flag to be set to True if per-iteration convergence reports should be printed, defaults to True
:type verbose: bool, optional
:param random_state: seed for the random number generator
"""

def __init__(
Expand All @@ -71,6 +72,7 @@ def __init__(
state_no_train_de=None,
learning_rate=0.1,
verbose=True,
random_state=None,
):
"""Constructor method

Expand All @@ -91,6 +93,7 @@ def __init__(
A_prior=A_prior,
verbose=verbose,
learning_rate=learning_rate,
random_state=random_state,
)
self.n_emissions = n_emissions
self.n_features = n_features
Expand Down Expand Up @@ -157,7 +160,7 @@ def _init_model_params(self):
else:
if self.nr_no_train_de == 0:
self.B = [
np.random.rand(self.n_states, self.n_features[i])
self.rng.random((self.n_states, self.n_features[i]))
for i in range(self.n_emissions)
]
for i in range(self.n_emissions):
Expand Down Expand Up @@ -301,5 +304,5 @@ def _generate_sample_from_state(self, state):
res = []
for e in range(self.n_emissions):
cdf = np.cumsum(self.B[e][state, :])
res.append((cdf > np.random.rand()).argmax())
res.append((cdf > self.rng.random()).argmax())
return np.asarray(res)