diff --git a/sim_tools/ovs/toy_models.py b/sim_tools/ovs/toy_models.py index e165eda..9a9c437 100644 --- a/sim_tools/ovs/toy_models.py +++ b/sim_tools/ovs/toy_models.py @@ -1,74 +1,76 @@ -''' +""" Toy models for testing optimisation via simulation procedures and for running models. -''' +""" import numpy as np + class ManualOptimiser(object): - ''' + """ A class to manually run individual and multiple replications from competiting simulated designs of a system. - ''' + """ + def __init__(self, model, n_designs, verbose=False): - ''' + """ Constructor - + Parameters: ------- model - object, simulation model that implements simulate(design_index) and register_observer(observer) methods - - n_designs - int, number of competiting designs - + + n_designs - int, number of competiting designs + verbose_replications - bool, True if each individual rep observation is displayed. False if run silently. (default=False) - ''' + """ self._model = model model.register_observer(self) self.verbose = verbose self._n_designs = n_designs - self.means = np.zeros(n_designs, dtype='float') - self.vars = np.zeros(n_designs, dtype='float') - self._sq = np.zeros(n_designs, dtype='float') - self._ses = np.zeros(n_designs, dtype='float') + self.means = np.zeros(n_designs, dtype="float") + self.vars = np.zeros(n_designs, dtype="float") + self._sq = np.zeros(n_designs, dtype="float") + self._ses = np.zeros(n_designs, dtype="float") self.allocations = np.zeros(n_designs, np.int32) - + def __str__(self): - return f"ManualOptimiser(model={self._model.__str__()}, n_designs={self._n_designs}, verbose={self.verbose})" - + return f"ManualOptimiser(model={self._model.__str__()}, n_designs={self._n_designs}, verbose={self.verbose})" + def simulate_designs(self, design_indexes=None, replications=1): - ''' + """ Multiple replications of a simulated design - + Parameters: -------- design_indexes - list, zero based design indexes. If None then all system designs are included. (default=None) - + replications - int, number of replications to run (default=1) - ''' - + """ + if design_indexes is None: design_indexes = [design for design in range(self._n_designs)] - + for design_index in design_indexes: self.simulate_design(design_index, replications) - + def simulate_design(self, design_index, replications=1): - ''' + """ Multiple replications of a simulated design - + Parameters: -------- design_index - int, zero based design index replications - int, number of replications to run (default=1) - ''' + """ for rep in range(replications): self._model.simulate(design_index) - + def feedback(self, *args, **kwargs): - ''' + """ Feedback from the simulation model Recieves a reward and updates understanding of an arm @@ -83,18 +85,18 @@ def feedback(self, *args, **kwargs): *kwards -- dict of keyword arguments: None expected! - ''' + """ design_index = args[1] observation = args[2] - + self.allocations[design_index] += 1 self._update_moments(design_index, observation) - + if self.verbose: print(observation) - + def _update_moments(self, design_index, observation): - ''' + """ Updates the running average, var of the design Parameters: @@ -102,47 +104,48 @@ def _update_moments(self, design_index, observation): design_index -- int, index of the array to update observation -- float, observation recieved from the last replication - ''' + """ n = self.allocations[design_index] current_mean = self.means[design_index] new_mean = ((n - 1) / float(n)) * current_mean + (1 / float(n)) * observation if n > 1: - self._sq[design_index] += (observation - abs(current_mean)) * (observation - abs(new_mean)) + self._sq[design_index] += (observation - abs(current_mean)) * ( + observation - abs(new_mean) + ) self.vars[design_index] = self._sq[design_index] / (n - 1) self._ses[design_index] = np.sqrt(self.vars[design_index]) / np.sqrt(n) self.means[design_index] = new_mean - def gaussian_sequence_model(start, end, step=1): - ''' - Sequence of GaussianBandit Arms. Assumes unit variance. + """ + Sequence of GaussianBandit Arms. Assumes unit variance. e.g. start = 1, end = 10 return 10 bandits arms with means 1 through 10. Parameters: ------- - start: int, + start: int, start of sequence of means (inclusive) - end: int, + end: int, end of sequence of means (inclusive) step: int, optional (default = 1) - step size for sequence + step size for sequence Returns: ------- list of GaussianBandit objects ordered by means - ''' - bandits = [GaussianBandit(mean) for mean in range(start, end+1, step)] + """ + bandits = [GaussianBandit(mean) for mean in range(start, end + 1, step)] return BanditCasino(bandits) def gaussian_bandit_sequence(start, end, step=1): - ''' - Sequence of GaussianBandit Arms. Assumes unit variance. + """ + Sequence of GaussianBandit Arms. Assumes unit variance. e.g. start = 1, end = 10 return 10 bandits arms with means 1 through 10. Parameters: @@ -155,41 +158,40 @@ def gaussian_bandit_sequence(start, end, step=1): ------- list of GaussianBandit objects ordered by means - ''' + """ return [GaussianBandit(mean) for mean in range(start, end, step)] - + def random_gaussian_model(mean_low, mean_high, var_low, var_high, n_designs): - ''' + """ Create a model with n system designs where the mean and variance of the normal distributions are sampled to between the specified tolerances - + Parameters: ------- mean_low - float, a lower bound on the means of the output distributions - + mean_high- float, an upper bound on the means - + var_low - float, a lower bound on the variance of the output distributions - + var_high - float, an upper bound on the variances. - + n_designs - int, the number of designs to create. - + Returns: -------- BanditCasino with n_designs with means and variances between specified limits. - ''' + """ means = np.random.uniform(low=mean_low, high=mean_high, size=n_designs) sigmas = np.random.uniform(low=var_low, high=var_high, size=n_designs) return custom_gaussian_model(means, sigmas) - def custom_gaussian_model(mus, sigmas): - ''' - Creates a simulation model where each + """ + Creates a simulation model where each output distribution is distributed N ~(mu, sigma) Assumes mus and signmas are of equal length @@ -201,24 +203,24 @@ def custom_gaussian_model(mus, sigmas): Returns: ------ - object, simulation model - ''' + object, simulation model + """ bandits = [GaussianBandit(mu, sigma) for mu, sigma in zip(mus, sigmas)] return BanditCasino(bandits) class GaussianBandit(object): - ''' + """ Classic one armed bandit gambling machine. A user plays the bandit by pulling its arm. - The bandit returns a reward normally distribution + The bandit returns a reward normally distribution with mean mu and stdev sigma - ''' + """ def __init__(self, mu, sigma=1.0): - ''' + """ Constructor method for Gaussian Bandit Keyword arguments: @@ -226,7 +228,7 @@ def __init__(self, mu, sigma=1.0): mu -- float, mean of the normal distribution sigma -- float, stdev of the normal distribution (default = 1.0) - ''' + """ self._mu = mu self._sigma = sigma self._number_of_plays = 0 @@ -234,7 +236,7 @@ def __init__(self, mu, sigma=1.0): self._observers = [] def play(self): - ''' + """ Pull the arm on the bernoulli bandit @@ -242,7 +244,7 @@ def play(self): ----- reward -- int with value = 0 when no reward and 1 when the pull results in a win - ''' + """ reward = np.random.normal(self._mu, self._sigma) self._total_reward += reward @@ -251,33 +253,30 @@ def play(self): return reward def reset(self): - ''' - Reset the number of plays and + """ + Reset the number of plays and total rewards to zero. - ''' + """ self._number_of_plays = 0 self._total_reward = 0 - - class BanditCasino(object): - def __init__(self, bandits): - ''' + """ Casino constructor method Keyword arguments: ------ bandits -- list, of BernoulliBandits objects - ''' + """ self._bandits = bandits self._current_index = 0 self._observers = [] def __str__(self): return f"BanditCasino()" - + def __getitem__(self, index): return self._bandits[index] @@ -288,33 +287,29 @@ def _get_best_arm(self): means = [] for bandit in self._bandits: means.append(bandit._mu) - - return np.argmax(np.array(means)) - + return np.argmax(np.array(means)) - def simulate(self, bandit_index): - ''' + """ Play a specific bandit machine. - Notifies all observers of the outcome + Notifies all observers of the outcome Keyword arguments: ----- - bandit_index -- int, index of bandit to play - ''' + bandit_index -- int, index of bandit to play + """ reward = self._bandits[bandit_index].play() self.notify_observers(bandit_index, reward) def random_action(self): - ''' + """ Selects a bandit index at random and plays it. - ''' + """ bandit_index = np.random.choice(len(self._bandits)) self.simulate(bandit_index) - def __iter__(self): return self @@ -328,10 +323,10 @@ def __next__(self): def register_observer(self, observer): self._observers.append(observer) - + def notify_observers(self, *args, **kwargs): for observer in self._observers: - observer.feedback(self, *args, **kwargs) + observer.feedback(self, *args, **kwargs) number_of_arms = property(__get_number_of_arms) - best_design = property(_get_best_arm) \ No newline at end of file + best_design = property(_get_best_arm)