diff --git a/DDPG_study18_best_test_varianz.pkl b/DDPG_study18_best_test_varianz.pkl new file mode 100644 index 00000000..c87e3e11 Binary files /dev/null and b/DDPG_study18_best_test_varianz.pkl differ diff --git a/OMG_Integrator_Actor/3/model.zip b/OMG_Integrator_Actor/3/model.zip new file mode 100644 index 00000000..351a7617 Binary files /dev/null and b/OMG_Integrator_Actor/3/model.zip differ diff --git a/OMG_Integrator_Actor/32/model.zip b/OMG_Integrator_Actor/32/model.zip new file mode 100644 index 00000000..f7b22eb7 Binary files /dev/null and b/OMG_Integrator_Actor/32/model.zip differ diff --git a/OMG_Integrator_Actor_i_load_feature/0/model.zip b/OMG_Integrator_Actor_i_load_feature/0/model.zip new file mode 100644 index 00000000..393106b1 Binary files /dev/null and b/OMG_Integrator_Actor_i_load_feature/0/model.zip differ diff --git a/OMG_Integrator_Actor_i_load_feature/1/model.zip b/OMG_Integrator_Actor_i_load_feature/1/model.zip new file mode 100644 index 00000000..bdfdac14 Binary files /dev/null and b/OMG_Integrator_Actor_i_load_feature/1/model.zip differ diff --git a/OMG_Integrator_Actor_i_load_feature_2/1/model.zip b/OMG_Integrator_Actor_i_load_feature_2/1/model.zip new file mode 100644 index 00000000..21ec1154 Binary files /dev/null and b/OMG_Integrator_Actor_i_load_feature_2/1/model.zip differ diff --git a/Pipi.pkl b/Pipi.pkl new file mode 100644 index 00000000..4fcaf4dc Binary files /dev/null and b/Pipi.pkl differ diff --git a/experiments/DQN/env/Custom_Cartpole.py b/experiments/DQN/env/Custom_Cartpole.py new file mode 100644 index 00000000..fb627916 --- /dev/null +++ b/experiments/DQN/env/Custom_Cartpole.py @@ -0,0 +1,226 @@ +""" +Classic cart-pole system implemented by Rich Sutton et al. +Copied from http://incompleteideas.net/sutton/book/code/pole.c +permalink: https://perma.cc/C9ZM-652R +""" + +import math +import gym +from gym import spaces, logger +from gym.utils import seeding +import numpy as np + + +class CartPoleEnv(gym.Env): + """ + Description: + A pole is attached by an un-actuated joint to a cart, which moves along + a frictionless track. The pendulum starts upright, and the goal is to + prevent it from falling over by increasing and reducing the cart's + velocity. + + Source: + This environment corresponds to the version of the cart-pole problem + described by Barto, Sutton, and Anderson + + Observation: + Type: Box(4) + Num Observation Min Max + 0 Cart Position -4.8 4.8 + 1 Cart Velocity -Inf Inf + 2 Pole Angle -0.418 rad (-24 deg) 0.418 rad (24 deg) + 3 Pole Angular Velocity -Inf Inf + + + Actions: + Type: Discrete(2) + Num Action + 0 Push cart to the left + 1 Push cart to the right + + Note: The amount the velocity that is reduced or increased is not + fixed; it depends on the angle the pole is pointing. This is because + the center of gravity of the pole increases the amount of energy needed + to move the cart underneath it + + Reward: + Reward is 1 for every step taken, including the termination step + + Starting State: + All observations are assigned a uniform random value in [-0.05..0.05] + + Episode Termination: + Pole Angle is more than 12 degrees. + Cart Position is more than 2.4 (center of the cart reaches the edge of + the display). + Episode length is greater than 200. + Solved Requirements: + Considered solved when the average return is greater than or equal to + 195.0 over 100 consecutive trials. 
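+
+    Note (modifications in this copy):
+        Compared to the original Gym implementation, the pole angle is wrapped
+        into [-pi, pi] after every step, exceeding the angle threshold does not
+        terminate the episode (only leaving the track does), and the per-step
+        reward is shaped as 1 - |theta| / pi instead of a constant 1.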
+ """ + + metadata = { + 'render.modes': ['human', 'rgb_array'], + 'video.frames_per_second': 50 + } + + def __init__(self): + self.gravity = 9.8 + self.masscart = 1.0 + self.masspole = 0.1 + self.total_mass = (self.masspole + self.masscart) + self.length = 0.5 # actually half the pole's length + self.polemass_length = (self.masspole * self.length) + self.force_mag = 10.0 + self.tau = 0.02 # seconds between state updates + self.kinematics_integrator = 'euler' + + # Angle at which to fail the episode + self.theta_threshold_radians = 12 * 2 * math.pi / 360 + self.x_threshold = 2.4 + + # Angle limit set to 2 * theta_threshold_radians so failing observation + # is still within bounds. + high = np.array([self.x_threshold * 2, + np.finfo(np.float32).max, + self.theta_threshold_radians * 2, + np.finfo(np.float32).max], + dtype=np.float32) + + self.action_space = spaces.Discrete(2) + self.observation_space = spaces.Box(-high, high, dtype=np.float32) + + self.seed() + self.viewer = None + self.state = None + + self.steps_beyond_done = None + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + def step(self, action): + err_msg = "%r (%s) invalid" % (action, type(action)) + assert self.action_space.contains(action), err_msg + + x, x_dot, theta, theta_dot = self.state + force = self.force_mag if action == 1 else -self.force_mag + costheta = math.cos(theta) + sintheta = math.sin(theta) + + # For the interested reader: + # https://coneural.org/florian/papers/05_cart_pole.pdf + temp = (force + self.polemass_length * theta_dot ** 2 * sintheta) / self.total_mass + thetaacc = (self.gravity * sintheta - costheta * temp) / ( + self.length * (4.0 / 3.0 - self.masspole * costheta ** 2 / self.total_mass)) + xacc = temp - self.polemass_length * thetaacc * costheta / self.total_mass + + if self.kinematics_integrator == 'euler': + x = x + self.tau * x_dot + x_dot = x_dot + self.tau * xacc + theta = theta + self.tau * theta_dot + theta_dot = theta_dot + self.tau * thetaacc + else: # semi-implicit euler + x_dot = x_dot + self.tau * xacc + x = x + self.tau * x_dot + theta_dot = theta_dot + self.tau * thetaacc + theta = theta + self.tau * theta_dot + + if theta >= np.pi: + theta -= 2 * np.pi + elif theta <= -np.pi: + theta += 2 * np.pi + + self.state = (x, x_dot, theta, theta_dot) + + done = bool( + x < -self.x_threshold + or x > self.x_threshold + # or theta < -self.theta_threshold_radians + # or theta > self.theta_threshold_radians + ) + + if not done: + reward = 1 - (abs(theta) / np.pi) + # reward = 1.0 + elif self.steps_beyond_done is None: + # Pole just fell! + self.steps_beyond_done = 0 + reward = 0.0 + else: + if self.steps_beyond_done == 0: + logger.warn( + "You are calling 'step()' even though this " + "environment has already returned done = True. You " + "should always call 'reset()' once you receive 'done = " + "True' -- any further steps are undefined behavior." 
+ ) + self.steps_beyond_done += 1 + reward = 0.0 + + return np.array(self.state), reward, done, {} + + def reset(self): + self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) + self.steps_beyond_done = None + return np.array(self.state) + + def render(self, mode='human'): + screen_width = 600 + screen_height = 400 + + world_width = self.x_threshold * 2 + scale = screen_width / world_width + carty = 100 # TOP OF CART + polewidth = 10.0 + polelen = scale * (2 * self.length) + cartwidth = 50.0 + cartheight = 30.0 + + if self.viewer is None: + from gym.envs.classic_control import rendering + self.viewer = rendering.Viewer(screen_width, screen_height) + l, r, t, b = -cartwidth / 2, cartwidth / 2, cartheight / 2, -cartheight / 2 + axleoffset = cartheight / 4.0 + cart = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) + self.carttrans = rendering.Transform() + cart.add_attr(self.carttrans) + self.viewer.add_geom(cart) + l, r, t, b = -polewidth / 2, polewidth / 2, polelen - polewidth / 2, -polewidth / 2 + pole = rendering.FilledPolygon([(l, b), (l, t), (r, t), (r, b)]) + pole.set_color(.8, .6, .4) + self.poletrans = rendering.Transform(translation=(0, axleoffset)) + pole.add_attr(self.poletrans) + pole.add_attr(self.carttrans) + self.viewer.add_geom(pole) + self.axle = rendering.make_circle(polewidth / 2) + self.axle.add_attr(self.poletrans) + self.axle.add_attr(self.carttrans) + self.axle.set_color(.5, .5, .8) + self.viewer.add_geom(self.axle) + self.track = rendering.Line((0, carty), (screen_width, carty)) + self.track.set_color(0, 0, 0) + self.viewer.add_geom(self.track) + + self._pole_geom = pole + + if self.state is None: + return None + + # Edit the pole polygon vertex + pole = self._pole_geom + l, r, t, b = -polewidth / 2, polewidth / 2, polelen - polewidth / 2, -polewidth / 2 + pole.v = [(l, b), (l, t), (r, t), (r, b)] + + x = self.state + cartx = x[0] * scale + screen_width / 2.0 # MIDDLE OF CART + self.carttrans.set_translation(cartx, carty) + self.poletrans.set_rotation(-x[2]) + + return self.viewer.render(return_rgb_array=mode == 'rgb_array') + + def close(self): + if self.viewer: + self.viewer.close() + self.viewer = None diff --git a/experiments/DQN/train_dqn_cont_env_test.py b/experiments/DQN/train_dqn_cont_env_test.py new file mode 100644 index 00000000..84c85bc8 --- /dev/null +++ b/experiments/DQN/train_dqn_cont_env_test.py @@ -0,0 +1,176 @@ +from multiprocessing import Pool +from typing import Union + +import gym +import numpy as np +import pandas as pd +import torch as th +from stable_baselines3 import DQN + +import matplotlib.pyplot as plt + +# env = gym.make("CartPole-v0") +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.type_aliases import GymStepReturn, GymObs + +from experiments.DQN.env.Custom_Cartpole import CartPoleEnv + + +# return_all_agents = [] + +class FeatureWrapper(Monitor): + + def __init__(self, env, training_episode_length): + """ + : + """ + state_constraints = [[- 2.4, 2.4], + [-7, 7], + [-np.pi, +np.pi], + [-10, 10]] + self.state_low = np.array(state_constraints)[:, 0] + self.state_high = np.array(state_constraints)[:, 1] + self.delta_v = 0.15 + + super().__init__(env) + self.training_episode_length = training_episode_length + self._n_training_steps = 0 + self.episode_return = [] + self.observation_space = gym.spaces.Box(low=np.full(env.observation_space.shape[0] + 1, -np.inf), + high=np.full(env.observation_space.shape[0] + 1, np.inf)) + + def step(self, action: Union[np.ndarray, int]) -> 
GymStepReturn: + """ + """ + + obs, reward, done, info = super().step(action) + + done = False + if np.any(np.abs(self.env.state) > self.state_high): + reward = -1 + done = True + + self._n_training_steps += 1 + + if self._n_training_steps == self.training_episode_length: + done = True + #info["timelimit_reached"] = True + + if self._n_training_steps == self.training_episode_length or done: + self.episode_return.append(sum(self.rewards)) + + norm_state = [(obs[0] - self.state_low[0]) / (self.state_high[0] - self.state_low[0]) * 2 - 1, + (obs[1] - self.state_low[1]) / (self.state_high[1] - self.state_low[1]) * 2 - 1, + np.cos(obs[2]), + np.sin(obs[2]), + (obs[3] - self.state_low[3]) / (self.state_high[3] - self.state_low[3]) * 2 - 1, + ] + + return norm_state, reward, done, info + + def reset(self, **kwargs) -> GymObs: + """ + + """ + obs = super().reset() + + # self.state = self.np_random.uniform(low=-0.05, high=0.05, size=(4,)) + # self.state[0] = self.np_random.uniform(low=-2.4, high=2.4, size=(1,)) + # self.state[1] = self.np_random.uniform(low=-7, high=7, size=(1,)) + # self.state[2] = self.np_random.uniform(low=-np.pi, high=np.pi, size=(1,)) + # self.state[3] = self.np_random.uniform(low=-10, high=10, size=(1,)) + + while True: + self.env.state = np.random.uniform(low=self.state_low, + high=self.state_high) + if np.abs(self.env.state[3]) < 1: + if self.env.state[1] > 0: + if np.sqrt((2.4 - self.env.state[0]) / self.env.tau * self.delta_v) > self.env.state[1]: + break + if self.env.state[1] < 0: + if -np.sqrt((2.4 + self.env.state[0]) / self.env.tau * self.delta_v) < self.env.state[1]: + break + + self.steps_beyond_done = None + + self._n_training_steps = 0 + + norm_state = [(self.env.state[0] - self.state_low[0]) / (self.state_high[0] - self.state_low[0]) * 2 - 1, + (self.env.state[1] - self.state_low[1]) / (self.state_high[1] - self.state_low[1]) * 2 - 1, + np.cos(self.env.state[2]), + np.sin(self.env.state[2]), + (self.env.state[3] - self.state_low[3]) / (self.state_high[3] - self.state_low[3]) * 2 - 1, + ] + + return norm_state #np.array(self.state) + + +# for i in range(2): +def bla(idx): + env = FeatureWrapper(CartPoleEnv(), training_episode_length=200) + # env = FeatureWrapper(gym.make("CartPole-v1")) + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=[200, 200, 200]) + + model = DQN("MlpPolicy", env, learning_rate=1e-3, buffer_size=10000, learning_starts=1000, batch_size=32, + tau=0.001, + gamma=0.99, train_freq=(1, "step"), gradient_steps=-1, optimize_memory_usage=False, + target_update_interval=1, + exploration_fraction=0.4, exploration_initial_eps=0.5, exploration_final_eps=0.01, max_grad_norm=1000, + tensorboard_log='TB_log/', create_eval_env=False, policy_kwargs=policy_kwargs, verbose=1, seed=None, + device='auto', + _init_setup_model=True) + + model.q_net.q_net._modules['1'].negative_slope = 0.1 + model.q_net.q_net._modules['3'].negative_slope = 0.1 + model.q_net.q_net._modules['5'].negative_slope = 0.1 + model.q_net_target.q_net._modules['1'].negative_slope = 0.1 + model.q_net_target.q_net._modules['3'].negative_slope = 0.1 + model.q_net_target.q_net._modules['5'].negative_slope = 0.1 + + # model = DQN("MlpPolicy", env, verbose=1) + + # learn(total_timesteps, callback=None, log_interval=4, eval_env=None, eval_freq=- 1, n_eval_episodes=5, + # tb_log_name='DQN', eval_log_path=None, reset_n um_timesteps=True) + model.learn(total_timesteps=100000, log_interval=4) + return np.array(env.episode_return) + + +#with Pool(2) as p: +return_all_agents = map(bla, 
range(20)) + + + +# return_all_agents.append(np.array(env.episode_return)) + + +# asd = min(map(len,return_all_agents)) +# np_return_all_agents = np.array([l[:asd] for l in return_all_agents]) + +df = pd.DataFrame(return_all_agents) + +df.to_pickle("DQN_original20Agents_sb3_original_lowBuffer") + +m = df.mean() +s = df.std() + +episode = pd.Series(range(0, df.shape[1])) + +plt.plot(episode, m) +plt.fill_between(episode, m - s, m + s, facecolor='r') +plt.ylabel('Average return') +plt.xlabel('Episode') +plt.ylim([0, 200]) +plt.grid() +plt.title('20 Agent Original Code_original_lowBuffer') +plt.show() + +""" +obs = env.reset() +while True: + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = env.step(action) + env.render() + if done: + obs = env.reset() +""" diff --git a/experiments/DQN/viz.py b/experiments/DQN/viz.py new file mode 100644 index 00000000..26300eeb --- /dev/null +++ b/experiments/DQN/viz.py @@ -0,0 +1,33 @@ +import pandas as pd +import matplotlib.pyplot as plt + +agents25_original = pd.read_pickle("DQN_ORIGINAL_5Agents") +agents50_fix = pd.read_pickle("DQN_WITH_fix5Agents") + +m = agents25_original.mean() +s = agents25_original.std() + +episode = pd.Series(range(0, agents25_original.shape[1])) + +plt.plot(episode, m) +plt.fill_between(episode, m - s, m + s, facecolor='r') +plt.ylabel('Average return') +plt.xlabel('Episode') +plt.ylim([0, 200]) +plt.grid() +plt.title('25 Agent Original') +plt.show() + +m_fix = agents50_fix.mean() +s_fix = agents50_fix.std() + +episode_fix = pd.Series(range(0, agents50_fix.shape[1])) + +plt.plot(episode_fix, m_fix) +plt.fill_between(episode_fix, m_fix - s_fix, m_fix + s_fix, facecolor='r') +plt.ylabel('Average return') +plt.xlabel('Episode') +plt.ylim([0, 200]) +plt.grid() +plt.title('50 Agent Fixed Code') +plt.show() diff --git a/experiments/GEM/env/env_wrapper_GEM.py b/experiments/GEM/env/env_wrapper_GEM.py new file mode 100644 index 00000000..2af43069 --- /dev/null +++ b/experiments/GEM/env/env_wrapper_GEM.py @@ -0,0 +1,480 @@ +import platform +from typing import Union + +import gym +import numpy as np +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.type_aliases import GymStepReturn + +from experiments.GEM.util.config import cfg +from experiments.hp_tune.env.vctrl_single_inv import net +from openmodelica_microgrid_gym.util import Fastqueue + + +class BaseWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", gamma=0, + number_learing_steps=500000, number_past_vals=0): + """ + Base Env Wrapper to add features to the env-observations and adds information to env.step output which can be + used in case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) 
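+        :param recorder: Recorder instance used to dump the logged episode data (locally as json)
+        :param n_trail: Trial number (as string) used to tag the logged data
+        :param gamma: Discount factor; the step reward is scaled by (1 - gamma)
+        :param number_learing_steps: Total number of learning steps of the training run
+        :param number_past_vals: Number of past i_dq measurements appended to the observation (delay queues initialized with zeros)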
+ + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features - 2, -np.inf), # -2 because v_dq is removed + high=np.full(env.observation_space.shape[0] + number_of_features - 2, np.inf)) + + # increase action-space for PI-seperation + # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1)) + + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self.n_episode = 0 + self.reward_episode_mean = [] + self.i_d_mess = [] + self.i_q_mess = [] + self.i_d_ref = [] + self.i_q_ref = [] + self.action_d = [] + self.action_q = [] + self.n_trail = n_trail + self.used_P = np.zeros(self.action_space.shape) + self.gamma = gamma + self.number_learing_steps = number_learing_steps + self.delay_queues = [Fastqueue(1, 2) for _ in range(number_past_vals)] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + + obs, reward, done, info = super().step(action) + reward = reward * (1 - self.gamma) + # super().render() + + self._n_training_steps += 1 + + if cfg['loglevel'] == 'train': + self.i_d_mess.append(np.float64(obs[0])) + self.i_q_mess.append(np.float64(obs[1])) + self.i_d_ref.append(np.float64(obs[2])) + self.i_q_ref.append(np.float64(obs[3])) + self.action_d.append(np.float64(action[0])) + self.action_q.append(np.float64(action[1])) + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + super().close() + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "i_d_mess": self.i_d_mess, + "i_q_mess": self.i_q_mess, + "v_d_mess": self.env.env.v_d_mess, + "v_q_mess": self.env.env.v_q_mess, + "i_d_ref": self.i_d_ref, + "i_q_ref": self.i_q_ref, + 'action_d': self.action_d, + 'action_q': self.action_q, + "Rewards": self.rewards, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + "Reward function": 'MRE' + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + self.recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.i_d_mess = [] + self.i_q_mess = [] + self.i_d_ref = [] + self.i_q_ref = [] + self.action_d = [] + self.action_q = [] + """ + Features + """ + error = (obs[2:4] - obs[0:2]) / 2 # control error: v_setpoint - v_mess + obs = np.append(obs, error) + obs = np.append(obs, self.used_P) + obs_delay_array = self.shift_and_append(obs[0:2]) + obs = np.append(obs, obs_delay_array) + + # todo efficiency? 
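+        # remember the action applied in this step; it is appended to the next observation as a feature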
+ self.used_P = np.copy(action) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + [x.clear() for x in self.delay_queues] + obs = super().reset() + + if cfg['loglevel'] == 'train': + self.i_d_mess.append(np.float64(obs[0])) + self.i_q_mess.append(np.float64(obs[1])) + self.i_d_ref.append(np.float64(obs[2])) + self.i_q_ref.append(np.float64(obs[3])) + self.action_d.append(np.float64(0)) + self.action_q.append(np.float64(0)) + + self._n_training_steps = 0 + self.used_P = np.zeros(self.action_space.shape) + + # if cfg['loglevel'] == 'train': + # print("Log Data for Taining in Basewrapper L2xx?") + + """ + Features + """ + # SP wir an den State gehangen! + error = (obs[2:4] - obs[0:2]) / 2 # control error: v_setpoint - v_mess + obs = np.append(obs, error) + obs = np.append(obs, self.used_P) + obs_delay_array = self.shift_and_append(obs[0:2]) + obs = np.append(obs, obs_delay_array) + + return obs + + def shift_and_append(self, obs): + """ + Takes the observation and shifts throught the queue + every queue output is added to total obs + """ + obs_delay_array = np.array([]) + obs_temp = obs + for queue in self.delay_queues: + obs_temp = queue.shift(obs_temp) + obs_delay_array = np.append(obs_delay_array, obs_temp) + + return obs_delay_array + + +class FeatureWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000): # , use_past_vals=False, number_past_vals=0): + """ + Env Wrapper to add features to the env-observations and adds information to env.step output which can be used in + case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) 
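+        :param integrator_weight: Gain that scales the integral part of the action before it is accumulated
+        :param antiwindup_weight: Gain used to wind the integrator sum back whenever the combined PI action is clipped
+        :param gamma: Discount factor; the step reward is scaled by (1 - gamma)
+        :param penalty_I_weight: Weight of the penalty on the integral action added to the reward
+        :param penalty_P_weight: Weight of the penalty on the proportional action added to the reward
+        :param t_start_penalty_I: Training step from which the integral penalty is annealed linearly to zero
+        :param t_start_penalty_P: Training step from which the proportional penalty is annealed linearly to zero
+        :param number_learing_steps: Total number of learning steps; end point of the penalty annealing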
+ + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features - 2, -np.inf), + # -2 because, v_dq is removed + high=np.full(env.observation_space.shape[0] + number_of_features - 2, np.inf)) + + # increase action-space for PI-seperation + # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1)) + + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_d_mess = [] + self.i_q_mess = [] + self.i_d_ref = [] + self.i_q_ref = [] + self.action_d = [] + self.action_q = [] + self.n_episode = 0 + self.reward_episode_mean = [] + self.n_trail = n_trail + self.integrator_sum = np.zeros(self.action_space.shape) + self.integrator_weight = integrator_weight + self.antiwindup_weight = antiwindup_weight + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + self.gamma = gamma + self.penalty_I_weight = penalty_I_weight + self.penalty_P_weight = penalty_P_weight + self.t_start_penalty_I = t_start_penalty_I + self.t_start_penalty_P = t_start_penalty_P + self.number_learing_steps = number_learing_steps + self.integrator_sum_list0 = [] + self.integrator_sum_list1 = [] + self.action_P0 = [] + self.action_P1 = [] + self.action_I0 = [] + self.action_I1 = [] + + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + action_P = action[0:2] + action_I = action[2:4] + + self.integrator_sum += action_I * self.integrator_weight + + action_PI = action_P + self.integrator_sum + + # check if m_abc will be clipped + if np.any(abs(action_PI) > 1): + + clipped_action = np.clip(action_PI, -1, 1) + + delta_action = clipped_action - action_PI + # if, reduce integrator by clipped delta + # action_delta = abc_to_dq0(delta_action, self.env.net.components[0].phase) + self.integrator_sum += delta_action * self.antiwindup_weight + + """ + clip_reward = np.clip(np.sum(np.abs(delta_action) * \ + (-1 / (self.env.net.components[0].v_lim / self.env.net.components[ + 0].v_DC))) / 3 * (1 - self.gamma), + -1, 0) + """ + clip_reward = 0 + + action_PI = clipped_action + + else: + clip_reward = 0 + + obs, reward, done, info = super().step(action_PI) + reward = reward + clip_reward + reward = reward * (1 - self.gamma) + + #super().render() + + integrator_penalty = np.sum(-((np.abs(action_I)) ** 0.5)) * (1 - self.gamma) / 3 + # action_P_penalty = - np.sum((np.abs(action_P - self.used_P)) ** 0.5) * (1 - self.gamma) / 3 + action_P_penalty = np.sum(-((np.abs(action_P)) ** 0.5)) * (1 - self.gamma) / 3 + + # reward_weight is = 1 + + if self.total_steps > self.t_start_penalty_I: + penalty_I_weight_scale = 1 / (self.t_start_penalty_I - self.number_learing_steps) * self.total_steps - \ + self.number_learing_steps / (self.t_start_penalty_I - self.number_learing_steps) + + else: + penalty_I_weight_scale = 1 + + if self.total_steps > self.t_start_penalty_P: + penalty_P_weight_scale = 1 / (self.t_start_penalty_P - self.number_learing_steps) * self.total_steps - \ + self.number_learing_steps / (self.t_start_penalty_P - self.number_learing_steps) + + else: + + penalty_P_weight_scale = 1 + + reward = (reward + (self.penalty_I_weight * penalty_I_weight_scale) * integrator_penalty + + self.penalty_P_weight * penalty_P_weight_scale * 
action_P_penalty) \ + / (1 + self.penalty_I_weight * penalty_I_weight_scale + self.penalty_P_weight * penalty_P_weight_scale) + + self._n_training_steps += 1 + + if cfg['loglevel'] == 'train': + self.i_d_mess.append(np.float64(obs[0])) + self.i_q_mess.append(np.float64(obs[1])) + self.i_d_ref.append(np.float64(obs[2])) + self.i_q_ref.append(np.float64(obs[3])) + self.action_d.append(np.float64(action[0])) + self.action_q.append(np.float64(action[1])) + self.integrator_sum_list0.append(self.integrator_sum[0]) + self.integrator_sum_list1.append(self.integrator_sum[1]) + self.action_P0.append(np.float64(action_P[0])) + self.action_P1.append(np.float64(action_P[1])) + self.action_I0.append(np.float64(action_I[0])) + self.action_I1.append(np.float64(action_I[1])) + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + super().close() + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "i_d_mess": self.i_d_mess, + "i_q_mess": self.i_q_mess, + "v_d_mess": self.env.env.v_d_mess, + "v_q_mess": self.env.env.v_q_mess, + "i_d_ref": self.i_d_ref, + "i_q_ref": self.i_q_ref, + 'action_d': self.action_d, + 'action_q': self.action_q, + "Rewards": self.rewards, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + "Reward function": 'rew.rew_fun_dq0', + 'Integrator0': self.integrator_sum_list0, + 'Integrator1': self.integrator_sum_list1, + 'actionP0': self.action_P0, + 'actionP1': self.action_P1, + 'actionI0': self.action_I0, + 'actionI1': self.action_I1, + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + self.recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.i_d_mess = [] + self.i_q_mess = [] + self.i_d_ref = [] + self.i_q_ref = [] + self.action_d = [] + self.action_q = [] + + # if self._n_training_steps > 500: + # super().close() + + """ + Features + """ + error = (obs[2:4] - obs[0:2]) / 2 # control error: v_setpoint - v_mess + obs = np.append(obs, error) + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + + self.used_P = np.copy(action_P) + self.used_I = np.copy(self.integrator_sum) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + obs = super().reset() + + if cfg['loglevel'] == 'train': + self.i_d_mess.append(np.float64(obs[0])) + self.i_q_mess.append(np.float64(obs[1])) + self.i_d_ref.append(np.float64(obs[2])) + self.i_q_ref.append(np.float64(obs[3])) + self.action_d.append(np.float64(0)) + self.action_q.append(np.float64(0)) + + self._n_training_steps = 0 + self.integrator_sum = np.zeros(self.action_space.shape) + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + + """" + Features + """ + # SP wir an den State gehangen! 
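+        # (i.e., the setpoint is appended to the state: the control error, the last P-action and the
+        #  last integrator sum are added to the observation as features)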
+ error = (obs[2:4] - obs[0:2]) / 2 # control error: v_setpoint - v_mess + obs = np.append(obs, error) + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, self.used_action) + + return obs + + + + +class FeatureWrapper_pastVals(FeatureWrapper): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 500000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000, number_past_vals=10): + """ + Env Wrapper which adds the number_past_vals voltage ([3:6]!!!) observations to the observations. + Initialized with zeros! + """ + super().__init__(env, number_of_features, training_episode_length, + recorder, n_trail, integrator_weight, antiwindup_weight, gamma, + penalty_I_weight, penalty_P_weight, t_start_penalty_I, t_start_penalty_P, + number_learing_steps) + + # self.observation_space = gym.spaces.Box( + # low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + # high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + self.delay_queues = [Fastqueue(1, 2) for _ in range(number_past_vals)] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + obs, reward, done, info = super().step(action) + obs_delay_array = self.shift_and_append(obs[0:2]) + obs = np.append(obs, obs_delay_array) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + [x.clear() for x in self.delay_queues] + obs = super().reset() + obs_delay_array = self.shift_and_append(obs[0:2]) + obs = np.append(obs, obs_delay_array) + + return obs + + def shift_and_append(self, obs): + """ + Takes the observation and shifts throught the queue + every queue output is added to total obs + """ + obs_delay_array = np.array([]) + obs_temp = obs + for queue in self.delay_queues: + obs_temp = queue.shift(obs_temp) + obs_delay_array = np.append(obs_delay_array, obs_temp) + + return obs_delay_array + + + + diff --git a/experiments/GEM/experiment_GEM.py b/experiments/GEM/experiment_GEM.py new file mode 100644 index 00000000..3c322ce5 --- /dev/null +++ b/experiments/GEM/experiment_GEM.py @@ -0,0 +1,490 @@ +import platform +import time +import gym_electric_motor as gem +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +from experiments.GEM.env.env_wrapper_GEM import FeatureWrapper, FeatureWrapper_pastVals, BaseWrapper +# from experiments.GEM.env.GEM_env import AppendLastActionWrapper +from experiments.GEM.util.config import cfg +from experiments.GEM.util.recorder_GEM import Recorder + +from gym.wrappers import FlattenObservation +import gym_electric_motor as gem +from gym_electric_motor.reference_generators import MultipleReferenceGenerator, ConstReferenceGenerator, \ + WienerProcessReferenceGenerator +from gym_electric_motor.visualization import MotorDashboard +from gym_electric_motor.visualization.motor_dashboard_plots import MeanEpisodeRewardPlot +from gym_electric_motor.physical_systems.mechanical_loads import ConstantSpeedLoad +from gym.core import Wrapper +from gym.spaces import 
Box, Tuple +from gym_electric_motor.constraints import SquaredConstraint + +test_length = 10000 +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] +Ki_ddpg_combi = 182 + + +class AppendLastActionWrapper(Wrapper): + """ + The following environment considers the dead time in the real-world motor control systems. + The real-world system changes its state, while the agent simultaneously calculates the next action based on a + previously measured observation. + Therefore, for the agents it seems as if the applied action affects the environment with one step delay + (with a dead time of one time step). + As a measure of feature engineering we append the last selected action to the observation of each time step, + because this action will be the one that is active while the agent has to make the next decision. + """ + + def __init__(self, environment): + super().__init__(environment) + # append the action space dimensions to the observation space dimensions + self.observation_space = Tuple((Box( + np.concatenate((environment.observation_space[0].low, environment.action_space.low)), + np.concatenate((environment.observation_space[0].high, environment.action_space.high)) + ), environment.observation_space[1])) + + self.v_d_mess = [] + self.v_q_mess = [] + + def step(self, action): + (state, ref), rew, term, info = self.env.step(action) + + self.v_d_mess.append(np.float64(state[2])) + self.v_q_mess.append(np.float64(state[3])) + state = np.delete(state, [2, 3]) + # extend the output state by the selected action + # state = np.concatenate((state, action)) + + return (state, ref), rew, term, info + + def reset(self, **kwargs): + # extend the output state by zeros after reset + # no action can be appended yet, but the dimension must fit + # state = np.concatenate((state, np.zeros(self.env.action_space.shape))) + + self.v_d_mess = [] + self.v_q_mess = [] + + # set random reference values + self.env.reference_generator._sub_generators[0]._reference_value = np.random.uniform(-1, 0) + self.env.reference_generator._sub_generators[1]._reference_value = np.random.uniform(-1, 1) + + state, ref = self.env.reset() + self.v_d_mess.append(np.float64(state[2])) + self.v_q_mess.append(np.float64(state[3])) + state = np.delete(state, [2, 3]) # remove vdq from state + + return state, ref + + +class AppendLastActionWrapper_testsetting(AppendLastActionWrapper): + + def __init__(self, environment, new_ref_d, new_ref_q, ref_change): + """ + new_ref_d/q mus be list of length test_steps/1000! 
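+        (one reference value is consumed every ref_change steps, so the lists need at least
+        test_length / ref_change + 1 entries for a full test run)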
+ """ + super().__init__(environment) + self.step_number = 0 + self.ref_count = 0 + self.new_ref_d = new_ref_d + self.new_ref_q = new_ref_q + self.ref_change = ref_change + + def step(self, action): + self.step_number += 1 + + if self.step_number % self.ref_change == 0: + self.ref_count += 1 + self.env.reference_generator._sub_generators[0]._reference_value = self.new_ref_d[ + self.ref_count] # np.random.uniform(-1, 0) + self.env.reference_generator._sub_generators[1]._reference_value = self.new_ref_q[ + self.ref_count] # np.random.uniform(-1, 1) + + (state, ref), rew, term, info = super().step(action) + + return (state, ref), rew, term, info + + def reset(self, **kwargs): + self.v_d_mess = [] + self.v_q_mess = [] + + self.env.reference_generator._sub_generators[0]._reference_value = self.new_ref_d[ + self.ref_count] # np.random.uniform(-1, 0) + self.env.reference_generator._sub_generators[1]._reference_value = self.new_ref_q[ + self.ref_count] # np.random.uniform(-1, 1) + + state, ref = self.env.reset() + self.v_d_mess.append(np.float64(state[2])) + self.v_q_mess.append(np.float64(state[3])) + state = np.delete(state, [2, 3]) # remove vdq from state + + return state, ref + + +def experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, antiwindup_weight, + penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, n_trail, + number_past_vals=0): + if node not in cfg['lea_vpn_nodes']: + # assume we are on pc2 + log_path = f'/scratch/hpc-prf-reinfl/weber/OMG/{folder_name}/{n_trail}/' + else: + log_path = f'{folder_name}/{n_trail}/' + + #################################################################################################################### + # GEM + # Define reference generators for both currents of the flux oriented dq frame + # d current reference is chosen to be constantly at zero to simplify this showcase scenario + d_generator = ConstReferenceGenerator('i_sd', 0) + # q current changes dynamically + q_generator = ConstReferenceGenerator('i_sq', 0) + + # The MultipleReferenceGenerator allows to apply these references simultaneously + rg = MultipleReferenceGenerator([d_generator, q_generator]) + + # Set the electric parameters of the motor + motor_parameter = dict( + r_s=15e-3, l_d=0.37e-3, l_q=1.2e-3, psi_p=65.6e-3, p=3, j_rotor=0.06 + ) + + # Change the motor operational limits (important when limit violations can terminate and reset the environment) + limit_values = dict( + i=160 * 1.41, + omega=12000 * np.pi / 30, + u=450 + ) + + # Change the motor nominal values + nominal_values = {key: 0.7 * limit for key, limit in limit_values.items()} + + # Create the environment + env_row = gem.make( + # Choose the permanent magnet synchronous motor with continuous-control-set + 'DqCont-CC-PMSM-v0', + # Pass a class with extra parameters + visualization=MotorDashboard( + state_plots=['i_sq', 'i_sd'], + action_plots='all', + reward_plot=True, + additional_plots=[MeanEpisodeRewardPlot()] + ), + # Set the mechanical load to have constant speed + load=ConstantSpeedLoad(omega_fixed=1000 * np.pi / 30), + + # Define which numerical solver is to be used for the simulation + 
ode_solver='scipy.solve_ivp', + + # Pass the previously defined reference generator + reference_generator=rg, + + reward_function=dict( + # Set weighting of different addends of the reward function + reward_weights={'i_sq': 1, 'i_sd': 1}, + # Exponent of the reward function + # Here we use a square root function + reward_power=0.5, + ), + + # Define which state variables are to be monitored concerning limit violations + # Here, only overcurrent will lead to termination + constraints=(), + + # Consider converter dead time within the simulation + # This means that a given action will show effect only with one step delay + # This is realistic behavior of drive applications + converter=dict( + dead_time=True, + ), + # Set the DC-link supply voltage + supply=dict( + u_nominal=400 + ), + + motor=dict( + # Pass the previously defined motor parameters + motor_parameter=motor_parameter, + + # Pass the updated motor limits and nominal values + limit_values=limit_values, + nominal_values=nominal_values, + ), + # Define which states will be shown in the state observation (what we can "measure") + state_filter=['i_sd', 'i_sq', 'u_sd', 'u_sq'], # , 'epsilon'], + ) + + # Now we apply the wrapper defined at the beginning of this script + env_train = AppendLastActionWrapper(env_row) + + # We flatten the observation (append the reference vector to the state vector such that + # the environment will output just a single vector with both information) + # This is necessary for compatibility with kerasRL2 + env_train = FlattenObservation(env_train) + + #################################################################################################################### + + if cfg['env_wrapper'] == 'past': + env = FeatureWrapper_pastVals(env_train, number_of_features=4 + number_past_vals * 2, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + + elif cfg['env_wrapper'] == 'no-I-term': + env = BaseWrapper(env_train, number_of_features=2 + number_past_vals * 2, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, gamma=gamma, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + + else: + env = FeatureWrapper(env_train, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps) # , use_past_vals=True, number_past_vals=30) + + # todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... 
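+    # The wrapped env's action space is temporarily widened from 2 to 4 dimensions so that the DDPG
+    # actor outputs separate proportional (action[0:2]) and integral (action[2:4]) parts; the
+    # FeatureWrapper recombines them into the 2-dimensional PI action that is applied to the GEM env.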
+ if cfg['env_wrapper'] not in ['no-I-term', 'I-controller']: + env.action_space = gym.spaces.Box(low=np.full(4, -1), high=np.full(4, 1)) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=1e-4) + + print('SCHRITTWEITE DES ACTIONNOISE?!?!?!?Passt laut standard 1e-4') + + # action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, + # sigma_min=noise_var * np.ones(n_actions) * noise_var_min, + # mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + # sigma=noise_var * np.ones(n_actions), dt=net.ts) + print(optimizer) + if optimizer == 'SGD': + used_optimzer = th.optim.SGD + elif optimizer == 'RMSprop': + used_optimzer = th.optim.RMSprop + # elif optimizer == 'LBFGS': + # needs in step additional argument + # used_optimzer = th.optim.LBFGS + else: + used_optimzer = th.optim.Adam + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers), + optimizer_class=used_optimzer) + + model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=log_path, + # model = myDDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name}/{n_trail}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=buffer_size, + # learning_starts=int(learning_starts * training_episode_length), + batch_size=batch_size, tau=tau, gamma=gamma, action_noise=action_noise, + train_freq=(train_freq, train_freq_type), gradient_steps=- 1, + optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + # Adjust network -> maybe change to Costume net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html + # adn scale weights and biases + count = 0 + for kk in range(actor_number_layers + 1): + + model.actor.mu._modules[str(count)].weight.data = model.actor.mu._modules[str(count)].weight.data * weight_scale + model.actor_target.mu._modules[str(count)].weight.data = model.actor_target.mu._modules[ + str(count)].weight.data * weight_scale + + model.actor.mu._modules[str(count)].bias.data = model.actor.mu._modules[str(count)].bias.data * bias_scale + model.actor_target.mu._modules[str(count)].bias.data = model.actor.mu._modules[ + str(count)].bias.data * bias_scale + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + if cfg['env_wrapper'] not in ['no-I-term', 'I-controller']: + env.action_space = gym.spaces.Box(low=np.full(2, -1), high=np.full(2, 1)) + + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Trial number": n_trail, + "Database name": folder_name, + "Sum_eps_reward": env.get_episode_rewards() + } + mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + + model.save(log_path 
+ f'model.zip') + + ####### Run Test ######### + return_sum = 0.0 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + # Refs created with https://github.com/max-schenke/DESSCA + i_d_refs = [-0.5718831392706399, -0.11155989917458595, -0.8444233463864655, -0.19260596846844558, + -0.48986342384598824, + -0.08540375784816023, -0.6983532259844449, -0.3409346664209051, -0.9852563901175903, + -0.019589794863040133, + -0.3057052318511703, -0.010759738176742362, -0.7264074671265837, -0.7003086456948622, + -0.5205127876117279, + -0.0035883351279332454, -0.24656126983332566, -0.7385108721382044, -0.8711444379999949, + -0.5322348905850738, + -0.16443631057073907, -0.26335305001172343, -0.8339056052207534, -0.9840272325710973, + -0.00099042967089491, + -0.4276376345373605, -0.4392085789117308, -0.29885945214798054, -0.3526213053117569, + -0.15544590095444902, + -0.38133627476871246, -0.0007362814213280888, -0.13766159578201825, -0.6998437778149555, + -0.02941718441323049, + -0.14911600490992516, -0.8711008909873345, -0.5803207691231205, -0.3908087722441505, + -0.30424273624679143, + -0.6032911651567467, -0.6097285170523984, -0.23000688296189783, -0.009050042083058152, + -0.13450601442490417, + -0.8117883556545268, -0.7542685229940803, -0.4627233964160423, -0.23713451030767801, -0.580302276033946] + i_q_refs = [-0.3392001552090831, 0.9601935188371409, -0.3536698661685236, -0.7470423329656373, 0.7498405690613185, + 0.02118430489789434, 0.2733946954263321, 0.2919040855524663, 0.16184776106212195, 0.5033515631986878, + -0.3472813053105329, -0.3978931436350608, 0.6856579757847681, -0.7061719805667996, 0.05173569323125849, + -0.9859275339077078, 0.6511009114276964, -0.07964009848269302, 0.4872958851075428, 0.4244964715390715, + 0.3348234680253275, -0.02175414797059596, 0.1689424266837956, -0.15367806515850901, -0.6890239130635769, + -0.5235888504056838, -0.18887320564466648, -0.9243752447874265, 0.9223611469482904, + -0.47288531380037824, + 0.5419042725157753, 0.21808910731016923, -0.2114136814114341, -0.43862800579799827, 0.7610593015542114, + -0.9580202514125911, -0.058327843098379906, -0.6351863815461574, 0.06422483040085132, + -0.6157429182475818, + 0.6283510657507491, -0.1007305747146939, 0.9225787627793309, -0.15228745162185686, 0.6513516638638627, + -0.5835510703463308, 0.46458552243856405, 0.25269729661377704, 0.1814216788492872, 0.2111335623928367] + + ref_change = 500 + + env_test = env_row + env_test = AppendLastActionWrapper_testsetting(env_test, i_d_refs, i_q_refs, ref_change) + env_test = FlattenObservation(env_test) + + if cfg['env_wrapper'] == 'past': + env_test = FeatureWrapper_pastVals(env_test, number_of_features=4 + number_past_vals * 2, + integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=0, penalty_I_weight=0, + penalty_P_weight=0, number_past_vals=number_past_vals, + training_episode_length=training_episode_length, ) + + + elif cfg['env_wrapper'] == 'no-I-term': + env_test = BaseWrapper(env_test, number_of_features=2 + number_past_vals * 2, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, gamma=0, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + + else: + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, + penalty_P_weight=0, + training_episode_length=training_episode_length, ) # , 
use_past_vals=True, number_past_vals=30) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + + rew_list = [] + + aP0 = [] + aP1 = [] + aI0 = [] + aI1 = [] + integrator_sum0 = [] + integrator_sum1 = [] + i_d_mess = [] + i_q_mess = [] + i_d_ref = [] + i_q_ref = [] + action_d = [] + action_q = [] + env_test.training_episode_length = test_length + 1 # that env is not reset + + for step in range(test_length): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + if action.shape[0] > 2: + aI0.append(np.float64(action[2])) + aI1.append(np.float64(action[3])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + + # env_test.render() + return_sum += rewards + rew_list.append(rewards) + i_d_mess.append(np.float64(obs[0])) + i_q_mess.append(np.float64(obs[1])) + i_d_ref.append(np.float64(obs[2])) + i_q_ref.append(np.float64(obs[3])) + action_d.append(np.float64(action[0])) + action_q.append(np.float64(action[1])) + + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "i_d_mess": i_d_mess, + "i_q_mess": i_q_mess, + "v_d_mess": env_test.env.env.v_d_mess, + "v_q_mess": env_test.env.env.v_q_mess, + "i_d_ref": i_d_ref, + "i_q_ref": i_q_ref, + 'action_d': action_d, + 'action_q': action_q, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionI0": aI0, + "ActionI1": aI1, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "GEM; features: error, past_vals, used_action"} + + mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + return (return_sum / test_length) diff --git a/experiments/GEM/hp_tune_ddpg_objective_GEM.py b/experiments/GEM/hp_tune_ddpg_objective_GEM.py new file mode 100644 index 00000000..e42613f9 --- /dev/null +++ b/experiments/GEM/hp_tune_ddpg_objective_GEM.py @@ -0,0 +1,471 @@ +import json +import os +import time + +import sqlalchemy +from optuna.samplers import TPESampler + +os.environ['PGOPTIONS'] = '-c statement_timeout=1000' + +import optuna +import platform +import argparse +import sshtunnel +import numpy as np +# np.random.seed(0) +from experiments.GEM.util.config import cfg + +from experiments.GEM.experiment_GEM import mongo_recorder, experiment_fit_DDPG +from experiments.hp_tune.util.scheduler import linear_schedule + +model_path = 'experiments/hp_tune/trained_models/study_22_run_11534/' + +PC2_LOCAL_PORT2PSQL = 11999 +SERVER_LOCAL_PORT2PSQL = 6432 +DB_NAME = 'optuna' +PC2_LOCAL_PORT2MYSQL = 11998 +SERVER_LOCAL_PORT2MYSQL = 3306 +STUDY_NAME = cfg['STUDY_NAME'] # 'DDPG_MRE_sqlite_PC2' + +node = platform.uname().node + + +def ddpg_objective_fix_params(trial): + file_congfig = open(model_path + + 'PC2_DDPG_Vctrl_single_inv_22_newTestcase_Trial_number_11534_0.json', ) + trial_config = json.load(file_congfig) + + number_learning_steps = 500000 # trial.suggest_int("number_learning_steps", 100000, 1000000) + # rew_weigth = trial.suggest_float("rew_weigth", 0.1, 5) + # rew_penalty_distribution = trial.suggest_float("antiwindup_weight", 0.1, 5) + 
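+    # All following hyperparameters are loaded from the stored config of study-22 trial 11534 instead of
+    # being sampled by Optuna; the commented-out suggest_* calls document the original search spaces.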
penalty_I_weight = trial_config["penalty_I_weight"] # trial.suggest_float("penalty_I_weight", 100e-6, 2) + penalty_P_weight = trial_config["penalty_P_weight"] # trial.suggest_float("penalty_P_weight", 100e-6, 2) + + penalty_I_decay_start = trial_config[ + "penalty_I_decay_start"] # trial.suggest_float("penalty_I_decay_start", 0.00001, 1) + penalty_P_decay_start = trial_config[ + "penalty_P_decay_start"] # trial.suggest_float("penalty_P_decay_start", 0.00001, 1) + + t_start_penalty_I = int(penalty_I_decay_start * number_learning_steps) + t_start_penalty_P = int(penalty_P_decay_start * number_learning_steps) + + integrator_weight = trial_config["integrator_weight"] # trial.suggest_float("integrator_weight", 1 / 200, 2) + # integrator_weight = trial.suggest_loguniform("integrator_weight", 1e-6, 1e-0) + # antiwindup_weight = trial.suggest_loguniform("antiwindup_weight", 50e-6, 50e-3) + antiwindup_weight = trial_config["antiwindup_weight"] # trial.suggest_float("antiwindup_weight", 0.00001, 1) + + learning_rate = trial_config["learning_rate"] # trial.suggest_loguniform("learning_rate", 1e-6, 1e-1) # 0.0002# + + lr_decay_start = trial_config[ + "lr_decay_start"] # trial.suggest_float("lr_decay_start", 0.00001, 1) # 3000 # 0.2 * number_learning_steps? + lr_decay_duration = trial_config["lr_decay_duration"] # trial.suggest_float("lr_decay_duration", 0.00001, + # 1) # 3000 # 0.2 * number_learning_steps? + t_start = int(lr_decay_start * number_learning_steps) + t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps, + number_learning_steps)) + final_lr = trial_config["final_lr"] # trial.suggest_float("final_lr", 0.00001, 1) + + gamma = trial_config["gamma"] # trial.suggest_float("gamma", 0.5, 0.9999) + weight_scale = trial_config["weight_scale"] # trial.suggest_loguniform("weight_scale", 5e-5, 0.2) # 0.005 + + bias_scale = trial_config["bias_scale"] # trial.suggest_loguniform("bias_scale", 5e-4, 0.1) # 0.005 + alpha_relu_actor = trial_config[ + "alpha_relu_actor"] # trial.suggest_loguniform("alpha_relu_actor", 0.0001, 0.5) # 0.005 + alpha_relu_critic = trial_config[ + "alpha_relu_critic"] # trial.suggest_loguniform("alpha_relu_critic", 0.0001, 0.5) # 0.005 + + batch_size = trial_config["batch_size"] # trial.suggest_int("batch_size", 16, 1024) # 128 + buffer_size = trial_config[ + "buffer_size"] # trial.suggest_int("buffer_size", int(1e4), number_learning_steps) # 128 + + actor_hidden_size = trial_config[ + "actor_hidden_size"] # trial.suggest_int("actor_hidden_size", 10, 200) # 100 # Using LeakyReLU + actor_number_layers = trial_config["actor_number_layers"] # trial.suggest_int("actor_number_layers", 1, 4) + + critic_hidden_size = trial_config["critic_hidden_size"] # trial.suggest_int("critic_hidden_size", 10, 300) # 100 + critic_number_layers = trial_config["critic_number_layers"] # trial.suggest_int("critic_number_layers", 1, 4) + + n_trail = str(trial.number) + use_gamma_in_rew = 1 + noise_var = trial_config["noise_var"] # trial.suggest_loguniform("noise_var", 0.01, 1) # 2 + # min var, action noise is reduced to (depends on noise_var) + noise_var_min = 0.0013 # trial.suggest_loguniform("noise_var_min", 0.0000001, 2) + # min var, action noise is reduced to (depends on training_episode_length) + noise_steps_annealing = int( + 0.25 * number_learning_steps) # trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), + # number_learning_steps) + noise_theta = trial_config["noise_theta"] # trial.suggest_loguniform("noise_theta", 1, 
50) # 25 # stiffness of OU + error_exponent = 0.5 # trial.suggest_loguniform("error_exponent", 0.001, 4) + + training_episode_length = trial_config[ + "training_episode_length"] # trial.suggest_int("training_episode_length", 500, 5000) # 128 + # learning_starts = 0.32 # trial.suggest_loguniform("learning_starts", 0.1, 2) # 128 + tau = trial_config["tau"] # trial.suggest_loguniform("tau", 0.0001, 0.3) # 2 + + train_freq_type = "step" # trial.suggest_categorical("train_freq_type", ["episode", "step"]) + train_freq = trial_config["train_freq"] # trial.suggest_int("train_freq", 1, 15000) + + optimizer = trial_config[ + "optimizer"] # trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"]) # , "LBFGS"]) + + number_past_vals = 5 # trial.suggest_int("number_past_vals", 0, 15) + + learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr, + t_start=t_start, + t_end=t_end, + total_timesteps=number_learning_steps) + + trail_config_mongo = {"Name": "Config", + "Node": node, + "Agent": "DDPG", + "Number_learning_Steps": number_learning_steps, + "Trial number": n_trail, + "Database name": cfg['STUDY_NAME'], + "Start time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Info": "P10 setting, EU grid, HPs von Stuy 22 + 5 pastvals" + "Reward design setzt sich aus MRE [0,1] und clipp-punishment [0,-1] zusammen", + } + trail_config_mongo.update(trial.params) + # mongo_recorder.save_to_mongodb('Trial_number_' + n_trail, trail_config_mongo) + mongo_recorder.save_to_json('Trial_number_' + n_trail, trail_config_mongo) + + loss = experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + # loss = experiment_fit_DDPG_custom(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, + integrator_weight * antiwindup_weight, penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, + n_trail, number_past_vals) + + return loss + + +def ddpg_objective(trial): + number_learning_steps = 500000 # trial.suggest_int("number_learning_steps", 100000, 1000000) + # rew_weigth = trial.suggest_float("rew_weigth", 0.1, 5) + # rew_penalty_distribution = trial.suggest_float("antiwindup_weight", 0.1, 5) + penalty_I_weight = 1 # trial.suggest_float("penalty_I_weight", 100e-6, 2) + penalty_P_weight = 1 # trial.suggest_float("penalty_P_weight", 100e-6, 2) + + penalty_I_decay_start = 0.5 # trial.suggest_float("penalty_I_decay_start", 0.00001, 1) + penalty_P_decay_start = 0.5 # trial.suggest_float("penalty_P_decay_start", 0.00001, 1) + + t_start_penalty_I = int(penalty_I_decay_start * number_learning_steps) + t_start_penalty_P = int(penalty_P_decay_start * number_learning_steps) + + integrator_weight = 0.1 # trial.suggest_float("integrator_weight", 1 / 200, 0.5) + # integrator_weight = trial.suggest_loguniform("integrator_weight", 1e-6, 1e-0) + # antiwindup_weight = trial.suggest_loguniform("antiwindup_weight", 50e-6, 50e-3) + antiwindup_weight = 0.1 # trial.suggest_float("antiwindup_weight", 0.00001, 1) + + learning_rate = trial.suggest_loguniform("learning_rate", 1e-7, 1e-2) # 0.0002# + + lr_decay_start = trial.suggest_float("lr_decay_start", 0.00001, 1) # 3000 # 
0.2 * number_learning_steps? + lr_decay_duration = trial.suggest_float("lr_decay_duration", 0.00001, + 1) # 3000 # 0.2 * number_learning_steps? + t_start = int(lr_decay_start * number_learning_steps) + t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps, + number_learning_steps)) + final_lr = trial.suggest_float("final_lr", 0.00001, 1) + + gamma = trial.suggest_float("gamma", 0.8, 0.9999) + weight_scale = trial.suggest_loguniform("weight_scale", 5e-5, 0.2) # 0.005 + + bias_scale = trial.suggest_loguniform("bias_scale", 0.01, 0.1) # 0.005 + alpha_relu_actor = trial.suggest_loguniform("alpha_relu_actor", 0.001, 0.5) # 0.005 + alpha_relu_critic = trial.suggest_loguniform("alpha_relu_critic", 0.001, 0.5) # 0.005 + + batch_size = trial.suggest_int("batch_size", 16, 512) # 128 + buffer_size = trial.suggest_int("buffer_size", int(20e4), number_learning_steps) # 128 + + actor_hidden_size = trial.suggest_int("actor_hidden_size", 10, 75) # 100 # Using LeakyReLU + actor_number_layers = trial.suggest_int("actor_number_layers", 1, 3) + + critic_hidden_size = trial.suggest_int("critic_hidden_size", 10, 300) # 100 + critic_number_layers = trial.suggest_int("critic_number_layers", 1, 4) + + n_trail = str(trial.number) + use_gamma_in_rew = 1 + noise_var = trial.suggest_loguniform("noise_var", 0.01, 1) # 2 + # min var, action noise is reduced to (depends on noise_var) + noise_var_min = 0.0013 # trial.suggest_loguniform("noise_var_min", 0.0000001, 2) + # min var, action noise is reduced to (depends on training_episode_length) + noise_steps_annealing = int( + 0.25 * number_learning_steps) # trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), + # number_learning_steps) + noise_theta = trial.suggest_loguniform("noise_theta", 1, 50) # 25 # stiffness of OU + error_exponent = 0.5 # 0.5 # trial.suggest_loguniform("error_exponent", 0.001, 4) + + training_episode_length = trial.suggest_int("training_episode_length", 1000, 4000) # 128 + # learning_starts = 0.32 # trial.suggest_loguniform("learning_starts", 0.1, 2) # 128 + tau = trial.suggest_loguniform("tau", 0.0001, 0.3) # 2 + + train_freq_type = "step" # trial.suggest_categorical("train_freq_type", ["episode", "step"]) + train_freq = trial.suggest_int("train_freq", 1, 5000) + + optimizer = trial.suggest_categorical("optimizer", ["Adam"]) # ["Adam", "SGD", "RMSprop"]) # , "LBFGS"]) + + learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr, + t_start=t_start, + t_end=t_end, + total_timesteps=number_learning_steps) + number_past_vals = trial.suggest_int("number_past_vals", 0, 15) + + trail_config_mongo = {"Name": "Config", + "Node": node, + "Agent": "DDPG", + "Number_learning_Steps": number_learning_steps, + "Trial number": n_trail, + "Database name": cfg['STUDY_NAME'], + "Start time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Optimierer/ Setting stuff": "DDPG HPO ohne Integrator, alle HPs fuer den I-Anteil " + "wurden daher fix gesetzt. Vgl. 
zu DDPG+I-Anteil" + } + trail_config_mongo.update(trial.params) + # mongo_recorder.save_to_mongodb('Trial_number_' + n_trail, trail_config_mongo) + mongo_recorder.save_to_json('Trial_number_' + n_trail, trail_config_mongo) + + loss = experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + # loss = experiment_fit_DDPG_custom(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, + integrator_weight * antiwindup_weight, penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, + n_trail, number_past_vals) + + return loss + + +def get_storage(url, storage_kws): + successfull = False + retry_counter = 0 + + while not successfull: + try: + storage = optuna.storages.RDBStorage( + url=url, **storage_kws) + successfull = True + except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.DatabaseError) as e: + wait_time = np.random.randint(60, 300) + retry_counter += 1 + if retry_counter > 10: + print('Stopped after 10 connection attempts!') + raise e + print(f'Could not connect, retry in {wait_time} s') + time.sleep(wait_time) + + return storage + + +def optuna_optimize_mysql_lea35(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + if node in ('LEA-WORK35', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2MYSQL + else: + port = SERVER_LOCAL_PORT2MYSQL + + storage = get_storage(f'mysql://{optuna_creds}@localhost:{port}/{DB_NAME}') + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2MYSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2MYSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + study = optuna.create_study( + storage=f"mysql+pymysql://{optuna_creds}@127.0.0.1:{tun.local_bind_port}/{DB_NAME}", + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_mysql(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f"mysql://{optuna_creds}@localhost/{DB_NAME}", + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_sqlite(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 100 + + print(n_trials) + print('Local optimization is run but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in cfg['lea_vpn_nodes']: + optuna_path = './optuna/' + else: + # assume we are on not of pc2 -> store to project folder + optuna_path = '/scratch/hpc-prf-reinfl/weber/OMG/optuna/' + + os.makedirs(optuna_path, exist_ok=True) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f'sqlite:///{optuna_path}optuna.sqlite', + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_psql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_psql' + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + # set trial to failed if it seems dead for 20 minutes + storage_kws = dict(engine_kwargs={"pool_timeout": 600}) + if node in ('lea-cyberdyne', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2PSQL + else: + port = SERVER_LOCAL_PORT2PSQL + + storage = get_storage(f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', storage_kws=storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2PSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2PSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + storage = get_storage(url=f'postgresql://{optuna_creds}' + f'@localhost:{tun.local_bind_port}/{DB_NAME}', storage_kws=storage_kws) + + # storage = optuna.storages.RDBStorage( + # url=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + # **storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +if __name__ == "__main__": + # learning_rate = list(itertools.chain(*[[1e-9] * 1])) + # search_space = {'learning_rate': learning_rate} # , 'number_learning_steps': number_learning_steps} + + TPE_sampler = TPESampler(n_startup_trials=400) # , constant_liar=True) + # TPE_sampler = TPESampler(n_startup_trials=2500) # , constant_liar=True) + + # optuna_optimize_mysql_lea35(ddpg_objective, study_name=STUDY_NAME, sampler=TPE_sampler) + + optuna_optimize_mysql_lea35(ddpg_objective_fix_params, study_name=STUDY_NAME, sampler=TPE_sampler) + # 
optuna_optimize_sqlite(ddpg_objective_fix_params, study_name=STUDY_NAME, sampler=TPE_sampler) + + # optuna_optimize(ddpg_objective, study_name=STUDY_NAME, + # sampler=TPE_sampler) #, sampler=optuna.samplers.GridSampler(search_space)) diff --git a/experiments/GEM/pc2_schedule_ddpg_GEM.py b/experiments/GEM/pc2_schedule_ddpg_GEM.py new file mode 100644 index 00000000..522acf32 --- /dev/null +++ b/experiments/GEM/pc2_schedule_ddpg_GEM.py @@ -0,0 +1,105 @@ +"""Allocate jobs executing a certain cmd endlessly. Make sure not to surpass +the allowed cpu core limit""" + +import os +import pathlib +import uuid +import time + +import optuna +from optuna.samplers import TPESampler + +from experiments.hp_tune.util import pc2 +from experiments.GEM.util.config import cfg + +# config +USER = os.getenv('USER') +ALLOWED_MAX_CPU_CORES = 500 # 512 +STUDY_NAME = cfg['STUDY_NAME'] +DB_NAME = 'optuna' +# resources request +job_resource_plan = { + 'duration': 24, # in hours + 'ncpus': 2, + 'memory': 12, + 'vmemory': 16, +} + +MAX_WORKERS = ALLOWED_MAX_CPU_CORES // job_resource_plan['ncpus'] + +PC2_LOCAL_PORT2MYSQL = 11998 +SERVER_LOCAL_PORT2MYSQL = 3306 + + +def main(): + started_workers = 0 + print('Start slavedriving loop..') + old_ccsinfo_counts = None + while True: + + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study( + storage=f'mysql+pymysql://{optuna_creds}@localhost:{PC2_LOCAL_PORT2MYSQL}/{DB_NAME}', + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=TPESampler(n_startup_trials=2500), study_name=STUDY_NAME, + load_if_exists=True, + direction='maximize') + + complete_trials = len([t for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]) + print(f'Completed trials in study: {complete_trials}') + if complete_trials > 1000: + print('Maximal completed trials reached - STOPPING') + break + + job_files_path = pathlib.Path( + f"/scratch/hpc-prf-reinfl/weber/OMG/ccs_job_files/{STUDY_NAME}") # SCRATCH = $PC2PFS/hpc_....re/OMG_prjecet + job_files_path.mkdir(parents=False, exist_ok=True) + + # read ccsinfo + ccsinfo = pc2.get_ccsinfo(USER) + ccsinfo_state_counts = ccsinfo.state.value_counts() + ccs_running = ccsinfo_state_counts.get('ALLOCATED', 0) + ccs_planned = ccsinfo_state_counts.get('PLANNED', 0) + total_busy = ccs_running + ccs_planned + if not ccsinfo_state_counts.equals(old_ccsinfo_counts): + print("\n## ccs summary ##") + print(f"Running: {ccs_running}") + print(f"Planned : {ccs_planned}") + print(f"Total busy workers (ccs): {total_busy}") + + if total_busy < MAX_WORKERS: + # call workers to work + n_workers = MAX_WORKERS - total_busy + print(f'Start {n_workers} workers:') + for w in range(n_workers): + started_workers += 1 + jobid = str(uuid.uuid4()).split('-')[0] + cluster = "oculus" + job_name = job_files_path / f"pc2_job_{jobid}.sh" + res_plan = pc2.calculate_resources(**job_resource_plan) + + execution_line = "PYTHONPATH=$HOME/openmodelica-microgrid-gym/ " \ + "python $HOME/openmodelica-microgrid-gym/experiments/GEM/hp_tune_ddpg_objective_GEM.py -n 1" + + print(f'Start job {jobid} ..') + pc2.create_n_run_script( + job_name, + pc2.build_shell_script_lines(job_files_path, cluster, + job_name, res_plan, + execution_line), + dry=False) + print('sleep 10s for better DB interaction', end='\r') + time.sleep(10) + + old_ccsinfo_counts = ccsinfo_state_counts + + print('sleep..', end='\r') + time.sleep(300) + + +if __name__ == 
'__main__': + main() diff --git a/experiments/GEM/util/config.py b/experiments/GEM/util/config.py new file mode 100644 index 00000000..ae3d02db --- /dev/null +++ b/experiments/GEM/util/config.py @@ -0,0 +1,12 @@ +cfg = dict(lea_vpn_nodes=['lea-skynet', 'lea-picard', 'lea-barclay', + 'lea-cyberdyne', 'webbah-ThinkPad-L380', 'LEA_WORK35'], + STUDY_NAME='GEM_I_term_4', + meas_data_folder='Json_to_MonogDB_GEM_I_term_4/', + MONGODB_PORT=12001, + loglevel='train', + is_dq0=False, + train_episode_length=2881, # defines when in training the env is reset e.g. for exploring starts, + # nothing -> Standard FeatureWrapper; past -> FeatureWrapper_pastVals; future -> FeatureWrapper_futureVals + # I-controller -> DDPG as P-term + standard I-controller; no-I-term -> Pure DDPG without integrator + env_wrapper='past' + ) diff --git a/experiments/GEM/util/recorder_GEM.py b/experiments/GEM/util/recorder_GEM.py new file mode 100644 index 00000000..5ed3bc3b --- /dev/null +++ b/experiments/GEM/util/recorder_GEM.py @@ -0,0 +1,69 @@ +import json +from os import makedirs + +import sshtunnel +from pymongo import MongoClient + +from experiments.GEM.util.config import cfg + +MONGODB_PORT = cfg['MONGODB_PORT'] # 12001 + + +class Recorder: + + def __init__(self, node, database_name): + """ + Class to record measured data to mongo database using pymongo + Depending on the node we are operating at it connects via ssh to + - in lea_vpn: to cyberdyne port 12001 + - else: assume pc2 node -> connect to frontend + and stores data to mongoDB at port MONGODB_PORT ( =12001). + HINT: From pc2 frontend permanent tunnel from cyberdyne port 12001 to frontend 12001 + is needed (assuming Mongod-Process running on cyberdyne + :params node: platform.uname().node + :params database_name: string for the database name to store data in + """ + self.node = node + self.save_count = 0 + + if self.node in cfg['lea_vpn_nodes']: + self.server_name = 'lea38' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT)} + self.save_folder = cfg['meas_data_folder'] + else: + # assume we are on a node of pc2 -> connect to frontend and put data on prt 12001 + # from there they can be grep via permanent tunnel from cyberdyne + self.server_name = 'fe.pc2.uni-paderborn.de' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT), + 'ssh_username': 'webbah'} + + self.save_folder = '/scratch/hpc-prf-reinfl/weber/OMG/' + cfg['meas_data_folder'] + + self.database_name = database_name + makedirs(self.save_folder, exist_ok=True) + # pathlib.Path(self.save_folder.mkdir(exist_ok=True)) + + def save_to_mongodb(self, col: str = ' trails', data=None): + """ + Stores data to database in document col + """ + with sshtunnel.open_tunnel(self.server_name, **self.tun_cfg) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[self.database_name] + trial_coll = db[col] # get collection named col + trial_coll.insert_one(data) + + def save_to_json(self, col: str = ' trails', data=None): + """ + Stores data to json file in specified directory. 
From there the data can be grept by another process + and can be stored to a DB via ssh + To distinguish the files of one trail a save_count is incremented and added to the filename + """ + + with open(self.save_folder + self.database_name + '_' + col + '_' + str(self.save_count) + '.json', + 'w') as outfile: + json.dump(data, outfile) + + self.save_count += 1 diff --git a/experiments/GEM/util/reporter_GEM.py b/experiments/GEM/util/reporter_GEM.py new file mode 100644 index 00000000..f01a2eff --- /dev/null +++ b/experiments/GEM/util/reporter_GEM.py @@ -0,0 +1,149 @@ +import json +import os +import platform +import re +import time + +import numpy as np + +import sshtunnel +from pymongo import MongoClient +# from experiments.hp_tune.util.config import cfg +from experiments.GEM.util.config import cfg + +print('Log Config: GEM!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') + + +class Reporter: + + def __init__(self): + """ + Greps json data which is stored in the cfg[meas_data_folder] and sends it to mongoDB + on cyberdyne (lea38) via sshtunnel on port MONGODB_PORT + """ + + MONGODB_PORT = cfg['MONGODB_PORT'] + + node = platform.uname().node + + if node in cfg['lea_vpn_nodes']: + self.server_name = 'lea38' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT)} + self.save_folder = './' + cfg['meas_data_folder'] + else: + # assume we are on a node of pc2 -> connect to frontend and put data on prt 12001 + # from there they can be grep via permanent tunnel from cyberdyne + self.server_name = 'fe.pc2.uni-paderborn.de' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT), + 'ssh_username': 'webbah'} + + self.save_folder = '/scratch/hpc-prf-reinfl/weber/OMG/' + cfg['meas_data_folder'] + + def save_to_mongodb(self, database_name: str, col: str = ' trails', data=None): + """ + Stores data to database in document col + """ + with sshtunnel.open_tunnel(self.server_name, **self.tun_cfg) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[database_name] + trial_coll = db[col] # get collection named col + trial_coll.insert_one(data) + + def oldest_file_in_tree(self, extension=".json"): + """ + Returns the oldest file-path string + """ + print(os.getcwd()) + return min( + (os.path.join(dirname, filename) + for dirname, dirnames, filenames in os.walk(self.save_folder) + for filename in filenames + if filename.endswith(extension)), + key=lambda fn: os.stat(fn).st_mtime) + + def oldest_file_with_name_in_tree(self, count_number_to_find, extension=".json"): + """ + Returns the oldest file-path string + + :param count_number_to_find: List of count_numbers to find and store instead of storing all + """ + print(os.getcwd()) + return min( + (os.path.join(dirname, filename) + for dirname, dirnames, filenames in os.walk(self.save_folder) + for filename in filenames + if filename.endswith(str(count_number_to_find) + extension)), + key=lambda fn: os.stat(fn).st_mtime) + + def json_to_mongo_via_sshtunnel(self, file_name_to_store=None): + + if not len(os.listdir(self.save_folder)) == 0: + + if file_name_to_store is None: + try: + oldest_file_path = self.oldest_file_in_tree() + except(ValueError) as e: + print('Folder seems empty or no matching data found!') + print(f'ValueError{e}') + print('Empty directory! 
Go to sleep for 5 minutes!') + time.sleep(5 * 60) + return + else: + oldest_file_path = file_name_to_store + + with open(oldest_file_path) as json_file: + data = json.load(json_file) + + successfull = False + retry_counter = 0 + + while not successfull: + try: + now = time.time() + if os.stat(oldest_file_path).st_mtime < now - 60: + self.save_to_mongodb(database_name=data['Database name'], + col='Trial_number_' + data['Trial number'], data=data) + print('Reporter: Data stored successfully to MongoDB and will be removed locally!') + os.remove(oldest_file_path) + successfull = True + except (sshtunnel.BaseSSHTunnelForwarderError) as e: + wait_time = np.random.randint(1, 60) + retry_counter += 1 + if retry_counter > 10: + print('Stopped after 10 connection attempts!') + raise e + print(f'Reporter: Could not connect via ssh to frontend, retry in {wait_time} s') + time.sleep(wait_time) + + else: + print('Empty directory! Go to sleep for 5 minutes!') + time.sleep(5 * 60) + + +if __name__ == "__main__": + + reporter = Reporter() + print("Starting Reporter for logging from local savefolder to mongoDB") + + file_ending_number = [178, 179] + + print(f"Searching for files in directory with number ending on {file_ending_number}") + + # print(reporter.oldest_file_in_tree()) + while True: + # reporter.json_to_mongo_via_sshtunnel() + + for number in file_ending_number: + try: + oldest_named_file_path = reporter.oldest_file_with_name_in_tree(number) + print(oldest_named_file_path) + + reporter.json_to_mongo_via_sshtunnel(oldest_named_file_path) + + except(ValueError) as e: + print(f'No file with number {number} ending') + print(f'ValueError{e}') + print('Go to sleep for 5 seconds and go on with next number!') + time.sleep(5) diff --git a/experiments/GEM/viz/get_learningCurve.py b/experiments/GEM/viz/get_learningCurve.py new file mode 100644 index 00000000..daca650a --- /dev/null +++ b/experiments/GEM/viz/get_learningCurve.py @@ -0,0 +1,46 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from bson import ObjectId +from plotly import tools +from pymongo import MongoClient + +from openmodelica_microgrid_gym.util import dq0_to_abc, abc_to_dq0 + +plt_train = True +plotly = False + +# db_name = 'PC2_DDGP_Vctrl_single_inv_18_penalties' +# db_name = 'DDPG_SplitActor_Best_study18_6462' +db_name = 'GEM_no_I_term_4' # 15 +# db_name = 'GEM_past' # 17 +trial = '0' +show_episode_number = 19 + +reward_df = pd.DataFrame() + +with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + idx = 0 + for coll_name in db.list_collection_names(): + trial = db[coll_name] + # trial = db.Trial_number_23 + + train_data = trial.find_one({"Name": "After_Training"}) + # trial_test = trial.find_one({"Name": "Test"}) + + if train_data is not None: # if trial not finished (was in actor_Ddpg > 550) + + if idx == 0: + reward_df = pd.DataFrame({str(idx): train_data['Mean_eps_reward']}) + else: + + df_tmp = pd.DataFrame({str(idx): train_data['Mean_eps_reward']}) + reward_df = reward_df.join(df_tmp) + idx += 1 + +reward_df.to_pickle(db_name + "_1250_agents_train_data.pkl") diff --git a/experiments/GEM/viz/mongoDB_get_test_data.py b/experiments/GEM/viz/mongoDB_get_test_data.py new file mode 100644 index 00000000..20194db6 --- /dev/null +++ b/experiments/GEM/viz/mongoDB_get_test_data.py @@ -0,0 +1,122 @@ +import matplotlib.pyplot as plt 
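+# Note (added for clarity): this script walks every trial collection in the study database, reads the "Test" document, and pickles the mean/std of the test reward and of the d-/q-current tracking errors into a DataFrame.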
+import numpy as np +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from bson import ObjectId +from plotly import tools +from pymongo import MongoClient + +from openmodelica_microgrid_gym.util import dq0_to_abc, abc_to_dq0 + +plt_train = True +plotly = False + +# db_name = 'PC2_DDGP_Vctrl_single_inv_18_penalties' +# db_name = 'DDPG_SplitActor_Best_study18_6462' +db_name = 'GEM_I_term_4' # 15 +# db_name = 'GEM_past' # 17 +trial = '0' +show_episode_number = 19 + +ret_mean_list_test = [] +ret_std_list_test = [] +i_q_delta_mean_list_test = [] +i_q_delta_std_list_test = [] +i_d_delta_mean_list_test = [] +i_d_delta_std_list_test = [] + +with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + for coll_name in db.list_collection_names(): + trial = db[coll_name] + # trial = db.Trial_number_23 + + # train_data = trial.find_one({"Name": "After_Training"}) + trial_test = trial.find_one({"Name": "Test"}) + + """ + ts = 1e-4 # if ts stored: take from db + t_test = np.arange(0, len(trial_test['i_d_mess']) * ts, ts).tolist() + + plt.plot(t_test, trial_test['i_d_mess']) + plt.plot(t_test, trial_test['i_d_ref'], 'r') + plt.plot(t_test, trial_test['i_d_ref'], 'r') + plt.grid() + plt.xlabel("t") + plt.ylabel("i_d") + plt.title(f"Test{db_name}") + plt.show() + + plt.plot(t_test, trial_test['i_q_mess']) + plt.plot(t_test, trial_test['i_q_ref'], 'r') + plt.grid() + plt.xlabel("t") + plt.ylabel("i_q") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['action_d']) + if len(trial_test['integrator_sum0']) > 0: + plt.plot(t_test, trial_test['integrator_sum0'], 'g') + plt.grid() + plt.xlabel("t") + plt.ylabel("action_d (integrator_sum-g)") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['action_q']) + if len(trial_test['integrator_sum1']) > 0: + plt.plot(t_test, trial_test['integrator_sum1'], 'g') + plt.grid() + plt.xlabel("t") + plt.ylabel("action_q (integrator_sum-g)") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['Reward']) + plt.grid() + plt.xlabel("t") + plt.ylabel("reward") + plt.title(f"Test {db_name}") + plt.show() + + print(np.mean(trial_test['Reward'])) + print(np.std(trial_test['Reward'])) + """ + + ret_mean_list_test.append(np.mean(trial_test['Reward'])) + ret_std_list_test.append(np.std(trial_test['Reward'])) + i_q_delta_mean_list_test.append(np.mean(np.array(trial_test['i_q_ref']) - np.array(trial_test['i_q_mess']))) + i_q_delta_std_list_test.append(np.std(np.array(trial_test['i_q_ref']) - np.array(trial_test['i_q_mess']))) + i_d_delta_mean_list_test.append(np.mean(np.array(trial_test['i_d_ref']) - np.array(trial_test['i_d_mess']))) + i_d_delta_std_list_test.append(np.std(np.array(trial_test['i_d_ref']) - np.array(trial_test['i_d_mess']))) + +print(ret_mean_list_test) +print(ret_std_list_test) +asd = 1 +results = { + 'return_Mean': ret_mean_list_test, + 'return_Std': ret_std_list_test, + 'i_q_delta_Mean': i_q_delta_mean_list_test, + 'i_q_delta_Std': i_q_delta_std_list_test, + 'i_d_delta_Mean': i_d_delta_mean_list_test, + 'i_d_delta_Std': i_d_delta_std_list_test, + 'study_name': db_name} + +df = pd.DataFrame(results) +df.to_pickle(db_name + "mean_over_1250_agents.pkl") + +m = np.array(ret_mean_list_test) +s = np.array(ret_std_list_test) + +plt.plot(m) +plt.fill_between(m - s, m + s, facecolor='r') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode 
steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title(db_name) +plt.show() diff --git a/experiments/GEM/viz/mongoDB_plt_GEM.py b/experiments/GEM/viz/mongoDB_plt_GEM.py new file mode 100644 index 00000000..c72253dc --- /dev/null +++ b/experiments/GEM/viz/mongoDB_plt_GEM.py @@ -0,0 +1,138 @@ +import time + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from pymongo import MongoClient + +plt_train = True +plotly = False + +folder_name = 'saves/compare_I_noI_4/' + +# db_name = 'PC2_DDGP_Vctrl_single_inv_18_penalties' +# db_name = 'DDPG_SplitActor_Best_study18_6462' +db_name = 'GEM_I_term_4' # 770 +db_name = 'GEM_no_I_term_4' # 1192 +trial = '0' +show_episode_number = 19 + +with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + # trial = db.Trial_number_770 + trial = db.Trial_number_1192 + + train_data = trial.find_one({"Name": "After_Training"}) + train_episode_data = trial.find_one({"Episode_number": show_episode_number}) + trial_config = trial.find_one({"Name": "Config"}) + trial_test = trial.find_one({"Name": "Test"}) + + train_reward_per_episode = train_data['Mean_eps_reward'] + + ax = plt.plot(train_reward_per_episode) # [::2]) + plt.grid() + plt.xlabel("Episodes") + plt.ylabel("Mean episode Reward") + plt.title(f"Test {db_name}") + plt.show() + + ts = 1e-4 # if ts stored: take from db + t_test = np.arange(0, len(trial_test['i_d_mess']) * ts, ts).tolist() + + plt.plot(t_test, trial_test['i_d_mess']) + plt.plot(t_test, trial_test['i_d_ref'], 'r') + # plt.plot(t_test, trial_test['i_d_ref'], 'r') + plt.grid() + plt.xlabel("t") + plt.ylabel("i_d") + plt.title(f"Test{db_name}") + plt.show() + + plt.plot(t_test, trial_test['i_q_mess']) + plt.plot(t_test, trial_test['i_q_ref'], 'r') + plt.grid() + plt.xlabel("t") + plt.ylabel("i_q") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['v_d_mess'][:-1]) + plt.grid() + plt.xlabel("t") + plt.ylabel("v_d") + plt.title(f"Test{db_name}") + plt.show() + + plt.plot(t_test, trial_test['v_q_mess'][:-1]) + plt.grid() + plt.xlabel("t") + plt.ylabel("v_q") + plt.title(f"Test{db_name}") + plt.show() + + plt.plot(t_test, trial_test['action_d']) + if len(trial_test['integrator_sum0']) > 0: + plt.plot(t_test, trial_test['integrator_sum0'], 'g') + plt.grid() + plt.xlabel("t") + plt.ylabel("action_d (integrator_sum-g)") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['action_q']) + if len(trial_test['integrator_sum1']) > 0: + plt.plot(t_test, trial_test['integrator_sum1'], 'g') + plt.grid() + plt.xlabel("t") + plt.ylabel("action_q (integrator_sum-g)") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['Reward']) + plt.grid() + plt.xlabel("t") + plt.ylabel("reward") + plt.title(f"Test {db_name}") + plt.show() + + ts = time.gmtime() + compare_result = {"Name": db_name, + "time": ts, + "i_q_mess": trial_test['i_q_mess'], + "i_q_ref": trial_test['i_q_ref'], + "i_d_mess": trial_test['i_d_mess'], + "i_d_ref": trial_test['i_d_ref'], + "v_d_mess": trial_test['v_d_mess'], + "v_q_mess": trial_test['v_q_mess'], + "Reward_test": trial_test['Reward'], + "train_reward_per_episode": train_reward_per_episode, + "info": "GEM results from testcase", + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/_{db_name}_trial770') + + if plotly: + plot = px.Figure() + 
plot.add_trace( + px.Scatter(y=train_reward_per_episode)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() diff --git a/experiments/GEM/viz/mongoDB_plus_local_plt_GEM.py b/experiments/GEM/viz/mongoDB_plus_local_plt_GEM.py new file mode 100644 index 00000000..bae435c5 --- /dev/null +++ b/experiments/GEM/viz/mongoDB_plus_local_plt_GEM.py @@ -0,0 +1,251 @@ +import json + +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from bson import ObjectId +from plotly import tools +from pymongo import MongoClient + +from openmodelica_microgrid_gym.util import dq0_to_abc, abc_to_dq0 + +plt_train = True +plotly = False +local = True + +# db_name = 'PC2_DDGP_Vctrl_single_inv_18_penalties' +# db_name = 'DDPG_SplitActor_Best_study18_6462' +db_name = 'GEM_I_term_3' # 15 +# db_name = 'GEM_past' # 17 +trial = '0' +show_episode_number = 19 + +with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + trial = db.Trial_number_23 + + if local: + file_train = open('I_term_3_23/GEM_I_term_3_Trial_number_23_178.json', ) + train_data = json.load(file_train) + file_test = open('I_term_3_23/GEM_I_term_3_Trial_number_23_179.json', ) + trial_test = json.load(file_test) + else: + train_data = trial.find_one({"Name": "After_Training"}) + train_episode_data = trial.find_one({"Episode_number": show_episode_number}) + trial_test = trial.find_one({"Name": "Test"}) + trial_config = trial.find_one({"Name": "Config"}) + + train_reward_per_episode = train_data['Mean_eps_reward'] + + ax = plt.plot(train_reward_per_episode) # [::2]) + plt.grid() + plt.xlabel("Episodes") + plt.ylabel("Mean episode Reward") + plt.title(f"Test {db_name}") + plt.show() + + ts = 1e-4 # if ts stored: take from db + t_test = np.arange(0, len(trial_test['i_d_mess']) * ts, ts).tolist() + + plt.plot(t_test, trial_test['i_d_mess']) + plt.plot(t_test, trial_test['i_d_ref'], 'r') + plt.plot(t_test, trial_test['i_d_ref'], 'r') + plt.grid() + plt.xlabel("t") + plt.ylabel("i_d") + plt.title(f"Test{db_name}") + plt.show() + + plt.plot(t_test, trial_test['i_q_mess']) + plt.plot(t_test, trial_test['i_q_ref'], 'r') + plt.grid() + plt.xlabel("t") + plt.ylabel("i_q") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['action_d']) + if len(trial_test['integrator_sum0']) > 0: + plt.plot(t_test, trial_test['integrator_sum0'], 'g') + plt.grid() + plt.xlabel("t") + plt.ylabel("action_d (integrator_sum-g)") + plt.title(f"Test {db_name}") + plt.show() + + plt.plot(t_test, trial_test['action_q']) + if len(trial_test['integrator_sum1']) > 0: + plt.plot(t_test, trial_test['integrator_sum1'], 'g') + plt.grid() + plt.xlabel("t") + plt.ylabel("action_q (integrator_sum-g)") + plt.title(f"Test {db_name}") + plt.show() + + if train_episode_data is not None: + # only available if loglevel == 'train' + ############################################################## + # Plot example Training Episode + R_load = train_episode_data['R_load_training'] + i_a = train_episode_data['i_a_training'] + i_b = train_episode_data['i_b_training'] + i_c = train_episode_data['i_c_training'] + v_a = train_episode_data['v_a_training'] + v_b = train_episode_data['v_b_training'] + v_c = train_episode_data['v_c_training'] + reward 
= train_episode_data['Rewards'] + phase = train_episode_data['Phase'] + + plt.plot(R_load) + plt.grid() + plt.xlabel("steps") + plt.ylabel("R_load") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(i_a) + plt.plot(i_b) + plt.plot(i_c) + plt.grid() + plt.xlabel("steps") + plt.ylabel("i_abc") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(v_a) + plt.plot(v_b) + plt.plot(v_c) + plt.grid() + plt.xlabel("steps") + plt.ylabel("v_abc") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(reward) + plt.grid() + plt.xlabel("steps") + plt.ylabel("Reward") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(train_episode_data['Integrator0']) + plt.plot(train_episode_data['Integrator1']) + plt.plot(train_episode_data['Integrator2']) + plt.grid() + plt.xlabel("steps") + plt.ylabel("Int Zustand") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(train_episode_data['actionP0']) + plt.plot(train_episode_data['actionP1']) + plt.plot(train_episode_data['actionP2']) + plt.grid() + plt.xlabel("steps") + plt.ylabel("actionP") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(train_episode_data['actionI0']) + plt.plot(train_episode_data['actionI1']) + plt.plot(train_episode_data['actionI2']) + plt.grid() + plt.xlabel("steps") + plt.ylabel("actionI") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + df = pd.DataFrame() + df['R_load'] = R_load + + hist = df['R_load'].hist(bins=50) + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + """ + plot = px.Figure() + plot.add_trace( + px.Scatter(y=R_load) + """ + # df2['v_0_SP'] = pd.DataFrame(test_data['inverter1_v_ref_0']) + # df2['v_1_SP'] = pd.DataFrame(test_data['inverter1_v_ref_1']) + # df2['v_2_SP'] = pd.DataFrame(test_data['inverter1_v_ref_2']) + + # df2['phase'] = pd.DataFrame(test_data['Phase']) + + # v_sp_abc = dq0_to_abc(np.array([df2['v_0_SP'], df2['v_1_SP'], df2['v_2_SP']]), np.array(df2['phase'])) + + v_mess_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), np.array(phase)) + + # x = df2['t'] + v_d = v_mess_dq0[0][:] # df2['v_a'] + v_q = v_mess_dq0[1][:] # df2['v_b'] + v_0 = v_mess_dq0[2][:] # df2['v_c'] + + plt.plot(v_d) + plt.plot(v_q) + plt.plot(v_0) + plt.grid() + plt.xlabel("steps") + plt.ylabel("v_dq0") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + # v_a_SP = df2['v_0_SP']#v_sp_abc[0,:] + # v_b_SP = df2['v_1_SP']#v_sp_abc[1,:] + # v_c_SP = df2['v_2_SP']#v_sp_abc[2,:] + + plot = px.Figure() + plot.add_trace( + px.Scatter(y=v_a)) + + plot.add_trace( + px.Scatter(y=v_b)) + + plot.add_trace( + px.Scatter(y=v_c)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() + + ############################################################## + # After Training + + if plotly: + plot = px.Figure() + plot.add_trace( + px.Scatter(y=train_reward_per_episode)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() diff --git a/experiments/GEM/viz/plt_learningCurve.py b/experiments/GEM/viz/plt_learningCurve.py new file mode 100644 index 00000000..6dbb6aa1 --- /dev/null +++ b/experiments/GEM/viz/plt_learningCurve.py @@ -0,0 +1,143 @@ +import 
matplotlib +import matplotlib.pyplot as plt +import pandas as pd +import numpy as np + +save_results = True +folder_name = 'errorbar_plots/' + +# Plot setting +params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 12, # fontsize for x and y labels (was 10) + 'axes.titlesize': 12, + 'font.size': 12, # was 10 + 'legend.fontsize': 12, # was 10 + 'xtick.labelsize': 12, + 'ytick.labelsize': 12, + 'text.usetex': True, + 'figure.figsize': [5.5, 3.7], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + +SEC = pd.read_pickle('GEM_I_term_4_1250_agents_data_with_rewards.pkl') +DDPG = pd.read_pickle('GEM_no_I_term_4_1250_agents_data_with_rewards.pkl') + +m_sec = np.array(SEC['return_Mean']) +s_sec = np.array(SEC['return_Std']) + +idx_SEC_sort = np.argsort(m_sec) + +agents = np.arange(0, 1250) + +# take the best 50 and the worst 50 and and 450 random + +idxs = np.random.randint(low=50, high=1200, size=450) +m_sort = np.sort(m_sec) +m_sec_550 = np.concatenate([m_sort[0:50], m_sort[1200:1250], np.take(m_sort, idxs)]) + +m_ddgp = np.array(DDPG['return_Mean']) +s_ddgp = np.array(DDPG['return_Std']) + +idx_DDPG_sort = np.argsort(m_ddgp) + +# take the best 50 and the worst 50 and and 450 random +m_sort = np.sort(m_ddgp) +m_ddpg_550 = np.concatenate([m_sort[0:50], m_sort[1200:1250], np.take(m_sort, idxs)]) + +if save_results: + matplotlib.rcParams.update(params) + +fig = plt.figure() +plt.boxplot((m_sec_550, m_ddpg_550)) +plt.grid() +plt.ylim([-1, 0]) +plt.xticks([1, 2], ['$\mathrm{SEC}$', '$\mathrm{DDPG}$']) +plt.ylabel('$\overline{r}_{k,i}$') +# plt.ylabel('$\overline{\sum{r_k}}$') +plt.tick_params(direction='in') +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/GEM_Errorbar_lim.pgf') + fig.savefig(f'{folder_name}/GEM_Errorbar_lim.png') + fig.savefig(f'{folder_name}/GEM_Errorbar_lim.pdf') + +##########################LearningCurve############### + +params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 12, # fontsize for x and y labels (was 10) + 'axes.titlesize': 12, + 'font.size': 12, # was 10 + 'legend.fontsize': 12, # was 10 + 'xtick.labelsize': 12, + 'ytick.labelsize': 12, + 'text.usetex': True, + 'figure.figsize': [5.5, 3.7], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + +matplotlib.rcParams.update(params) + +SEC_train_data = pd.read_pickle('GEM_I_term_4_1250_agents_train_data.pkl') +DDPG_train_data = pd.read_pickle('GEM_no_I_term_4_1250_agents_train_data.pkl') + +# sort df based on test return top down +# sort df by idx (return of test case from above) - not needed, just for doublecheck +df_sort_sec = SEC_train_data.iloc[:, idx_SEC_sort] +df_sort_ddpg = SEC_train_data.iloc[:, idx_SEC_sort] + +# get the best/worst idx2 out ouf sort_idx and snip the df to 550 based on that idx2 +idx2_ddpg = np.concatenate([idx_DDPG_sort[0:50], idx_DDPG_sort[idxs], idx_DDPG_sort[747:798]]) +ddpg550 = DDPG_train_data.iloc[:, idx2_ddpg] + +idx2_sec = np.concatenate([idx_SEC_sort[0:50], idx_SEC_sort[idxs], idx_SEC_sort[747:798]]) +sec550 = SEC_train_data.iloc[:, idx2_sec] + +DDPG_mean_learningCurve_550 = ddpg550.mean(axis=1) +DDPG_std_learningCurve_550 = 
ddpg550.std(axis=1) + +SEC_mean_learningCurve_550 = sec550.mean(axis=1) +SEC_std_learningCurve_550 = sec550.std(axis=1) + +low = (SEC_mean_learningCurve_550 - SEC_std_learningCurve_550).to_numpy() +up = (SEC_mean_learningCurve_550 + SEC_std_learningCurve_550).to_numpy() +SEC = SEC_mean_learningCurve_550.to_numpy() +DDPG = DDPG_mean_learningCurve_550.to_numpy() +episode = np.array([list(range(0, 177))]).squeeze() + +fig, ax = plt.subplots() +plt.fill_between(episode, up, low, facecolor='b', alpha=0.25) +plt.fill_between(episode, (DDPG_mean_learningCurve_550 + DDPG_std_learningCurve_550).to_numpy(), + (DDPG_mean_learningCurve_550 - DDPG_std_learningCurve_550).to_numpy(), facecolor='r', alpha=0.25) +plt.plot(episode, SEC, 'b', label='$\mathrm{SEC}$', linewidth=2) +plt.plot(episode, low, '--b', linewidth=0.5) +plt.plot(episode, up, '--b', linewidth=0.5) +plt.plot(episode, DDPG, 'r', label='$\mathrm{DDPG}$', linewidth=2) +plt.plot(episode, (DDPG_mean_learningCurve_550 + DDPG_std_learningCurve_550).to_numpy(), '--r', linewidth=0.5) +plt.plot(episode, (DDPG_mean_learningCurve_550 - DDPG_std_learningCurve_550).to_numpy(), '--r', linewidth=0.5) +plt.grid() +plt.tick_params(direction='in') +plt.legend() +plt.xlim([0, 176]) +# plt.set_xlim([0, 10]) +plt.ylabel('$\overline{r}_{k,i}$') +# plt.ylabel('$\overline{{r}}$') +plt.xlabel(r'$\mathrm{Episode}$') +plt.show() + +if save_results: + matplotlib.rcParams.update(params) + + fig.savefig(f'{folder_name}/GEM_learning_curve.pgf') + fig.savefig(f'{folder_name}/GEM_learning_curve.png') + fig.savefig(f'{folder_name}/GEM_learning_curve.pdf') diff --git a/experiments/GEM/viz/plt_pkl.py b/experiments/GEM/viz/plt_pkl.py new file mode 100644 index 00000000..5cb5d459 --- /dev/null +++ b/experiments/GEM/viz/plt_pkl.py @@ -0,0 +1,96 @@ +import matplotlib +import matplotlib.pyplot as plt +import pandas as pd +import numpy as np + +save_results = False +folder_name = 'errorbar_plots/' + +# Plot setting +params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 8, # fontsize for x and y labels (was 10) + 'axes.titlesize': 8, + 'font.size': 10, # was 10 + 'legend.fontsize': 10, # was 10 + 'xtick.labelsize': 10, + 'ytick.labelsize': 10, + 'text.usetex': True, + 'figure.figsize': [5.8, 3.8], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + +I_term = pd.read_pickle('GEM_I_term_4mean_over_1250_agents.pkl') +no_I_term = pd.read_pickle('GEM_no_I_term_4mean_over_1250_agents.pkl') + +asd = 1 + +m = np.array(I_term['return_Mean']) +s = np.array(I_term['return_Std']) +agents = np.arange(0, 1250) + +plt.plot(agents, m) +plt.fill_between(agents, m - s, m + s, facecolor='r') +plt.ylabel('Average return ') +plt.xlabel('Agents') +plt.ylim([-0.6, 0.2]) +plt.grid() +plt.title('I_term') +plt.show() + +plt.plot(s) +# plt.fill_between( m - s, m + s, facecolor='r') +plt.ylabel('Average return sdt') +plt.xlabel('agents') +# plt.ylim([-0.4, 0]) +plt.grid() +plt.title('I_term') +plt.show() + +m_no_I = np.array(no_I_term['return_Mean']) +s_no_I = np.array(no_I_term['return_Std']) +agents = np.arange(0, 1250) + +plt.plot(agents, m_no_I) +plt.fill_between(agents, m_no_I - s_no_I, m_no_I + s_no_I, facecolor='r') +plt.ylabel('Average return +- sdt') +plt.xlabel('Agents') +plt.ylim([-0.6, 0.2]) +plt.grid() +plt.title('no_I_term') +plt.show() + +plt.plot(s_no_I) +# 
plt.fill_between( m - s, m + s, facecolor='r') +plt.ylabel('Average return sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([-0.4, 0]) +plt.grid() +plt.title('no_I_term') +plt.show() + +if save_results: + matplotlib.rcParams.update(params) + +fig = plt.figure() +plt.boxplot((m_no_I, m)) +plt.grid() +plt.ylim([-1, 0]) +plt.xticks([1, 2], ['$\mathrm{DDPG}$', '$\mathrm{SEC}$']) +plt.ylabel('$\overline{\sum{r_k}}$') +plt.tick_params(direction='in') +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/GEM_Errorbar_lim.pgf') + fig.savefig(f'{folder_name}/GEM_Errorbar_lim.png') + fig.savefig(f'{folder_name}/GEM_Errorbar_lim.pdf') + +plt.boxplot((m, m_no_I)) +plt.grid() +plt.xticks([1, 2], ['I', 'no-I']) +plt.show() diff --git a/experiments/GEM/viz/saves/compare_I_noI_4/Qualitativ_comparison.py b/experiments/GEM/viz/saves/compare_I_noI_4/Qualitativ_comparison.py new file mode 100644 index 00000000..59552a11 --- /dev/null +++ b/experiments/GEM/viz/saves/compare_I_noI_4/Qualitativ_comparison.py @@ -0,0 +1,140 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +make_pyplot = False +show_load = True +interval_plt = True + +# Fuer den Detzerministc case +interval_list_x = [[0.197, 0.206], [0.795, 0.81], [0.8025, 0.81], [0.849, 0.855]] +interval_list_x = [[0.699, 0.705], [0.499, 0.505]] # ,[0.8025, 0.81], [0.849, 0.855]] +interval_list_y_q = [[-0.9, 0.9], [-1.2, 0.7], [0.55, 0.725], [-0.2, 0.67]] +interval_list_y_d = [[-1.5, -0.1], [-1.2, 0.2], [-0.3, -0.1], [-1.1, -0.21]] +interval_list_y_q = [[-1, 1], [-1, 1]] # , [-1, 1], [-1, 1]] +interval_list_y_d = [[-1, 1], [-1, 1]] # , [-1, 1], [-1, 1]] + +file_names = ['_GEM_I_term_4_trial770', '_GEM_no_I_term_4_trial1192'] +ylabels = ['I', 'no-I'] + +reward_list_DDPG = [] + +# fig, axs = plt.subplots(len(model_names)+2, len(interval_list_y), figsize=(16, 12)) # , sharex=True) # a new figure window +fig, axs = plt.subplots(len(file_names) * 2 + 1, len(interval_list_y_q), + figsize=(14, 10)) # , sharex=True) # a new figure window + +for i in range(len(interval_list_y_q)): + plt_count = 1 + ############## Subplots + # fig = plt.figure(figsize=(10,12)) # a new figure window + + for file_name, ylabel_use in zip(file_names, ylabels): + + df_DDPG = pd.read_pickle(file_name) + # df_DDPG = pd.read_pickle(folder_name + '/' 'model_5_pastVals.zip_100000steps_NoPhaseFeature_1427') + + ts = 1e-4 # if ts stored: take from db + t_test = np.arange(0, len(df_DDPG['i_d_mess'][0]) * ts, ts).tolist() + + Name = df_DDPG['Name'].tolist()[0] + reward = df_DDPG['Reward_test'].tolist()[0] + + if plt_count == 1: + axs[0, i].plot(t_test, reward, 'b', label=f' {Name}: ' + f'{round(sum(reward[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + else: + axs[0, i].plot(t_test, reward, 'r', label=f'{Name}: ' + f'{round(sum(reward[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + axs[0, i].grid() + axs[0, i].set_xlim(interval_list_x[i]) + # axs[0, i].set_ylim(interval_list_y[i]) + axs[0, i].legend() + if 0 == 0: + axs[0, i].set_ylabel("Reward") + + axs[plt_count, i].plot(t_test, df_DDPG['i_q_mess'].tolist()[0], 'b', label='i_q') + axs[plt_count, i].plot(t_test, df_DDPG['i_q_ref'].tolist()[0], 'r', label='i_q_ref') + axs[plt_count, i].grid() + axs[plt_count, i].legend() + axs[plt_count, 
i].set_xlim(interval_list_x[i]) + axs[plt_count, i].set_ylim(interval_list_y_q[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + # axs[plt_count, i].set_ylabel(pV) + axs[plt_count, i].set_ylabel(Name) + # axs[plt_count, i].set_ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") + # else: + # axs[plt_count, i].set_ylabel("$v_{\mathrm{q0, DDPG}}\,/\,\mathrm{V}$") + plt_count += 1 + + axs[plt_count, i].plot(t_test, df_DDPG['i_d_mess'].tolist()[0], 'b', label='i_d') + axs[plt_count, i].plot(t_test, df_DDPG['i_d_ref'].tolist()[0], 'r', label='i_d_ref') + axs[plt_count, i].grid() + axs[plt_count, i].legend() + axs[plt_count, i].set_xlim(interval_list_x[i]) + axs[plt_count, i].set_ylim(interval_list_y_d[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + # axs[plt_count, i].set_ylabel(pV) + axs[plt_count, i].set_ylabel(Name) + # axs[plt_count, i].set_ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") + # else: + # axs[plt_count, i].set_ylabel("$v_{\mathrm{q0, DDPG}}\,/\,\mathrm{V}$") + plt_count += 1 + """ + axs[plt_count, i].plot(t_test, df_DDPG['v_d_mess'].tolist()[0][:-1], 'b', label='v_d') + axs[plt_count, i].plot(t_test, df_DDPG['v_q_mess'].tolist()[0][:-1], 'r', label='v_q') + axs[plt_count, i].grid() + axs[plt_count, i].legend() + axs[plt_count, i].set_xlim(interval_list_x[i]) + #axs[plt_count, i].set_ylim(interval_list_y_d[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + # axs[plt_count, i].set_ylabel(pV) + axs[plt_count, i].set_ylabel(Name) + # axs[plt_count, i].set_ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") + # else: + # axs[plt_count, i].set_ylabel("$v_{\mathrm{q0, DDPG}}\,/\,\mathrm{V}$") + plt_count += 1 + """ + +""" +fig.suptitle(f'Model using pastVals:' + str(pastVals) + ' \n ' + f'Model-return(MRE)' + str(return_list_DDPG) + ' \n' + f' PI-return(MRE): {round(return_PI, 7)} \n ' + f'PI: Kp_i = {kp_c}, Ki_i = {ki_c}, Kp_v = {kp_v}, Ki_v = {ki_v}', + fontsize=14) +""" +fig.subplots_adjust(wspace=0.4, hspace=0.4) +plt.show() + +fig.savefig(f'/Ausschnitt.pdf') + +""" +plt.plot(t_test, trial_test['i_d_mess']) +plt.plot(t_test, trial_test['i_d_ref'], 'r') +plt.plot(t_test, trial_test['i_d_ref'], 'r') +plt.grid() +plt.xlabel("t") +plt.ylabel("i_d") +plt.title(f"Test{db_name}") +plt.show() + +plt.plot(t_test, df_DDPG['i_q_mess'].tolist()) +plt.plot(t_test, df_DDPG['i_q_ref'].tolist(), 'r') +plt.grid() +plt.xlabel("t") +plt.ylabel("i_q") +plt.title(f"Test {Name}") +plt.show() + +plt.plot(t_test, trial_test['Reward']) +plt.grid() +plt.xlabel("t") +plt.ylabel("reward") +plt.title(f"Test {db_name}") +plt.show() +""" diff --git a/experiments/GEM/viz/saves/compare_I_noI_4/paper_lpt_single.py b/experiments/GEM/viz/saves/compare_I_noI_4/paper_lpt_single.py new file mode 100644 index 00000000..1c33d86b --- /dev/null +++ b/experiments/GEM/viz/saves/compare_I_noI_4/paper_lpt_single.py @@ -0,0 +1,78 @@ +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +save_results = False + +# Fuer den 10s Fall +interval_list_x = [0.498, 0.505] +interval_list_y = [80, 345] + +if save_results: + # Plot setting + params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 10, # fontsize for x and y labels (was 10) + 'axes.titlesize': 10, + 
'font.size': 10, # was 10 + 'legend.fontsize': 10, # was 10 + 'xtick.labelsize': 10, + 'ytick.labelsize': 10, + 'text.usetex': True, + 'figure.figsize': [4.5, 4.6], # [5.4, 6],#[3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + matplotlib.rcParams.update(params) + +folder_name = 'saves/' # _deterministic' + +df_DDPG = pd.read_pickle('_GEM_no_I_term_4_trial1192') +df_DDPG_I = pd.read_pickle('_GEM_I_term_4_trial770') + +ts = 1e-4 +t_test = np.arange(0, len(df_DDPG['i_d_mess'][0]) * ts, ts).tolist() + +# fig, axs = plt.subplots(len(model_names) + 4, len(interval_list_y), +fig = plt.figure() + +iq_I = df_DDPG_I['i_q_mess'] + +fig, axs = plt.subplots(2, 1) +axs[1].plot(t_test, [i * 160 * 1.41 for i in df_DDPG_I['i_q_mess'].tolist()[0]], 'r', label='$\mathrm{SEC}$') +axs[1].plot(t_test, [i * 160 * 1.41 for i in df_DDPG['i_q_mess'].tolist()[0]], '--r', label='$\mathrm{DDPG}_\mathrm{}$') +axs[1].plot(t_test, [i * 160 * 1.41 for i in df_DDPG_I['i_q_ref'].tolist()[0]], ':', color='gray', + label='$\mathrm{i}_\mathrm{q}^*$') +axs[1].grid() +# axs[1].legend() +axs[1].set_xlim(interval_list_x) +axs[1].set_ylim([-0.5 * 160 * 1.41, 0.55 * 160 * 1.41]) +# axs[0].set_xlabel(r'$t\,/\,\mathrm{s}$') +axs[1].set_xlabel(r'$t\,/\,\mathrm{s}$') +axs[1].set_ylabel("$i_{\mathrm{q}}\,/\,{\mathrm{A}}$") +axs[1].tick_params(direction='in') + +axs[0].plot(t_test, [i * 160 * 1.41 for i in df_DDPG_I['i_d_mess'].tolist()[0]], 'b', label='$\mathrm{SEC}_\mathrm{}$') +axs[0].plot(t_test, [i * 160 * 1.41 for i in df_DDPG['i_d_mess'].tolist()[0]], '--b', label='$\mathrm{DDPG}_\mathrm{}$') +axs[0].plot(t_test, [i * 160 * 1.41 for i in df_DDPG_I['i_d_ref'].tolist()[0]], ':', color='gray', + label='$i_\mathrm{}^*$') +axs[0].grid() +axs[0].legend() +axs[0].set_xlim(interval_list_x) +axs[0].set_ylim([-0.78 * 160 * 1.41, 0.05 * 160 * 1.41]) +axs[0].tick_params(axis='x', colors='w') +axs[0].set_ylabel("$i_{\mathrm{d}}\,/\,{\mathrm{A}}$") +axs[0].tick_params(direction='in') +fig.subplots_adjust(wspace=0, hspace=0.05) +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/GEM_DDPG_I_noI_idq.pgf') + fig.savefig(f'{folder_name}/GEM_DDPG_I_noI_idq.png') + fig.savefig(f'{folder_name}/GEM_DDPG_I_noI_idq.pdf') diff --git a/experiments/P10/env/env_wrapper_P10.py b/experiments/P10/env/env_wrapper_P10.py new file mode 100644 index 00000000..e70569ca --- /dev/null +++ b/experiments/P10/env/env_wrapper_P10.py @@ -0,0 +1,688 @@ +import platform +from functools import partial +from typing import Union + +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.type_aliases import GymStepReturn +from stochastic.processes import VasicekProcess + +from experiments.P10.env.random_load_P10 import RandomLoad +from experiments.P10.env.vctrl_single_inv_P10 import net +from experiments.P10.util.config import cfg +from openmodelica_microgrid_gym.util import abc_to_alpha_beta, dq0_to_abc, abc_to_dq0, Fastqueue, RandProcess + + +class BaseWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", gamma=0, + number_learing_steps=500000, number_past_vals=0): + """ + Base Env Wrapper to add features to the env-observations and adds information to env.step output which can be + used in case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the 
action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) + + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + # increase action-space for PI-seperation + # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1)) + + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self._v_pahsor = 0.0 + self.n_episode = 0 + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.reward_episode_mean = [] + self.n_trail = n_trail + self.phase = [] + self.used_P = np.zeros(self.action_space.shape) + self.gamma = gamma + self.number_learing_steps = number_learing_steps + self.delay_queues = [Fastqueue(1, 3) for _ in range(number_past_vals)] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + + if cfg['is_dq0']: + # Action: dq0 -> abc + action_abc = dq0_to_abc(action, self.env.net.components[0].phase) + else: + action_abc = action + + # check if m_abc will be clipped + if np.any(abs(action_abc) > 1): + + clipped_action = np.clip(action_abc, -1, 1) + + delta_action = clipped_action - action_abc + # if, reduce integrator by clipped delta + action_delta = abc_to_dq0(delta_action, self.env.net.components[0].phase) + self.integrator_sum += action_delta * self.antiwindup_weight + + clip_reward = np.clip(np.sum(np.abs(delta_action) * \ + (-1 / (self.env.net.components[0].v_lim / self.env.net.components[ + 0].v_DC))) / 3 * (1 - self.gamma), + -1, 0) + + # clip_reward = 0 + + action_abc = clipped_action + + else: + clip_reward = 0 + + obs, reward, done, info = super().step(action_abc) + + reward = reward + clip_reward + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! 
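+ # Assumed observation layout (cf. obs_output in the experiment script): obs[0:3] inductor currents,
+ # obs[3:6] capacitor voltages, obs[6:9] voltage set points, obs[9:12] load currents (if provided).
+ # The load currents are scaled by the inverter current limit so that all observation channels
+ # share roughly the same per-unit range.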
+ obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + super().render() + + self._n_training_steps += 1 + + # if self._n_training_steps % round(self.training_episode_length / 10) == 0: + # self.env.on_episode_reset_callback() + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + super().close() + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + "Reward function": 'rew.rew_fun_dq0', + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + self.recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + obs = np.append(obs, error) + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + # todo efficiency? + self.used_P = np.copy(action) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + [x.clear() for x in self.delay_queues] + obs = super().reset() + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! 
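+ # Apply the same per-unit scaling of the load currents as in step(), so the first observation
+ # after a reset already matches the scaling seen during training.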
+ obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + self._n_training_steps = 0 + self.used_P = np.zeros(self.action_space.shape) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + obs = np.append(obs, error) + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + return obs + + def shift_and_append(self, obs): + """ + Takes the observation and shifts throught the queue + every queue output is added to total obs + """ + obs_delay_array = np.array([]) + obs_temp = obs + for queue in self.delay_queues: + obs_temp = queue.shift(obs_temp) + obs_delay_array = np.append(obs_delay_array, obs_temp) + + return obs_delay_array + + +class FeatureWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000): # , use_past_vals=False, number_past_vals=0): + """ + Env Wrapper to add features to the env-observations and adds information to env.step output which can be used in + case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) 
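+ :param integrator_weight: Gain applied to the integral part of the action (action[3:6]) before it is
+ accumulated in the internal integrator state (roughly a Ki*Ts term, defaults to net.ts)
+ :param antiwindup_weight: Back-calculation gain; if the combined P+I modulation index is clipped to
+ [-1, 1], the clipped difference (transformed back to dq0) times this weight is fed back into the
+ integrator state to prevent wind-up
+ :param penalty_I_weight: Weight of the reward penalty on the magnitude of the integral action
+ :param penalty_P_weight: Weight of the reward penalty on the magnitude of the proportional action
+ :param t_start_penalty_I: Training step after which the integral-action penalty is annealed linearly
+ towards zero at number_learing_steps
+ :param t_start_penalty_P: Training step after which the proportional-action penalty is annealed linearly
+ towards zero at number_learing_steps
+ :param number_learing_steps: Total number of learning steps, used for the penalty annealing above
+
+ Note: the first three action entries are interpreted as the proportional part and the last three as the
+ integral part of a PI-like control law, see step() below.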
+ + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + # increase action-space for PI-seperation + # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1)) + + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self._v_pahsor = 0.0 + self.n_episode = 0 + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.reward_episode_mean = [] + self.reward_plus_addon_episode_mean = [] + self.n_trail = n_trail + self.phase = [] + self.integrator_sum = np.zeros(self.action_space.shape) + self.integrator_weight = integrator_weight + self.antiwindup_weight = antiwindup_weight + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + self.gamma = gamma + self.penalty_I_weight = penalty_I_weight + self.penalty_P_weight = penalty_P_weight + self.t_start_penalty_I = t_start_penalty_I + self.t_start_penalty_P = t_start_penalty_P + self.number_learing_steps = number_learing_steps + self.integrator_sum_list0 = [] + self.integrator_sum_list1 = [] + self.integrator_sum_list2 = [] + self.action_P0 = [] + self.action_P1 = [] + self.action_P2 = [] + self.action_I0 = [] + self.action_I1 = [] + self.action_I2 = [] + self.rew = [] + self.rew_sum = [] + self.penaltyP = [] + self.penaltyI = [] + self.clipped_rew = [] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + action_P = action[0:3] + action_I = action[3:6] + + self.integrator_sum += action_I * self.integrator_weight + + action_PI = action_P + self.integrator_sum + + if cfg['is_dq0']: + # Action: dq0 -> abc + action_abc = dq0_to_abc(action_PI, self.env.net.components[0].phase) + + # check if m_abc will be clipped + if np.any(abs(action_abc) > 1): + + clipped_action = np.clip(action_abc, -1, 1) + + delta_action = clipped_action - action_abc + # if, reduce integrator by clipped delta + action_delta = abc_to_dq0(delta_action, self.env.net.components[0].phase) + self.integrator_sum += action_delta * self.antiwindup_weight + + clip_reward = np.clip(np.sum(np.abs(delta_action) * \ + (-1 / (self.env.net.components[0].v_lim / self.env.net.components[ + 0].v_DC / 2))) / 3, + -1, 0) * (1 - self.gamma) + + # clip_reward = 0 + + action_abc = clipped_action + + else: + clip_reward = 0 + + obs, reward, done, info = super().step(action_abc) + + # reward = reward + clip_reward shifted to reward sum + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! 
+ obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + super().render() + + integrator_penalty = np.sum(-((np.abs(action_I)) ** 0.5)) * (1 - self.gamma) / 3 + # action_P_penalty = - np.sum((np.abs(action_P - self.used_P)) ** 0.5) * (1 - self.gamma) / 3 + action_P_penalty = np.sum(-((np.abs(action_P)) ** 0.5)) * (1 - self.gamma) / 3 + + # reward_weight is = 1 + + if self.total_steps > self.t_start_penalty_I: + penalty_I_weight_scale = 1 / (self.t_start_penalty_I - self.number_learing_steps) * self.total_steps - \ + self.number_learing_steps / (self.t_start_penalty_I - self.number_learing_steps) + + else: + penalty_I_weight_scale = 1 + + if self.total_steps > self.t_start_penalty_P: + penalty_P_weight_scale = 1 / (self.t_start_penalty_P - self.number_learing_steps) * self.total_steps - \ + self.number_learing_steps / (self.t_start_penalty_P - self.number_learing_steps) + + else: + + penalty_P_weight_scale = 1 + + lam_P = self.penalty_P_weight * penalty_P_weight_scale + lam_I = self.penalty_I_weight * penalty_I_weight_scale + + if cfg['loglevel'] == 'debug_reward': + self.rew.append(reward) + self.penaltyP.append(lam_P * action_P_penalty) + self.penaltyI.append(lam_I * integrator_penalty) + self.clipped_rew.append(clip_reward) + + if reward > -1: + # if reward = -1, env is abort, worst reward = -1, if not, sum up components: + reward_sum = (reward + clip_reward + lam_I * integrator_penalty + lam_P * action_P_penalty) + + # normalize r_sum between [-1, 1] from [-1-lam_P-lam_I, 1] using min-max normalization from + # https://en.wikipedia.org/wiki/Feature_scaling + + reward = 2 * (reward_sum + 1 + lam_P + lam_I) / (1 + 1 + lam_P + lam_I) - 1 + + self._n_training_steps += 1 + + # if self._n_training_steps % round(self.training_episode_length / 10) == 0: + # self.env.on_episode_reset_callback() + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + super().close() + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + self.integrator_sum_list0.append(self.integrator_sum[0]) + self.integrator_sum_list1.append(self.integrator_sum[1]) + self.integrator_sum_list2.append(self.integrator_sum[2]) + self.action_P0.append(np.float64(action_P[0])) + self.action_P1.append(np.float64(action_P[1])) + self.action_P2.append(np.float64(action_P[2])) + self.action_I0.append(np.float64(action_I[0])) + self.action_I1.append(np.float64(action_I[1])) + self.action_I2.append(np.float64(action_I[2])) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 
0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, self.used_action) + + # todo efficiency? + self.used_P = np.copy(action_P) + self.used_I = np.copy(self.integrator_sum) + # self.used_P = action_P + # self.used_I = self.integrator_sum + + self.rew_sum.append(reward) + + if done: + # log train curve with additional rewards: + self.reward_plus_addon_episode_mean.append(np.mean(self.rew_sum)) + # log train curve with raw env-reward: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + reward_Data = {'Reward_env': self.rew, + 'penaltyP': self.penaltyP, + 'penaltyI': self.penaltyI, + 'clipped_rew': self.clipped_rew, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + } + + self.recorder.save_to_json('Trial_number_' + self.n_trail, reward_Data) + + self.rew = [] + self.penaltyP = [] + self.penaltyI = [] + self.clipped_rew = [] + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + "Reward function": 'rew.rew_fun_dq0', + 'Integrator0': self.integrator_sum_list0, + 'Integrator1': self.integrator_sum_list1, + 'Integrator2': self.integrator_sum_list2, + 'actionP0': self.action_P0, + 'actionP1': self.action_P1, + 'actionP2': self.action_P2, + 'actionI0': self.action_I0, + 'actionI1': self.action_I1, + 'actionI2': self.action_I2 + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + self.recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + + # if self._n_training_steps > 500: + super().close() + # plt.plot(self.integrator_sum_list0) + # plt.plot(self.integrator_sum_list1) + # plt.plot(self.integrator_sum_list2) + # plt.ylabel('intergratorzustand') + # plt.show() + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + obs = super().reset() + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! 
+ obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + self._n_training_steps = 0 + self.integrator_sum = np.zeros(self.action_space.shape) + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + # obs = np.append(obs, delta_i_lim_i_phasor) + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, self.used_action) + + return obs + + +class FeatureWrapper_pastVals(FeatureWrapper): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 500000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000, number_past_vals=10): + """ + Env Wrapper which adds the number_past_vals voltage ([3:6]!!!) observations to the observations. + Initialized with zeros! 
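+ The past values are realised via number_past_vals chained Fastqueue(1, 3) delay lines: in
+ shift_and_append() the measured voltages obs[3:6] are pushed through the queues, so the observation
+ is extended by the voltage measurements delayed by 1 ... number_past_vals control steps. The queues
+ are cleared (i.e. filled with zeros) on every reset().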
+ """ + super().__init__(env, number_of_features, training_episode_length, + recorder, n_trail, integrator_weight, antiwindup_weight, gamma, + penalty_I_weight, penalty_P_weight, t_start_penalty_I, t_start_penalty_P, + number_learing_steps) + + # self.observation_space = gym.spaces.Box( + # low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + # high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + self.delay_queues = [Fastqueue(1, 3) for _ in range(number_past_vals)] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + obs, reward, done, info = super().step(action) + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + [x.clear() for x in self.delay_queues] + obs = super().reset() + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + return obs + + def shift_and_append(self, obs): + """ + Takes the observation and shifts throught the queue + every queue output is added to total obs + """ + obs_delay_array = np.array([]) + obs_temp = obs + for queue in self.delay_queues: + obs_temp = queue.shift(obs_temp) + obs_delay_array = np.append(obs_delay_array, obs_temp) + + return obs_delay_array diff --git a/experiments/P10/env/random_load_P10.py b/experiments/P10/env/random_load_P10.py new file mode 100644 index 00000000..7300e489 --- /dev/null +++ b/experiments/P10/env/random_load_P10.py @@ -0,0 +1,122 @@ +import numpy as np +import pandas as pd + +from openmodelica_microgrid_gym.util import RandProcess + + +class RandomLoad: + def __init__(self, train_episode_length: int, ts: float, rand_process: RandProcess, loadstep_time: int = None, + load_curve: pd.DataFrame = None, bounds=None, bounds_std=None): + """ + + :param max_episode_steps: number of steps per training episode (can differ from env.max_episode_steps) + :param ts: sampletime of env + :param rand_pocess: Instance of random process defines noise added to load + :param loadstep_time: number of env step where load step should happen + :param load_curve: Stored load data to sample from instead of smaple from distribution + :param bounds: Bounds to clip the sampled load data + :param bounds_std: Chosen bounds are sampled from a distribution with std=bounds_std and mean=bounds + + """ + self.train_episode_length = train_episode_length + self.ts = ts + self.rand_process = rand_process + if loadstep_time is None: + self.loadstep_time = np.random.randint(0, self.train_episode_length) + else: + self.loadstep_time = loadstep_time + self.load_curve = load_curve + if bounds is None: + self.bounds = (-np.inf, np.inf) + else: + self.bounds = bounds + if bounds_std is None: + self.bounds_std = (0, 0) + else: + self.bounds_std = bounds_std + + self.lowerbound_std = 0 + self.upperbound_std = 0 + + def reset(self, loadstep_time=None): + if loadstep_time is None: + self.loadstep_time = np.random.randint(0, self.train_episode_length) + else: + self.loadstep_time = loadstep_time + + def clipped_step(self, t): + return np.clip(self.rand_process.sample(t), + self.bounds[0] + self.lowerbound_std, + self.bounds[1] + self.upperbound_std + ) + + def give_dataframe_value(self, t, col): + """ + Gives load values from a stored dataframe (self.load_curve) + :parma t: time - represents here the 
row of the dataframe + :param col: colon name of the dataframe (typically str) + """ + if t < 0: + # return None + return self.load_curve[col][0] + if self.load_curve is None: + raise ValueError('No dataframe given! Please feed load class (.load_curve) with data') + return self.load_curve[col][int(t / self.ts)] + + def random_load_step(self, t, event_prob: int = 2, step_prob: int = 50): + """ + Changes the load parameters applying a loadstep with 0.2% probability which is a pure step with 50 % + probability otherwise a drift. In every event the random process variance is drawn randomly [1, 150]. + :param t: time + :param event_prob: probability (in pre mill) that the step event is triggered in the current step + :param step_prob: probability (in pre cent) that event is a abrupt step (drift otherwise!, random process speed + not adjustable yet + :return: Sample from SP + """ + # Changes rand process data with probability of 5% and sets new value randomly + if np.random.randint(0, 1001) < 2: + + gain = np.random.randint(self.rand_process.bounds[0], self.rand_process.bounds[1]) + + self.rand_process.proc.mean = gain + self.rand_process.proc.vol = np.random.randint(1, 150) + self.rand_process.proc.speed = np.random.randint(10, 1200) + # define sdt for clipping once every event + # np.maximum to not allow negative values + self.lowerbound_std = np.maximum(np.random.normal(scale=self.bounds_std[0]), 0.0001) + self.upperbound_std = np.random.normal(scale=self.bounds_std[1]) + + # With 50% probability do a step or a drift + if np.random.randint(0, 101) < 50: + # step + self.rand_process.reserve = gain + + else: + # drift -> Lower speed to allow + self.rand_process.proc.speed = np.random.randint(10, 100) + + return np.clip(self.rand_process.sample(t), + self.bounds[0] + self.lowerbound_std, + self.bounds[1] + self.upperbound_std + ) + + def do_change(self, event_prob_permill=2, step_prob_percent=50): + if np.random.randint(0, 1001) < event_prob_permill: + + gain = np.random.randint(self.rand_process.bounds[0], self.rand_process.bounds[1]) + + self.rand_process.proc.mean = gain + self.rand_process.proc.vol = np.random.randint(1, 150) + self.rand_process.proc.speed = np.random.randint(10, 1200) + # define sdt for clipping once every event + self.lowerbound_std = np.random.normal(scale=self.bounds_std[0]) + self.upperbound_std = np.random.normal(scale=self.bounds_std[1]) + + # With 50% probability do a step or a drift + if np.random.randint(0, 101) < step_prob_percent: + # step + self.rand_process.reserve = gain + + else: + # drift -> Lower speed to allow + self.rand_process.proc.speed = np.random.randint(10, 100) diff --git a/experiments/P10/env/rewards_P10.py b/experiments/P10/env/rewards_P10.py new file mode 100644 index 00000000..bca03916 --- /dev/null +++ b/experiments/P10/env/rewards_P10.py @@ -0,0 +1,132 @@ +import numpy as np +from openmodelica_microgrid_gym.util import nested_map, abc_to_dq0, dq0_to_abc +from typing import List + + +class Reward: + def __init__(self, nom, lim, v_DC, gamma, det_run=False, nom_region: float = 1.1, use_gamma_normalization=1, + error_exponent: float = 1.0, i_lim: float = np.inf, i_nom: float = np.inf, i_exponent: float = 1.0): + """ + + :param nom: Nominal value for the voltage + :param lim: Limit value for the voltage + :param v_DC: DC-Link voltage + :param gamma: Discount factor to map critic values -> [-1, 1] + :param use_gamma_normalization: if 0 normalization depending on gamma is not used + :param nom_region: Defines cliff in the reward landscape where the 
reward is pulled down because the nominal + value is exceeded. nom_region defines how much the nominal value can be exceeded before + the cliff (e.g. 1.1 -> cliff @ 1.1*self.nom + :param error_exponent: defines the used error-function: E.g.: 1 -> Mean absolute error + 2 -> Mean squared error + 0.5 -> Mean root error + :param i_lim: Limit value for the current + :param i_nom: Nominal value for the current + """ + self._idx = None + self.nom = nom + self.lim = lim + self.v_DC = v_DC + self.use_gamma_normalization = use_gamma_normalization + if self.use_gamma_normalization == 1: + self.gamma = gamma + else: + self.gamma = 0 + self.det_run = det_run + self.nom_region = nom_region + self.exponent = error_exponent + self.i_lim = i_lim + self.i_nom = i_nom + self.i_exponent = i_exponent + + def set_idx(self, obs): + if self._idx is None: + self._idx = nested_map( + lambda n: obs.index(n), + [[f'lc.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012'], + [f'lc.capacitor{k}.v' for k in '123'], [f'inverter1.v_ref.{k}' for k in '012'], + 'inverter1.phase.0']) + + def rew_fun_dq0(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + uses the same reward for voltage like defined above + + If v_lim is exceeded, episode abort -> env.abort_reward (should be -1) is given back + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + phase = data[idx[4]] + + vdq0_master = abc_to_dq0(data[idx[2]], phase) # 3 phase currents at LC inductors + + # set points (sp) + vsp_dq0_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + # SP = vsp_dq0_master * self.lim + # mess = vdq0_master * self.lim + + if any(np.abs(data[idx[2]]) > 1): + if self.det_run: + return -(1 - self.gamma) + else: + return + else: + # rew = np.sum(1 - (2 * (np.abs(vsp_dq0_master - vdq0_master)) ** self.exponent)) * (1 - self.gamma) / 3 # /2 + + # / 2 da normiert auf v_lim = 1, max abweichung 2*vlim=2, damit worst case bei 0 + rew = np.sum(1 - ((np.abs(vsp_dq0_master - vdq0_master) / 2) ** self.exponent)) * (1 - self.gamma) / 3 # /2 + """ + rew = np.sum(-((np.abs(vsp_dq0_master - vdq0_master)) ** self.exponent)) * (1 - self.gamma) / 3 # /2 + """ + return rew + + def rew_fun_PIPI_MRE(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + uses the same reward for voltage like defined above but also includes reward depending on the current + If i_nom is exceeded r_current: f(i_mess) -> [0, 1] is multiplied to the r_voltage + Before r_voltage is scaled to the region [0,1]: + - r_voltage = (r_voltage+1)/2 + - r = r_voltage * r_current + - r = r-1 + + If v_lim or i_lim are exceeded, episode abort -> env.abort_reward (should be -1) is given back + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. 
currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + i_mess = data[idx[0]] # 3 phase currents at LC inductors + mess = data[idx[2]] # 3 phase currents at LC inductors + + # set points (sp) + isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + SP = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + # SP = vsp_dq0_master * self.lim + # mess = vdq0_master * self.lim + + # rew = np.sum(-((np.abs(SP - mess)) ** 0.5)) * (1 - self.gamma) / 3 + + phase = data[idx[4]] + + vdq0_master = abc_to_dq0(data[idx[2]], phase) / self.lim # 3 phase currents at LC inductors + + # set points (sp) + vsp_dq0_master = abc_to_dq0(data[idx[3]], + phase) / self.lim # convert dq set-points into three-phase abc coordinates + + # SP = vsp_dq0_master * self.lim + # mess = vdq0_master * self.lim + + # rew = np.sum(-((np.abs(vsp_dq0_master - vdq0_master)) ** self.exponent)) * (1 - self.gamma) / 3 + + rew = np.sum(1 - ((np.abs(vsp_dq0_master - vdq0_master) / 2) ** self.exponent)) * (1 - self.gamma) / 3 + + return rew diff --git a/experiments/P10/env/vctrl_single_inv_P10.py b/experiments/P10/env/vctrl_single_inv_P10.py new file mode 100644 index 00000000..96c36ffc --- /dev/null +++ b/experiments/P10/env/vctrl_single_inv_P10.py @@ -0,0 +1,297 @@ +from datetime import datetime, time +from functools import partial +from itertools import accumulate +from os import makedirs + +import time + +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from stochastic.processes import VasicekProcess + +from experiments.P10.env.random_load_P10 import RandomLoad + +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network + +from openmodelica_microgrid_gym.util import RandProcess +from gym.envs.registration import register +from experiments.P10.util.config import cfg + +folder_name = cfg['STUDY_NAME'] # 'DDPG_MRE_sqlite_PC2' +# experiment_name = 'DDPG_VC_Reward_MRE_reward_NOT_NORMED' +experiment_name = 'plots' +timestamp = datetime.now().strftime(f'_%Y.%b.%d_%X') + +makedirs(folder_name, exist_ok=True) +# makedirs(folder_name + experiment_name, exist_ok=True) + + +# Simulation definitions +if not cfg['is_dq0']: + # load net using abc reference values + print('abc not implemented yet! dq0 is used!!!!') + net = Network.load('net/net_p10.yaml') +else: + # load net using dq0 reference values + + net = Network.load('net/net_p10.yaml') + +# set high to not terminate env! Termination should be done in wrapper by env after episode-length-HP +max_episode_steps = 1500000 # net.max_episode_steps # number of simulation steps per episode + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC + +# plant + +print('Using P10 setting') +L_filter = 70e-6 # / H +R_filter = 1.1e-3 # / Ohm +R_filter_C = 7e-3 # / Ohm +C_filter = 250e-6 # / F +lower_bound_load_clip = 1 # to allow maximal load that draws i_limit (let exceed?) +lower_bound_load_clip_std = 1 + +""" +old TB: +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F +# R = 40 # nomVoltPeak / 7.5 # / Ohm +#lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) 
+#lower_bound_load_clip_std = 2 +""" + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +upper_bound_load_clip = 200 # to apply symmetrical load bounds +upper_bound_load_clip_std = 0 +R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + +gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +# if save needed in dependence of trial ( -> foldername) shift to executive file? +def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + plt.close() + + +def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + +def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + # ts = time.gmtime() + # fig.savefig(f'{folder_name + experiment_name}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + +# toDo: train_episode_length not needed in used methods, reset not as well? +rand_load_train = RandomLoad(5000, net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + +cb = CallbackList() +# set initial = None to reset load random in range of bounds +cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) +cb.append(rand_load_train.reset) + +register(id='vctrl_single_inv_train-v0', + entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': R_filter_C, + 'lc.resistor5.R': R_filter_C, + 'lc.resistor6.R': R_filter_C, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.random_load_step, + 'r_load.resistor2.R': rand_load_train.clipped_step, + 
'r_load.resistor3.R': rand_load_train.clipped_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) + +register(id='vctrl_single_inv_test-v0', + entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=20000, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor1.R'), + # 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor2.R'), + # 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor3.R') + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) + +register(id='vctrl_single_inv_test-v1', + entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=100001, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': R_filter_C, + 'lc.resistor5.R': R_filter_C, + 'lc.resistor6.R': R_filter_C, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': 
partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.random_load_step, + # 'r_load.resistor1.R': rand_load_train.one_random_loadstep_per_episode, + 'r_load.resistor2.R': rand_load_train.clipped_step, + 'r_load.resistor3.R': rand_load_train.clipped_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) diff --git a/experiments/P10/experiment_vctrl_single_inv_P10.py b/experiments/P10/experiment_vctrl_single_inv_P10.py new file mode 100644 index 00000000..b4aad907 --- /dev/null +++ b/experiments/P10/experiment_vctrl_single_inv_P10.py @@ -0,0 +1,353 @@ +import platform +import time + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +from experiments.P10.env.env_wrapper_P10 import FeatureWrapper, FeatureWrapper_pastVals, BaseWrapper +from experiments.P10.env.rewards_P10 import Reward +from experiments.P10.env.vctrl_single_inv_P10 import net # , folder_name +from experiments.P10.util.config import cfg +from experiments.P10.util.recorder_P10 import Recorder + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] + + +def experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, antiwindup_weight, + penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, n_trail, + number_past_vals=0): + if node not in cfg['lea_vpn_nodes']: + # assume we are on pc2 + pc2_log_path = cfg['pc2_logpath'] + log_path = f'{pc2_log_path}/{folder_name}/{n_trail}/' + else: + log_path = f'{folder_name}/{n_trail}/' + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + env = gym.make('experiments.P10.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 
'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + # , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'] + ) + + if cfg['env_wrapper'] == 'past': + env = FeatureWrapper_pastVals(env, number_of_features=9 + number_past_vals * 3, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + + + elif cfg['env_wrapper'] == 'no-I-term': + env = BaseWrapper(env, number_of_features=6 + number_past_vals * 3, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, gamma=gamma, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + + else: + env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps) # , use_past_vals=True, number_past_vals=30) + + # todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... + if cfg['env_wrapper'] not in ['no-I-term', 'I-controller']: + env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + # action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, + # sigma_min=noise_var * np.ones(n_actions) * noise_var_min, + # mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + # sigma=noise_var * np.ones(n_actions), dt=net.ts) + print(optimizer) + if optimizer == 'SGD': + used_optimzer = th.optim.SGD + elif optimizer == 'RMSprop': + used_optimzer = th.optim.RMSprop + # elif optimizer == 'LBFGS': + # needs in step additional argument + # used_optimzer = th.optim.LBFGS + else: + used_optimzer = th.optim.Adam + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers), + optimizer_class=used_optimzer) + + model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=log_path, + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=buffer_size, + # learning_starts=int(learning_starts * training_episode_length), + batch_size=batch_size, tau=tau, gamma=gamma, action_noise=action_noise, + train_freq=(train_freq, train_freq_type), gradient_steps=- 1, + optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + # Adjust network -> maybe change to Costume net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html + # adn scale weights and biases + count = 0 + for kk in range(actor_number_layers + 1): + + model.actor.mu._modules[str(count)].weight.data = model.actor.mu._modules[str(count)].weight.data * weight_scale + 
model.actor_target.mu._modules[str(count)].weight.data = model.actor_target.mu._modules[ + str(count)].weight.data * weight_scale + + model.actor.mu._modules[str(count)].bias.data = model.actor.mu._modules[str(count)].bias.data * bias_scale + model.actor_target.mu._modules[str(count)].bias.data = model.actor.mu._modules[ + str(count)].bias.data * bias_scale + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + # todo: Downscale actionspace - lessulgy possible? Interaction pytorch... + if cfg['env_wrapper'] not in ['no-I-term', 'I-controller']: + env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {#"Name": "After_Training", + "Mean_eps_env_reward_raw": env.reward_episode_mean, + "Mean_eps_reward_sum": env.reward_plus_addon_episode_mean, + #"Trial number": n_trail, + #"Database name": folder_name, + #"Sum_eps_reward": env.get_episode_rewards() + } + #mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + mongo_recorder.save_local_to_pkl('traing_rewards_Trial_number_' + n_trail, train_data, n_trail) + + model.save(log_path + f'model.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + + env_test = gym.make('experiments.P10.env:vctrl_single_inv_test-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? 
+ obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + # , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'] + ) + + if cfg['env_wrapper'] == 'past': + env_test = FeatureWrapper_pastVals(env_test, number_of_features=9 + number_past_vals * 3, + integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, + penalty_P_weight=0, number_past_vals=number_past_vals, + training_episode_length=training_episode_length, ) + + elif cfg['env_wrapper'] == 'no-I-term': + env_test = BaseWrapper(env_test, number_of_features=6 + number_past_vals * 3, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, gamma=gamma, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + + else: + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, + penalty_P_weight=0, + training_episode_length=training_episode_length, ) # , use_past_vals=True, number_past_vals=30) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + va = [] + vb = [] + vc = [] + v_ref0 = [] + v_ref1 = [] + v_ref2 = [] + ia = [] + ib = [] + ic = [] + R_load = [] + + for step in range(env_test.max_episode_steps): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + if cfg['loglevel'] in ['train', 'test']: + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + if cfg['env_wrapper'] not in ['no-I-term']: + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test:# and env_test.rew[-1]: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + + if limit_exceeded_in_test: + # if limit was exceeded once, reward will be kept to -1 till the end of the episode, + # nevertheless what the agent does + rewards = -1 + + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + + if step % 1000 == 0 and step != 0: + # if step % cfg['train_episode_length'] == 0 and step != 0: + if cfg['loglevel'] in ['train', 'test']: + va.extend(env_test.history[env_test.viz_col_tmpls[0].vars[0]].copy().values.tolist()) + vb.extend(env_test.history[env_test.viz_col_tmpls[0].vars[1]].copy().values.tolist()) + vc.extend(env_test.history[env_test.viz_col_tmpls[0].vars[2]].copy().values.tolist()) + v_ref0.extend(env_test.history[env_test.viz_col_tmpls[0].vars[3]].copy().values.tolist()) + v_ref1.extend(env_test.history[env_test.viz_col_tmpls[0].vars[4]].copy().values.tolist()) + 
v_ref2.extend(env_test.history[env_test.viz_col_tmpls[0].vars[5]].copy().values.tolist()) + ia.extend(env_test.history[env_test.viz_col_tmpls[1].vars[0]].copy().values.tolist()) + ib.extend(env_test.history[env_test.viz_col_tmpls[1].vars[1]].copy().values.tolist()) + ic.extend(env_test.history[env_test.viz_col_tmpls[1].vars[2]].copy().values.tolist()) + R_load.extend(env_test.history[env_test.viz_col_tmpls[2].vars[1]].copy().values.tolist()) + + env_test.close() + obs = env_test.reset() + if cfg['loglevel'] in ['train', 'test']: + phase_list.append(env_test.env.net.components[0].phase) + + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + """ + test_after_training_config = {"Name": "Test_Reward", + "time": ts, + #"Reward": rew_list, + "Return": (return_sum / env_test.max_episode_steps), + "Trial number": n_trail, + "Database name": folder_name, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime())} + """ + + reward_test_after_training = {"Reward": rew_list} + + if cfg['loglevel'] in ['train', 'test', 'setting']: + #mongo_recorder.save_to_json('Test_setting_Trial_number_' + n_trail, test_after_training_config, n_trail) + mongo_recorder.save_local_to_pkl('Test_reward_Trial_number_' + n_trail, reward_test_after_training, n_trail) + + if cfg['loglevel'] in ['train', 'test']: + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "lc_capacitor1_v": va, + "lc_capacitor2_v": vb, + "lc_capacitor3_v": vc, + "inverter1_v_ref_0": v_ref0, + "inverter1_v_ref_1": v_ref1, + "inverter1_v_ref_2": v_ref2, + "lc_inductor1_i": ia, + "lc_inductor2_i": ib, + "lc_inductor3_i": ic, + "r_load_resistor1_R": R_load, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! 
(risk=0 manullay in env); only voltage taken into account in reward!"} + + """ + In new testenv not used, because then only the last episode is stored + """ + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training, n_trail) + + return (return_sum / env_test.max_episode_steps) diff --git a/experiments/P10/hp_tune_ddpg_objective_P10.py b/experiments/P10/hp_tune_ddpg_objective_P10.py new file mode 100644 index 00000000..25c920b4 --- /dev/null +++ b/experiments/P10/hp_tune_ddpg_objective_P10.py @@ -0,0 +1,444 @@ +import json +import os +import time + +import sqlalchemy +from optuna.samplers import TPESampler + +os.environ['PGOPTIONS'] = '-c statement_timeout=1000' + +import optuna +import platform +import argparse +import sshtunnel +import numpy as np +# np.random.seed(0) +from experiments.P10.util.config import cfg + +from experiments.P10.experiment_vctrl_single_inv_P10 import mongo_recorder, experiment_fit_DDPG +from experiments.hp_tune.util.scheduler import linear_schedule + +model_path = 'experiments/hp_tune/trained_models/study_22_run_11534/' + +PC2_LOCAL_PORT2PSQL = 11999 +SERVER_LOCAL_PORT2PSQL = 6432 +DB_NAME = 'optuna' +PC2_LOCAL_PORT2MYSQL = 11998 +SERVER_LOCAL_PORT2MYSQL = 3306 +STUDY_NAME = cfg['STUDY_NAME'] # 'DDPG_MRE_sqlite_PC2' + +node = platform.uname().node + + +def ddpg_objective_fix_params(trial): + file_congfig = open(model_path + + 'PC2_DDPG_Vctrl_single_inv_22_newTestcase_Trial_number_11534_0.json', ) + trial_config = json.load(file_congfig) + + number_learning_steps = 500000 # trial.suggest_int("number_learning_steps", 100000, 1000000) + # rew_weigth = trial.suggest_float("rew_weigth", 0.1, 5) + # rew_penalty_distribution = trial.suggest_float("antiwindup_weight", 0.1, 5) + penalty_I_weight = trial_config["penalty_I_weight"] # trial.suggest_float("penalty_I_weight", 100e-6, 2) + penalty_P_weight = trial_config["penalty_P_weight"] # trial.suggest_float("penalty_P_weight", 100e-6, 2) + + penalty_I_decay_start = trial_config[ + "penalty_I_decay_start"] # trial.suggest_float("penalty_I_decay_start", 0.00001, 1) + penalty_P_decay_start = trial_config[ + "penalty_P_decay_start"] # trial.suggest_float("penalty_P_decay_start", 0.00001, 1) + + t_start_penalty_I = int(penalty_I_decay_start * number_learning_steps) + t_start_penalty_P = int(penalty_P_decay_start * number_learning_steps) + + integrator_weight = trial_config["integrator_weight"] # trial.suggest_float("integrator_weight", 1 / 200, 2) + # integrator_weight = trial.suggest_loguniform("integrator_weight", 1e-6, 1e-0) + # antiwindup_weight = trial.suggest_loguniform("antiwindup_weight", 50e-6, 50e-3) + antiwindup_weight = trial_config["antiwindup_weight"] # trial.suggest_float("antiwindup_weight", 0.00001, 1) + + learning_rate = trial_config["learning_rate"] # trial.suggest_loguniform("learning_rate", 1e-6, 1e-1) # 0.0002# + + lr_decay_start = trial_config[ + "lr_decay_start"] # trial.suggest_float("lr_decay_start", 0.00001, 1) # 3000 # 0.2 * number_learning_steps? 
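+ # lr_decay_start and lr_decay_duration are fractions of number_learning_steps; together they define
+ # the window [t_start, t_end] over which linear_schedule (called below) anneals the learning rate
+ # from learning_rate down to learning_rate * final_lr.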
+ lr_decay_duration = trial_config["lr_decay_duration"] # trial.suggest_float("lr_decay_duration", 0.00001, + # 1) # 3000 # 0.2 * number_learning_steps? + t_start = int(lr_decay_start * number_learning_steps) + t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps, + number_learning_steps)) + final_lr = trial_config["final_lr"] # trial.suggest_float("final_lr", 0.00001, 1) + + gamma = trial_config["gamma"] # trial.suggest_float("gamma", 0.5, 0.9999) + weight_scale = trial_config["weight_scale"] # trial.suggest_loguniform("weight_scale", 5e-5, 0.2) # 0.005 + + bias_scale = trial_config["bias_scale"] # trial.suggest_loguniform("bias_scale", 5e-4, 0.1) # 0.005 + alpha_relu_actor = trial_config[ + "alpha_relu_actor"] # trial.suggest_loguniform("alpha_relu_actor", 0.0001, 0.5) # 0.005 + alpha_relu_critic = trial_config[ + "alpha_relu_critic"] # trial.suggest_loguniform("alpha_relu_critic", 0.0001, 0.5) # 0.005 + + batch_size = trial_config["batch_size"] # trial.suggest_int("batch_size", 16, 1024) # 128 + buffer_size = trial_config[ + "buffer_size"] # trial.suggest_int("buffer_size", int(1e4), number_learning_steps) # 128 + + actor_hidden_size = trial_config[ + "actor_hidden_size"] # trial.suggest_int("actor_hidden_size", 10, 200) # 100 # Using LeakyReLU + actor_number_layers = trial_config["actor_number_layers"] # trial.suggest_int("actor_number_layers", 1, 4) + + critic_hidden_size = trial_config["critic_hidden_size"] # trial.suggest_int("critic_hidden_size", 10, 300) # 100 + critic_number_layers = trial_config["critic_number_layers"] # trial.suggest_int("critic_number_layers", 1, 4) + + n_trail = str(trial.number) + use_gamma_in_rew = 1 + noise_var = trial_config["noise_var"] # trial.suggest_loguniform("noise_var", 0.01, 1) # 2 + # min var, action noise is reduced to (depends on noise_var) + noise_var_min = 0.0013 # trial.suggest_loguniform("noise_var_min", 0.0000001, 2) + # min var, action noise is reduced to (depends on training_episode_length) + noise_steps_annealing = int( + 0.25 * number_learning_steps) # trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), + # number_learning_steps) + noise_theta = trial_config["noise_theta"] # trial.suggest_loguniform("noise_theta", 1, 50) # 25 # stiffness of OU + error_exponent = 0.5 # trial.suggest_loguniform("error_exponent", 0.001, 4) + + training_episode_length = trial_config[ + "training_episode_length"] # trial.suggest_int("training_episode_length", 500, 5000) # 128 + # learning_starts = 0.32 # trial.suggest_loguniform("learning_starts", 0.1, 2) # 128 + tau = trial_config["tau"] # trial.suggest_loguniform("tau", 0.0001, 0.3) # 2 + + train_freq_type = "step" # trial.suggest_categorical("train_freq_type", ["episode", "step"]) + train_freq = trial_config["train_freq"] # trial.suggest_int("train_freq", 1, 15000) + + optimizer = trial_config[ + "optimizer"] # trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"]) # , "LBFGS"]) + + number_past_vals = 5 # trial.suggest_int("number_past_vals", 0, 15) + + learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr, + t_start=t_start, + t_end=t_end, + total_timesteps=number_learning_steps) + + trail_config_mongo = {"Name": "Config", + "Node": node, + "Agent": "DDPG", + "Number_learning_Steps": number_learning_steps, + "Trial number": n_trail, + "Database name": cfg['STUDY_NAME'], + "Start time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Info": "AltesTestcase setting 
mit Integrator-Actor; 50 runs mit bestem HP-setting", + } + trail_config_mongo.update(trial.params) + # mongo_recorder.save_to_mongodb('Trial_number_' + n_trail, trail_config_mongo) + mongo_recorder.save_to_json('Trial_number_' + n_trail, trail_config_mongo) + + loss = experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + # loss = experiment_fit_DDPG_custom(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, + integrator_weight * antiwindup_weight, penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, + n_trail, number_past_vals) + + return loss + + +def ddpg_objective(trial): + number_learning_steps = 500000 # trial.suggest_int("number_learning_steps", 100000, 1000000) + actor_hidden_size = trial.suggest_int("actor_hidden_size", 10, 100) # Using LeakyReLU + actor_number_layers = trial.suggest_int("actor_number_layers", 1, 4) + alpha_relu_actor = trial.suggest_loguniform("alpha_relu_actor", 0.001, 0.5) + alpha_relu_critic = trial.suggest_loguniform("alpha_relu_critic", 0.001, 0.5) + antiwindup_weight = trial.suggest_float("antiwindup_weight", 1e-4, 1) + batch_size = trial.suggest_int("batch_size", 16, 1024) + bias_scale = trial.suggest_loguniform("bias_scale", 5e-5, 0.2) + buffer_size = trial.suggest_int("buffer_size", int(20e4), number_learning_steps) # 128 + critic_hidden_size = trial.suggest_int("critic_hidden_size", 10, 300) + critic_number_layers = trial.suggest_int("critic_number_layers", 1, 4) + error_exponent = 0.5 # 0.5 # trial.suggest_loguniform("error_exponent", 0.001, 4) + final_lr = trial.suggest_float("final_lr", 0.00001, 1) + gamma = trial.suggest_float("gamma", 0.6, 0.99999) + integrator_weight = trial.suggest_float("integrator_weight", 1e-4, 0.5) + learning_rate = trial.suggest_loguniform("learning_rate", 1e-8, 1e-2) + lr_decay_start = trial.suggest_float("lr_decay_start", 0.00001, 1) + lr_decay_duration = trial.suggest_float("lr_decay_duration", 0.00001, 1) + n_trail = str(trial.number) + noise_steps_annealing = int( + 0.25 * number_learning_steps) # trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), + # number_learning_steps) + noise_theta = trial.suggest_loguniform("noise_theta", 1, 100) # 25 # stiffness of OU + noise_var = trial.suggest_loguniform("noise_var", 0.001, 1) # 2 + noise_var_min = 0.0013 # trial.suggest_loguniform("noise_var_min", 0.0000001, 2) + number_past_vals = trial.suggest_int("number_past_vals", 0, 20) + optimizer = trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"]) # , "LBFGS"]) + penalty_I_weight = trial.suggest_float("penalty_I_weight", 100e-6, 2) + penalty_P_weight = trial.suggest_float("penalty_P_weight", 100e-6, 2) + + penalty_I_decay_start = trial.suggest_float("penalty_I_decay_start", 0.00001, 1) + penalty_P_decay_start = trial.suggest_float("penalty_P_decay_start", 0.00001, 1) + + t_start_penalty_I = int(penalty_I_decay_start * number_learning_steps) + t_start_penalty_P = int(penalty_P_decay_start * number_learning_steps) + t_start = int(lr_decay_start * number_learning_steps) + t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * 
number_learning_steps, + number_learning_steps)) + tau = trial.suggest_loguniform("tau", 0.0001, 0.3) # 2 + train_freq_type = "step" # trial.suggest_categorical("train_freq_type", ["episode", "step"]) + training_episode_length = trial.suggest_int("training_episode_length", 1000, 4000) # 128 + train_freq = trial.suggest_int("train_freq", 1, 5000) + use_gamma_in_rew = 1 + weight_scale = trial.suggest_loguniform("weight_scale", 5e-5, 0.2) + + learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr, + t_start=t_start, + t_end=t_end, + total_timesteps=number_learning_steps) + """ + trail_config_mongo = {"Name": "Config", + "Node": node, + "Agent": "DDPG", + "Number_learning_Steps": number_learning_steps, + "Trial number": n_trail, + "Database name": cfg['STUDY_NAME'], + "Start time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Optimierer/ Setting stuff": "DDPG HPO mit Integrator und pastVals (HP) " + "P10 setting thrid try" + "Reward in test is kept to -1 if limit exceeded once" + } + + trail_config_mongo.update(trial.params) + mongo_recorder.save_to_json('Config_Trial_number_' + n_trail, trail_config_mongo, n_trail) + #mongo_recorder.save_local_to_pkl('Trial_number_' + n_trail, trail_config_mongo) + """ + + loss = experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + # loss = experiment_fit_DDPG_custom(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, + integrator_weight * antiwindup_weight, penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, + n_trail, number_past_vals) + + return loss + + +def get_storage(url, storage_kws): + successfull = False + retry_counter = 0 + + while not successfull: + try: + storage = optuna.storages.RDBStorage( + url=url, **storage_kws) + successfull = True + except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.DatabaseError) as e: + wait_time = np.random.randint(60, 300) + retry_counter += 1 + if retry_counter > 10: + print('Stopped after 10 connection attempts!') + raise e + print(f'Could not connect, retry in {wait_time} s') + time.sleep(wait_time) + + return storage + + +def optuna_optimize_mysql_lea35(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + if node in ('LEA-WORK35', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2MYSQL + else: + port = SERVER_LOCAL_PORT2MYSQL + + storage = get_storage(f'mysql://{optuna_creds}@localhost:{port}/{DB_NAME}') + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2MYSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2MYSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + study = optuna.create_study( + storage=f"mysql+pymysql://{optuna_creds}@127.0.0.1:{tun.local_bind_port}/{DB_NAME}", + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_mysql(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f"mysql://{optuna_creds}@localhost/{DB_NAME}", + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_sqlite(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 100 + + print(n_trials) + print('Local optimization is run but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in cfg['lea_vpn_nodes']: + optuna_path = './optuna/' + else: + # assume we are on not of pc2 -> store to project folder + optuna_path = '/scratch/hpc-prf-reinfl/weber/OMG/optuna/' + + os.makedirs(optuna_path, exist_ok=True) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f'sqlite:///{optuna_path}optuna.sqlite', + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_psql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_psql' + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + # set trial to failed if it seems dead for 20 minutes + storage_kws = dict(engine_kwargs={"pool_timeout": 600}) + if node in ('lea-cyberdyne', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2PSQL + else: + port = SERVER_LOCAL_PORT2PSQL + + storage = get_storage(f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', storage_kws=storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2PSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2PSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + storage = get_storage(url=f'postgresql://{optuna_creds}' + f'@localhost:{tun.local_bind_port}/{DB_NAME}', storage_kws=storage_kws) + + # storage = optuna.storages.RDBStorage( + # url=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + # **storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +if __name__ == "__main__": + TPE_sampler = TPESampler(n_startup_trials=1000) # , constant_liar=True) + + optuna_optimize_mysql_lea35(ddpg_objective, study_name=STUDY_NAME, sampler=TPE_sampler) + # optuna_optimize_sqlite(ddpg_objective, study_name=STUDY_NAME, sampler=TPE_sampler) diff --git a/experiments/P10/pc2_schedule_ddpg_P10.py b/experiments/P10/pc2_schedule_ddpg_P10.py new file mode 100644 index 00000000..355d708c --- /dev/null +++ b/experiments/P10/pc2_schedule_ddpg_P10.py @@ -0,0 +1,105 @@ +"""Allocate jobs executing a certain cmd endlessly. 
Make sure not to surpass +the allowed cpu core limit""" + +import os +import pathlib +import uuid +import time + +import optuna +from optuna.samplers import TPESampler + +from experiments.hp_tune.util import pc2 +from experiments.P10.util.config import cfg + +# config +USER = os.getenv('USER') +ALLOWED_MAX_CPU_CORES = 300 # 512 +STUDY_NAME = cfg['STUDY_NAME'] +DB_NAME = 'optuna' +# resources request +job_resource_plan = { + 'duration': 24, # in hours + 'ncpus': 2, + 'memory': 12, + 'vmemory': 16, +} + +MAX_WORKERS = ALLOWED_MAX_CPU_CORES // job_resource_plan['ncpus'] + +PC2_LOCAL_PORT2MYSQL = 11998 +SERVER_LOCAL_PORT2MYSQL = 3306 + + +def main(): + started_workers = 0 + print('Start slavedriving loop..') + old_ccsinfo_counts = None + while True: + + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study( + storage=f'mysql+pymysql://{optuna_creds}@localhost:{PC2_LOCAL_PORT2MYSQL}/{DB_NAME}', + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=TPESampler(n_startup_trials=2500), study_name=STUDY_NAME, + load_if_exists=True, + direction='maximize') + + complete_trials = len([t for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]) + print(f'Completed trials in study: {complete_trials}') + if complete_trials > 12000: + print('Maximal completed trials reached - STOPPING') + break + + job_files_path = pathlib.Path( + f"/scratch/hpc-prf-reinfl/weber/P10/ccs_job_files/{STUDY_NAME}") # SCRATCH = $PC2PFS/hpc_....re/OMG_prjecet + job_files_path.mkdir(parents=False, exist_ok=True) + + # read ccsinfo + ccsinfo = pc2.get_ccsinfo(USER) + ccsinfo_state_counts = ccsinfo.state.value_counts() + ccs_running = ccsinfo_state_counts.get('ALLOCATED', 0) + ccs_planned = ccsinfo_state_counts.get('PLANNED', 0) + total_busy = ccs_running + ccs_planned + if not ccsinfo_state_counts.equals(old_ccsinfo_counts): + print("\n## ccs summary ##") + print(f"Running: {ccs_running}") + print(f"Planned : {ccs_planned}") + print(f"Total busy workers (ccs): {total_busy}") + + if total_busy < MAX_WORKERS: + # call workers to work + n_workers = MAX_WORKERS - total_busy + print(f'Start {n_workers} workers:') + for w in range(n_workers): + started_workers += 1 + jobid = str(uuid.uuid4()).split('-')[0] + cluster = "oculus" + job_name = job_files_path / f"pc2_job_{jobid}.sh" + res_plan = pc2.calculate_resources(**job_resource_plan) + + execution_line = "PYTHONPATH=$HOME/openmodelica-microgrid-gym/ " \ + "python $HOME/openmodelica-microgrid-gym/experiments/P10/hp_tune_ddpg_objective_P10.py -n 1" + + print(f'Start job {jobid} ..') + pc2.create_n_run_script( + job_name, + pc2.build_shell_script_lines(job_files_path, cluster, + job_name, res_plan, + execution_line), + dry=False) + print('sleep 10s for better DB interaction', end='\r') + time.sleep(10) + + old_ccsinfo_counts = ccsinfo_state_counts + + print('sleep..', end='\r') + time.sleep(300) + + +if __name__ == '__main__': + main() diff --git a/experiments/P10/retrain_agent.py b/experiments/P10/retrain_agent.py new file mode 100644 index 00000000..878e93b7 --- /dev/null +++ b/experiments/P10/retrain_agent.py @@ -0,0 +1,212 @@ +print('Start script') +import gym +import logging +import os +import platform +import time +from functools import partial + +import GPy +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from stochastic.processes import 
VasicekProcess +from tqdm import tqdm +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.P10.env.env_wrapper_P10 import FeatureWrapper, FeatureWrapper_pastVals, BaseWrapper +from experiments.P10.env.rewards_P10 import Reward +from experiments.P10.env.vctrl_single_inv_P10 import net # , folder_name +from experiments.P10.util.config import cfg +from experiments.P10.util.recorder_P10 import Recorder +import pandas as pd + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +show_plots = True +save_results = False +# 2128 ->0; 3125 -> -1, 956-> best +trial = '956' + +number_learning_steps = 200000 + +folder_name = 'experiments/P10/retrain/' +os.makedirs(folder_name, exist_ok=True) +model_path = 'experiments/P10/viz/data/' +node = platform.uname().node +model_name = 'model_'+trial+'.zip' + +################DDPG Config Stuff######################################################################### +print('Using model_'+trial+' setting') +if trial == '956': + actor_number_layers = 2 + alpha_relu_actor = 0.0225049 + alpha_relu_critic = 0.00861825 + antiwindup_weight = 0.350646 + critic_number_layers = 4 + error_exponent = 0.5 + gamma = 0.794337 + integrator_weight = 0.214138 + use_gamma_in_rew = 1 + n_trail = 50001 + number_past_vals = 18 + training_episode_length = 2577 + penalty_I_weight = 1.46321 + penalty_P_weight = 0.662572 + t_start_penalty_I = 100000 + t_start_penalty_P = 100000 + + + +mongo_recorder = Recorder(node=node, database_name=folder_name) + +current_directory = os.getcwd() + +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, + i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom, det_run=True) + +####################################DDPG Stuff############################################## + +rew.gamma = 0 +# episodes will not abort, if limit is exceeded reward = -1 +rew.det_run = True +rew.exponent = 0.5 # 1 + +env = gym.make('experiments.P10.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + +env = FeatureWrapper_pastVals(env, number_of_features=9 + number_past_vals * 3, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + +env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + +# model2 = DDPG.load(model_path + f'model.zip') # , env=env_test) +print('Before load') + +model = DDPG.load(model_path + f'{model_name}', env=env) + +print('After load') + +count = 0 +for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 
1)].negative_slope = alpha_relu_actor + + count = count + 2 + +count = 0 + +for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + +env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + +################################################################################### +# retrain! +print('Train ' +str(number_learning_steps) + ' steps') +model.learn(total_timesteps=number_learning_steps) +print('Finished ' +str(number_learning_steps) + 'training steps') +model.save(folder_name + f'model' + trial + '_retrained.zip') + +# Log Train-info data +train_data = { # "Name": "After_Training", + "Mean_eps_env_reward_raw": env.reward_episode_mean, + "Mean_eps_reward_sum": env.reward_plus_addon_episode_mean, +} +df = pd.DataFrame(train_data) +df.to_pickle(f'{folder_name}/' + 'trainRewards_model' + trial + '_retrained' + ".pkl.bz2") + +""" +####### Run Test ######### +return_sum = 0.0 +rew.gamma = 0 +# episodes will not abort, if limit is exceeded reward = -1 +rew.det_run = True +rew.exponent = 0.5 # 1 +limit_exceeded_in_test = False + +env_test = gym.make('experiments.P10.env:vctrl_single_inv_test-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + +env_test = FeatureWrapper_pastVals(env_test, number_of_features=9 + number_past_vals * 3, + integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, + penalty_P_weight=0, number_past_vals=number_past_vals, + training_episode_length=training_episode_length, ) + +obs = env_test.reset() +rew_list = [] + +for step in range(env_test.max_episode_steps): + + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + + if rewards == -1 and not limit_exceeded_in_test: # and env_test.rew[-1]: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + + if limit_exceeded_in_test: + # if limit was exceeded once, reward will be kept to -1 till the end of the episode, + # nevertheless what the agent does + rewards = -1 + + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + + if step % 1000 == 0 and step != 0: + env_test.close() + obs = env_test.reset() + + if done: + env_test.close() + break + +ts = time.gmtime() + +reward_test_after_training = {"Reward": rew_list} + +df = pd.DataFrame(reward_test_after_training) +df.to_pickle(f'{folder_name}/' + 'model' + trial + '_retrained' + ".pkl.bz2") + +print((return_sum / env_test.max_episode_steps)) +""" diff --git a/experiments/P10/util/config.py b/experiments/P10/util/config.py new file mode 100644 index 00000000..db49a173 --- /dev/null +++ b/experiments/P10/util/config.py @@ -0,0 +1,18 @@ +cfg = dict(lea_vpn_nodes=['lea-skynet', 'lea-picard', 'lea-barclay', + 'lea-cyberdyne', 'webbah-ThinkPad-L380', 'LEA_WORK35', 'webbah-ThinkPad-T14-Gen-2a'], + STUDY_NAME='P10_SEC_R_load', + meas_data_folder='experiment_data/', + MONGODB_PORT=12001, + loglevel='setting', # setting ~ config + return/learning curve 
(most is stored anyway, only effects in + # test saving stuff + # test ~ setting + test-results (measurements) + # train ~ test + training measurements + is_dq0=True, + + # train_episode_length=2881, # defines when in training the env is reset e.g. for exploring starts, + + # nothing -> Standard FeatureWrapper; past -> FeatureWrapper_pastVals; future -> FeatureWrapper_futureVals + # I-controller -> DDPG as P-term + standard I-controller; no-I-term -> Pure DDPG without integrator + env_wrapper='past', + pc2_logpath='/scratch/hpc-prf-reinfl/weber/P10' + ) diff --git a/experiments/P10/util/recorder_P10.py b/experiments/P10/util/recorder_P10.py new file mode 100644 index 00000000..550e79b3 --- /dev/null +++ b/experiments/P10/util/recorder_P10.py @@ -0,0 +1,78 @@ +import json +from os import makedirs +import pandas as pd + +import sshtunnel +from pymongo import MongoClient + +from experiments.P10.util.config import cfg + +MONGODB_PORT = cfg['MONGODB_PORT'] # 12001 + + +class Recorder: + + def __init__(self, node, database_name): + """ + Class to record measured data to mongo database using pymongo + Depending on the node we are operating at it connects via ssh to + - in lea_vpn: to cyberdyne port 12001 + - else: assume pc2 node -> connect to frontend + and stores data to mongoDB at port MONGODB_PORT ( =12001). + HINT: From pc2 frontend permanent tunnel from cyberdyne port 12001 to frontend 12001 + is needed (assuming Mongod-Process running on cyberdyne + :params node: platform.uname().node + :params database_name: string for the database name to store data in + """ + self.node = node + self.save_count = 0 + + if self.node in cfg['lea_vpn_nodes']: + self.server_name = 'lea38' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT)} + self.save_folder = cfg['meas_data_folder'] + else: + # assume we are on a node of pc2 -> connect to frontend and put data on prt 12001 + # from there they can be grep via permanent tunnel from cyberdyne + self.server_name = 'fe.pc2.uni-paderborn.de' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT), + 'ssh_username': 'webbah'} + + #self.save_folder = '/scratch/hpc-prf-reinfl/weber/OMG/' + cfg['meas_data_folder'] + self.save_folder = cfg['pc2_logpath'] + '/' + cfg['meas_data_folder'] + + self.database_name = database_name + makedirs(self.save_folder, exist_ok=True) + # pathlib.Path(self.save_folder.mkdir(exist_ok=True)) + + def save_to_mongodb(self, col: str = ' trails', data=None): + """ + Stores data to database in document col + """ + with sshtunnel.open_tunnel(self.server_name, **self.tun_cfg) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[self.database_name] + trial_coll = db[col] # get collection named col + trial_coll.insert_one(data) + + def save_to_json(self, col: str = ' trails', data=None, n_trail=999999): + """ + Stores data to json file in specified directory. From there the data can be grept by another process + and can be stored to a DB via ssh + To distinguish the files of one trail a save_count is incremented and added to the filename + """ + + with open(self.save_folder + str(n_trail) + '_' + self.database_name + '_' + col + '_' + str(self.save_count) + '.json', + 'w') as outfile: + json.dump(data, outfile) + + self.save_count += 1 + + def save_local_to_pkl(self, col: str = ' trails', data=None, n_trail=999999): + """ + Stores data locally to comp. 
pkl + """ + df = pd.DataFrame(data) + df.to_pickle(self.save_folder + str(n_trail) + '_' + self.database_name + '_' + col + '_' + str(self.save_count) + ".pkl.bz2") diff --git a/experiments/P10/util/reporter_P10.py b/experiments/P10/util/reporter_P10.py new file mode 100644 index 00000000..ebd59f81 --- /dev/null +++ b/experiments/P10/util/reporter_P10.py @@ -0,0 +1,149 @@ +import json +import os +import platform +import re +import time + +import numpy as np + +import sshtunnel +from pymongo import MongoClient +from experiments.P10.util.config import cfg + + +class Reporter: + + def __init__(self): + """ + Greps json data which is stored in the cfg[meas_data_folder] and sends it to mongoDB + on cyberdyne (lea38) via sshtunnel on port MONGODB_PORT + """ + + MONGODB_PORT = cfg['MONGODB_PORT'] + + node = platform.uname().node + + if node in cfg['lea_vpn_nodes']: + self.server_name = 'lea38' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT)} + self.save_folder = './' + cfg['meas_data_folder'] + else: + # assume we are on a node of pc2 -> connect to frontend and put data on prt 12001 + # from there they can be grep via permanent tunnel from cyberdyne + self.server_name = 'fe.pc2.uni-paderborn.de' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT), + 'ssh_username': 'webbah'} + + self.save_folder = '/scratch/hpc-prf-reinfl/weber/OMG/' + cfg['meas_data_folder'] + + def save_to_mongodb(self, database_name: str, col: str = ' trails', data=None): + """ + Stores data to database in document col + """ + with sshtunnel.open_tunnel(self.server_name, **self.tun_cfg) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[database_name] + trial_coll = db[col] # get collection named col + trial_coll.insert_one(data) + + def oldest_file_in_tree(self, extension=".json"): + """ + Returns the oldest file-path string + """ + print(os.getcwd()) + return min( + (os.path.join(dirname, filename) + for dirname, dirnames, filenames in os.walk(self.save_folder) + for filename in filenames + if filename.endswith(extension)), + key=lambda fn: os.stat(fn).st_mtime) + + def oldest_file_with_name_in_tree(self, count_number_to_find, extension=".json"): + """ + Returns the oldest file-path string + + :param count_number_to_find: List of count_numbers to find and store instead of storing all + """ + print(os.getcwd()) + return min( + (os.path.join(dirname, filename) + for dirname, dirnames, filenames in os.walk(self.save_folder) + for filename in filenames + if filename.endswith(str(count_number_to_find) + extension)), + key=lambda fn: os.stat(fn).st_mtime) + + def json_to_mongo_via_sshtunnel(self, file_name_to_store=None): + + if not len(os.listdir(self.save_folder)) == 0: + + if file_name_to_store is None: + try: + oldest_file_path = self.oldest_file_in_tree() + except(ValueError) as e: + print('Folder seems empty or no matching data found!') + print(f'ValueError{e}') + print('Empty directory! 
Go to sleep for 5 minutes!') + time.sleep(5 * 60) + return + else: + oldest_file_path = file_name_to_store + + with open(oldest_file_path) as json_file: + data = json.load(json_file) + + successfull = False + retry_counter = 0 + + while not successfull: + try: + now = time.time() + if os.stat(oldest_file_path).st_mtime < now - 60: + self.save_to_mongodb(database_name=data['Database name'], + col='Trial_number_' + data['Trial number'], data=data) + print('Reporter: Data stored successfully to MongoDB and will be removed locally!') + os.remove(oldest_file_path) + successfull = True + except (sshtunnel.BaseSSHTunnelForwarderError) as e: + wait_time = np.random.randint(1, 60) + retry_counter += 1 + if retry_counter > 10: + print('Stopped after 10 connection attempts!') + raise e + print(f'Reporter: Could not connect via ssh to frontend, retry in {wait_time} s') + time.sleep(wait_time) + + else: + print('Empty directory! Go to sleep for 5 minutes!') + time.sleep(5 * 60) + + +if __name__ == "__main__": + + reporter = Reporter() + print("Starting Reporter for logging from local savefolder to mongoDB") + + file_ending_number = [178, 179] + + print(f"Searching for files in directory with number ending on {file_ending_number}") + + # print(reporter.oldest_file_in_tree()) + while True: + reporter.json_to_mongo_via_sshtunnel() + + """ + # to send only files ending with number file_ending_number + for number in file_ending_number: + try: + oldest_named_file_path = reporter.oldest_file_with_name_in_tree(number) + print(oldest_named_file_path) + + reporter.json_to_mongo_via_sshtunnel(oldest_named_file_path) + + except(ValueError) as e: + print(f'No file with number {number} ending') + print(f'ValueError{e}') + print('Go to sleep for 5 seconds and go on with next number!') + time.sleep(5) + """ diff --git a/experiments/P10/viz/Com_models_pc2_P10.py b/experiments/P10/viz/Com_models_pc2_P10.py new file mode 100644 index 00000000..3f6ad320 --- /dev/null +++ b/experiments/P10/viz/Com_models_pc2_P10.py @@ -0,0 +1,723 @@ +print('Start script') +import gym +import logging +import os +import platform +import time +from functools import partial + +import GPy +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from stochastic.processes import VasicekProcess +from tqdm import tqdm +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.P10.env.env_wrapper_P10 import FeatureWrapper, FeatureWrapper_pastVals, BaseWrapper +from experiments.P10.env.rewards_P10 import Reward +from experiments.P10.env.vctrl_single_inv_P10 import net # , folder_name +from experiments.P10.util.config import cfg +from experiments.P10.util.recorder_P10 import Recorder + +# imports for PIPI +from experiments.P10.env.random_load_P10 import RandomLoad +from openmodelica_microgrid_gym.agents import SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + +import pandas as pd + +from openmodelica_microgrid_gym.util import abc_to_dq0 + + +class CallbackList(list): + def fire(self, 
*args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +show_plots = True +save_results = False +# 2128 ->0; 3125 -> -1, 956-> best +trial = '956' + +folder_name = 'saves/P10_SEC_R_load' +os.makedirs(folder_name, exist_ok=True) +wrapper = ['past'] +model_path = 'experiments/P10/viz/data/' +model_path = 'experiments/P10/retrain/' +node = platform.uname().node +model_name = ['model_'+trial+'.zip'] +model_name = ['model'+trial+'_retrained'] + +################DDPG Config Stuff######################################################################### +print('Using model_'+trial+' setting') +if trial == '956': + actor_number_layers = 2 + alpha_relu_actor = 0.0225049 + alpha_relu_critic = 0.00861825 + antiwindup_weight = 0.350646 + critic_number_layers = 4 + error_exponent = 0.5 + gamma = 0.794337 + integrator_weight = 0.214138 + use_gamma_in_rew = 1 + n_trail = 50001 + number_past_vals = [18] + +if trial == '3125': + actor_number_layers = 1 + alpha_relu_actor = 0.305758 + alpha_relu_critic = 0.0119687 + antiwindup_weight = 0.767766 + critic_number_layers = 4 + error_exponent = 0.5 + gamma = 0.922121 + integrator_weight = 0.237488 + use_gamma_in_rew = 1 + n_trail = 50001 + number_past_vals = [2] + +if trial == '2128': + actor_number_layers = 1 + alpha_relu_actor = 0.334101 + alpha_relu_critic = 0.0729528 + antiwindup_weight = 0.648373 + critic_number_layers = 4 + error_exponent = 0.5 + gamma = 0.798319 + integrator_weight = 0.122662 + use_gamma_in_rew = 1 + n_trail = 50001 + number_past_vals = [7] + +mongo_recorder = Recorder(node=node, database_name=folder_name) + +num_average = 1 +max_episode_steps_list = [1000] + +data_str = 'experiments/hp_tune/data/R_load_deterministic_test_case2_1_seconds.pkl' +#data_str = 'experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl' +# data_str = 'experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl' + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +#################PI Config stuff############################################################################## +current_directory = os.getcwd() + +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_p10.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +# max_episode_steps = 1002 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC + +print("P10 stuff!") +L_filter = 70e-6 # / H +R_filter = 1.1e-3 # / Ohm +C_filter = 250e-6 # / F + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 1 # to allow maximal load that draws i_limit (let exceed?) 
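# --- Editor's note: illustrative sketch only, not part of this patch. ---
# The clip bounds above are later passed to RandProcess(VasicekProcess, ...) to
# generate the stochastic R_load profile. A Vasicek (Ornstein-Uhlenbeck-type)
# process is mean reverting; a plain NumPy Euler-Maruyama discretization of such
# a load trajectory, clipped to bounds like the ones defined here, could look as
# follows. Default values for mean/r0 are assumptions; speed/vol mirror the
# proc_kwargs used further below.
def _sample_r_load_sketch(n_steps, dt=1e-4, speed=800.0, vol=40.0,
                          mean=100.0, r0=100.0, lo=1.0, hi=200.0):
    """Sketch of dR = speed * (mean - R) * dt + vol * dW, clipped to [lo, hi]."""
    r = np.empty(n_steps)
    r[0] = r0
    for k in range(1, n_steps):
        dw = np.random.normal(scale=np.sqrt(dt))
        r[k] = r[k - 1] + speed * (mean - r[k - 1]) * dt + vol * dw
    return np.clip(r, lo, hi)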
+upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 1 +upper_bound_load_clip_std = 0 +##################################### +# Definitions for the GP +prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set +noise_var = 0.001 # ** 2 # measurement noise sigma_omega +prior_var = 2 # prior variance of the GP + +bounds = [(0.000, 0.045), (4, 450)] # bounds on the input variable current-Ki&Kp and voltage-Ki&Kp +lengthscale = [.003, 50.] # length scale for the parameter variation [current-Ki&Kp and voltage-Ki&Kp] for the GP + +safe_threshold = 0 +j_min = -5 # cal min allowed performance + +explore_threshold = 0 + +# Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of +# limit exceeded +abort_reward = 100 * j_min + +# Definition of the kernel +kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + +##################################### +# Definition of the controllers +#From MATLAB: +################################################################################# +#Layout using mangtude optimum for inverter with LC-filter using L = 7e-05 H, C = 0.00025 F, R = 0.001 Ohm. +#Current controller: +#Kp = 0.0009773 A/V and Ki = 0.13159 A/(Vs) +#Voltage controller: +#Kp = 0.45052 V/A and Ki = 305.6655 V/(As) +# P10: +print('using p10 setting') +kp_v = 0.45052 +ki_v = 305.6655 +kp_c = 0.0009773 +ki_c = 0.13159 + +# Choose Kp and Ki for the current and voltage controller as mutable parameters +mutable_params = dict(voltageP=MutableFloat(kp_v), voltageI=MutableFloat(ki_v)) # 300Hz +# mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz +voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-i_lim * 10, i_lim * 10)) + +current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) # Current controller values +droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) +qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + +ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + +agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + +"""""" +for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + for ave_run in tqdm(range(num_average), desc='steps', unit='step', leave=False): + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, + i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom, det_run=True) + + ####################################PI Stuff################################################ + R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + rand_load_train = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + 
bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_tenLoadstepPerEpisode2881Len_test_case_10_seconds.pkl')) + # 'experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl')) + # 'experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl')) + # 'experiments/hp_tune/data/R_load_deterministic_test_case_25_ohm_1_seconds.pkl')) + data_str)) + + cb = CallbackList() + # set initial = None to reset load random in range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + cb.append(rand_load_train.reset) + + plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + + + # rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + # load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + # fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': R_filter, + 'lc.resistor5.R': R_filter, + 'lc.resistor6.R': R_filter, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, 
+ 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + history=FullHistory(), + # on_episode_reset_callback=cb.fire, + action_time_delay=1 * undersample + ) + + rew.gamma = 0 + return_sum_PI = 0.0 + rew_list_PI = [] + v_d_PI = [] + v_q_PI = [] + v_0_PI = [] + R_load_PI = [] + limit_exceeded_in_test_PI = False + limit_exceeded_penalty_PI = 0 + """ + agent.reset() + agent.obs_varnames = env.history.cols + env.history.cols = env.history.structured_cols(None) + agent.measurement_cols + env.measure = agent.measure + agent_fig = None + obs_PI = env.reset() + + for step in tqdm(range(env.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + agent.observe(None, False) + act_PI = agent.act(obs_PI) + obs_PI, r_PI, done_PI, info_PI = env.step(act_PI) + rew_list_PI.append(r_PI) + env.render() + return_sum_PI += r_PI + if r_PI == -1 and not limit_exceeded_in_test_PI: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test_PI = True + limit_exceeded_penalty_PI = -1 + + # _, env_fig = env.close() + agent.observe(r_PI, done_PI) + + v_a_PI = env.history.df['lc.capacitor1.v'] + v_b_PI = env.history.df['lc.capacitor2.v'] + v_c_PI = env.history.df['lc.capacitor3.v'] + i_a_PI = env.history.df['lc.inductor1.i'] + i_b_PI = env.history.df['lc.inductor2.i'] + i_c_PI = env.history.df['lc.inductor3.i'] + R_load_PI = (env.history.df['r_load.resistor1.R'].tolist()) + phase_PI = env.history.df['inverter1.phase.0'] # env.net.components[0].phase + + i_dq0_PI = abc_to_dq0(np.array([i_a_PI, i_b_PI, i_c_PI]), phase_PI) + v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) + + i_d_PI = i_dq0_PI[0].tolist() + i_q_PI = i_dq0_PI[1].tolist() + i_0_PI = i_dq0_PI[2].tolist() + v_d_PI = (v_dq0_PI[0].tolist()) + v_q_PI = (v_dq0_PI[1].tolist()) + v_0_PI = (v_dq0_PI[2].tolist()) + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "time": ts, + "PI_Kp_c": kp_c, + "PI_Ki_c": ki_c, + "PI_Kp_v": kp_v, + "PI_Ki_v": ki_v, + "DDPG_model_path": model_path, + "Return PI": (return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI), + "Reward PI": rew_list_PI, + "env_hist_PI": env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "PI result for comparison with RL agent", + "optimization node": 'Thinkpad', + "optimization folder name": 'Pipi_new_testcase_opt_4d_undsafe_2' + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/PI_{max_episode_steps_list[max_eps_steps]}steps') + """ + ####################################DDPG Stuff############################################## + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + + #net = Network.load('net/net_vctrl_single_inv_dq0.yaml') # is used from vctrl_single_env, not needed here + + for used_model, wrapper_mode, 
used_number_past_vales in zip(model_name, wrapper, number_past_vals): + + if wrapper_mode == 'i_load': + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2' # ], + , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'], + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': R_filter, + 'lc.resistor5.R': R_filter, + 'lc.resistor6.R': R_filter, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + ) + else: + env_test = gym.make('experiments.P10.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'], + # , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'], + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': R_filter, + 'lc.resistor5.R': R_filter, + 'lc.resistor6.R': R_filter, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + ) + + if wrapper_mode in ['past', 'i_load']: + env_test = FeatureWrapper_pastVals(env_test, number_of_features=9 + used_number_past_vales * 3, + # training_episode_length=training_episode_length, (da aus pickle!) 
+ recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, + number_past_vals=used_number_past_vales) + + + elif wrapper_mode == 'no-I-term': + env_test = BaseWrapper(env_test, number_of_features=6 + used_number_past_vales * 3, + recorder=mongo_recorder, n_trail=n_trail, gamma=gamma, + number_past_vals=used_number_past_vales) + + else: + env_test = FeatureWrapper(env_test, number_of_features=11, + recorder=mongo_recorder, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, + penalty_P_weight=0) # , use_past_vals=True, number_past_vals=30) + + if wrapper_mode not in ['no-I-term', 'I-controller']: + env_test.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + # model2 = DDPG.load(model_path + f'model.zip') # , env=env_test) + print('Before load') + + model = DDPG.load(model_path + f'{used_model}', env=env_test) + + print('After load') + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + if wrapper_mode not in ['no-I-term', 'I-controller']: + env_test.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + return_sum = 0.0 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + rew_list = [] + v_d = [] + v_q = [] + v_0 = [] + action_P0 = [] + action_P1 = [] + action_P2 = [] + action_I0 = [] + action_I1 = [] + action_I2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + R_load = [] + + ####### Run Test ######### + # agent ~ PI Controllerv using env + # model ~ RL Controller using env_test + # Both run in the same loop + + obs = env_test.reset() + + for step in tqdm(range(env_test.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + action, _states = model.predict(obs, deterministic=True) + if step == 988: + asd = 1 + obs, rewards, done, info = env_test.step(action) + action_P0.append(np.float64(action[0])) + action_P1.append(np.float64(action[1])) + action_P2.append(np.float64(action[2])) + if wrapper_mode not in ['no-I-term', 'I-controller']: + action_I0.append(np.float64(action[3])) + action_I1.append(np.float64(action[4])) + action_I2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + + # print(rewards) + if done: + env_test.close() + + # print(limit_exceeded_in_test) + break + + env_test.close() + + v_a = env_test.history.df['lc.capacitor1.v'] + v_b = env_test.history.df['lc.capacitor2.v'] + v_c = 
env_test.history.df['lc.capacitor3.v'] + i_a = env_test.history.df['lc.inductor1.i'] + i_b = env_test.history.df['lc.inductor2.i'] + i_c = env_test.history.df['lc.inductor3.i'] + R_load = (env_test.history.df['r_load.resistor1.R'].tolist()) + phase = env_test.history.df['inverter1.phase.0'] # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + + i_d = i_dq0[0].tolist() + i_q = i_dq0[1].tolist() + i_0 = i_dq0[2].tolist() + v_d = (v_dq0[0].tolist()) + v_q = (v_dq0[1].tolist()) + v_0 = (v_dq0[2].tolist()) + + + plt.plot(v_d) + plt.show() + plt.plot(v_q) + plt.show() + plt.plot(v_0) + plt.show() + + # return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + print(f'RL: {(return_sum / env_test.max_episode_steps + limit_exceeded_penalty)}') + # print(f'PI: {(return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI)}') + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "model name": model_name, + "Wrapper": wrapper, + "used_number_past_vales": used_number_past_vales, + "time": ts, + "ActionP0": action_P0, + "ActionP1": action_P1, + "ActionP2": action_P2, + "ActionI0": action_I0, + "ActionI1": action_I1, + "ActionI2": action_I2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "DDPG_model_path": model_path, + "Return DDPG": (return_sum / env_test.max_episode_steps + limit_exceeded_penalty), + "Reward DDPG": rew_list, + "env_hist_DDPG": env_test.env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "execution of RL agent on 10 s test case-loading values", + "optimization node": 'Thinkpad', + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/' + used_model + f'_{max_episode_steps_list[max_eps_steps]}steps') + + ret_list.append((return_sum / env_test.max_episode_steps + limit_exceeded_penalty)) + ret_array[ave_run] = (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + # ret_dict[str(ave_run)] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # zipped = zip(max_episode_steps_list[max_eps_steps], ret_list) + # temp_dict = dict(zipped) + temp_dict = {str(max_episode_steps_list[max_eps_steps]): ret_list} + result_list.append(temp_dict) + # ret_dict.append(zipped) + # df = df.append(ret_dict) + + mean_list.append(np.mean(ret_array)) + std_list.append(np.std(ret_array)) + +# df = df.append(temp_list, True) +print(mean_list) +print(std_list) +print(result_list) + +results = { + 'Mean': mean_list, + 'Std': std_list, + 'All results': result_list, + 'max_episode_steps_list': max_episode_steps_list +} + +df = pd.DataFrame(results) +# df.to_pickle("DDPG_study18_best_test_varianz.pkl") +asd = 1 diff --git a/experiments/P10/viz/CompareModelsPlotting_P10.py b/experiments/P10/viz/CompareModelsPlotting_P10.py new file mode 100644 index 00000000..e5488d37 --- /dev/null +++ b/experiments/P10/viz/CompareModelsPlotting_P10.py @@ -0,0 +1,234 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +make_pyplot = False +show_load = True +interval_plt = True + +# interval_list_x = [[0, 0.01], [0.01, 1.0], [0.78, 0.9]] +# interval_list_y = [[-25, 210], [-40, 210], [165, 175]] + + +# Fuer den Detzerministc case +interval_list_x = [[0, 0.01], 
[0.105, 0.2], [0.695, 0.71], [0.85, 0.88]] +interval_list_y = [[-25, 400], [-25, 400], [-25, 400], [-25, 400]] + +# Fuer den 10s Fall +# interval_list_x = [[0, 0.02], [2.09, 2.1], [2.11, 2.12], [7.08, 7.16], [7.145, 7.16]] +# interval_list_y = [[-50, 400], [-25, 340], [160, 190], [-25, 340], [125, 340]] + +run = '204 Return: -2 ' +# run = '374 Return: 0 ' +# run = '213 Return: 0.8 ' +run = '1080 Return: 0.9 ' + +folder_names = ['saves/P10_SEC_R_load'] # _deterministic' + + +number_of_steps = '_1000steps' +""" +df = pd.read_pickle('saves/P10_I_term_2/204_deterministic/PI' + number_of_steps) + +env_hist_PI = df['env_hist_PI'] +v_a_PI = env_hist_PI[0]['lc.capacitor1.v'].tolist() +v_b_PI = env_hist_PI[0]['lc.capacitor2.v'].tolist() +v_c_PI = env_hist_PI[0]['lc.capacitor3.v'].tolist() +R_load_PI = (env_hist_PI[0]['r_load.resistor1.R'].tolist()) +phase_PI = env_hist_PI[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) +v_d_PI = (v_dq0_PI[0].tolist()) +v_q_PI = (v_dq0_PI[1].tolist()) +v_0_PI = (v_dq0_PI[2].tolist()) + +reward_PI = df['Reward PI'][0] +return_PI = df['Return PI'][0] +kp_c = df['PI_Kp_c'][0] +ki_c = df['PI_Ki_c'][0] +kp_v = df['PI_Kp_v'][0] +ki_v = df['PI_Ki_v'][0] +""" +model_names = ['model.zip'] +ylabels = ['DDPG-I+pastVals'] + +pastVals = ['16'] +return_list_DDPG = [] +reward_list_DDPG = [] + +ts = 1e-4 # if ts stored: take from db + +# t_test_R = np.arange(ts, (len(testcase_100k['v_d_PI'])) * ts, ts).tolist() + +t_test = np.arange(0, round((len(v_0_PI)) * ts, 4), ts).tolist() +t_reward = np.arange(0, round((len(reward_PI)) * ts, 4), ts).tolist() + +# fig, axs = plt.subplots(len(model_names)+2, len(interval_list_y), figsize=(16, 12)) # , sharex=True) # a new figure window +fig, axs = plt.subplots(len(model_names) + 3, len(interval_list_y), + figsize=(12, 10)) # , sharex=True) # a new figure window + +for i in range(len(interval_list_y)): + plt_count = 3 + ############## Subplots + # fig = plt.figure(figsize=(10,12)) # a new figure window + + for model_name, pV, folder_name, ylabel_use in zip(model_names, pastVals, folder_names, ylabels): + + df_DDPG = pd.read_pickle(folder_name + '/' + model_name + number_of_steps) + # df_DDPG = pd.read_pickle(folder_name + '/' 'model_5_pastVals.zip_100000steps_NoPhaseFeature_1427') + + if i == 0: + return_list_DDPG.append(round(df_DDPG['Return DDPG'][0], 7)) + # reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + + env_hist_DDPG = df_DDPG['env_hist_DDPG'] + + v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() + v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() + v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() + phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + v_d_DDPG = (v_dq0[0].tolist()) + v_q_DDPG = (v_dq0[1].tolist()) + v_0_DDPG = (v_dq0[2].tolist()) + + axs[0, i].plot(t_test, R_load_PI) + axs[0, i].grid() + axs[0, i].set_xlim(interval_list_x[i]) + # axs[0, i].set_ylim([15, 75]) + if i == 0: + axs[0, i].set_ylabel("$R_{\mathrm{load}}\,/\,\mathrm{\Omega}$") + # ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + + DDPG_reward = df_DDPG['Reward DDPG'][0] + if plt_count == 3: + axs[1, i].plot(t_reward, reward_PI, 'b', label=f' PI: ' + f'{round(sum(reward_PI[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + axs[1, i].plot(t_reward, DDPG_reward, 'r', label=f'DDPG: ' + 
f'{round(sum(DDPG_reward[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + axs[1, i].grid() + axs[1, i].set_xlim(interval_list_x[i]) + # axs[1, i].set_ylim(interval_list_y[i]) + axs[1, i].legend() + if i == 0: + axs[1, i].set_ylabel("Reward") + + axs[2, i].plot(t_test, v_d_PI, 'b', label='v_d') + axs[2, i].plot(t_test, v_q_PI, 'r', label='v_q') + axs[2, i].plot(t_test, v_0_PI, 'g', label='v_0') + axs[2, i].grid() + axs[2, i].set_xlim(interval_list_x[i]) + axs[2, i].set_ylim(interval_list_y[i]) + if i == 0: + axs[2, i].set_ylabel("$v_{\mathrm{dq0, PI}}\,/\,\mathrm{V}$") + # else: + # axs[1, i].set_ylabel("$v_{\mathrm{q0, PI}}\,/\,\mathrm{V}$") + + axs[plt_count, i].plot(t_test, v_d_DDPG, 'b') + axs[plt_count, i].plot(t_test, v_q_DDPG, 'r') + axs[plt_count, i].plot(t_test, v_0_DDPG, 'g') + axs[plt_count, i].grid() + axs[plt_count, i].set_xlim(interval_list_x[i]) + axs[plt_count, i].set_ylim(interval_list_y[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + # axs[plt_count, i].set_ylabel(pV) + axs[plt_count, i].set_ylabel(ylabel_use) + # axs[plt_count, i].set_ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") + # else: + # axs[plt_count, i].set_ylabel("$v_{\mathrm{q0, DDPG}}\,/\,\mathrm{V}$") + plt_count += 1 + +fig.suptitle(run) + +fig.subplots_adjust(wspace=0.2, hspace=0.2) +plt.show() + +fig.savefig(f'{folder_name}/Ausschnitt_q0' + number_of_steps + '.pdf') + +if make_pyplot: + # pyplot Load + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=R_load_PI)) # , title='R_load') + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + # pyplot PI + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_reward, y=DDPG_reward)) + plot.add_trace( + px.Scatter(x=t_reward, y=reward_PI)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + for model_name in model_names: + df_DDPG = pd.read_pickle(folder_name + '/' + model_name + number_of_steps) + + env_hist_DDPG = df_DDPG['env_hist_DDPG'] + + v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() + v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() + v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() + phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + v_d_DDPG = (v_dq0[0].tolist()) + v_q_DDPG = (v_dq0[1].tolist()) + v_0_DDPG = (v_dq0[2].tolist()) + # pyplot ddpg + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=v_d_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_DDPG)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + plot.add_trace( + px.Scatter(x=t_test, y=v_d_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_PI)) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + +plt.plot(t_test, v_d_DDPG, 'b') +plt.plot(t_test, v_q_DDPG, 'r') +plt.plot(t_test, v_0_DDPG, 'g') +# plt.plot(t_test, v_d_PI, 'r') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +# plt.xlim([0.1, 0.11]) +# 
plt.ylim([290, 360]) +plt.xlabel("time") +plt.ylabel("v_dq0_DDPG") +plt.title(f'DDPG' + run) +plt.show() + +plt.plot(t_test, v_d_PI, 'b') +plt.plot(t_test, v_q_PI, 'r') +plt.plot(t_test, v_q_PI, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +# plt.xlim([0.1, 0.2]) +# plt.ylim([290, 360]) +plt.xlabel("time") +plt.ylabel("v_dq0_PI") +plt.title(f'PI') +plt.show() diff --git a/experiments/hp_tune/agents/my_ddpg.py b/experiments/hp_tune/agents/my_ddpg.py new file mode 100644 index 00000000..8998afc7 --- /dev/null +++ b/experiments/hp_tune/agents/my_ddpg.py @@ -0,0 +1,25 @@ +from stable_baselines3 import DDPG + +from experiments.hp_tune.agents.my_td3 import myTD3 + + +class myDDPG(DDPG, myTD3): + """ + Deep Deterministic Policy Gradient (DDPG) based on pytorch version from stable_baseline3. + + Additionally makes more training data accessible for logging e.g. in database + + Hint: for model.train() the train algorithm form myTD3 is used because python3 does NOT use depth-first search + (in that case DDPG->TD3.train() would have been taken) + See https://www.python-kurs.eu/python3_mehrfachvererbung.php "diamond_problem" + """ + + def __init__(self, *args, **kwargs): + super(myDDPG, self).__init__(*args, **kwargs) + # training variables for logging + self.critic_loss_batch_mean = [] # mean of critic losses of the batch + self.critic_estimate_target_diff_mean = [] # sum(Q_estimat - target)/N_batch_size + self.actor_loss_batch_mean = [] # mean of critic losses of the batch + self.current_q_estimates_batch_mean = [] # Q(s,a) (mean of the batch!) + self.target_q_batch_mean = [] # yi = r + gamma*Q_target(s',µ_target(s')) (mean of the batch!) + self.reward_batch_mean = [] # mean of the batch reward used in training diff --git a/experiments/hp_tune/agents/my_off_policy_algorithm.py b/experiments/hp_tune/agents/my_off_policy_algorithm.py new file mode 100644 index 00000000..95500236 --- /dev/null +++ b/experiments/hp_tune/agents/my_off_policy_algorithm.py @@ -0,0 +1,137 @@ +from typing import Optional +import numpy as np + +from stable_baselines3.common.buffers import ReplayBuffer +from stable_baselines3.common.callbacks import BaseCallback +from stable_baselines3.common.noise import ActionNoise +from stable_baselines3.common.off_policy_algorithm import OffPolicyAlgorithm + +from stable_baselines3.common.type_aliases import RolloutReturn +from stable_baselines3.common.vec_env import VecEnv + + +class myOffPolicyAlgorithm(OffPolicyAlgorithm): + + def collect_rollouts( + self, + env: VecEnv, + callback: BaseCallback, + n_episodes: int = 1, + n_steps: int = -1, + action_noise: Optional[ActionNoise] = None, + learning_starts: int = 0, + replay_buffer: Optional[ReplayBuffer] = None, + log_interval: Optional[int] = None, + ) -> RolloutReturn: + """ + Collect experiences and store them into a ReplayBuffer. + + :param env: The training environment + :param callback: Callback that will be called at each step + (and at the beginning and end of the rollout) + :param n_episodes: Number of episodes to use to collect rollout data + You can also specify a ``n_steps`` instead + :param n_steps: Number of steps to use to collect rollout data + You can also specify a ``n_episodes`` instead. + :param action_noise: Action noise that will be used for exploration + Required for deterministic policy (e.g. TD3). This can also be used + in addition to the stochastic policy for SAC. + :param learning_starts: Number of steps before learning for the warm-up phase. 
+ :param replay_buffer: + :param log_interval: Log data every ``log_interval`` episodes + :return: + """ + episode_rewards, total_timesteps = [], [] + total_steps, total_episodes = 0, 0 + + assert isinstance(env, VecEnv), "You must pass a VecEnv" + assert env.num_envs == 1, "OffPolicyAlgorithm only support single environment" + + if self.use_sde: + self.actor.reset_noise() + + callback.on_rollout_start() + continue_training = True + + while total_steps < n_steps or total_episodes < n_episodes: + done = False + episode_reward, episode_timesteps = 0.0, 0 + + while not done: + + if self.use_sde and self.sde_sample_freq > 0 and total_steps % self.sde_sample_freq == 0: + # Sample a new noise matrix + self.actor.reset_noise() + + # Select action randomly or according to policy + action, buffer_action = self._sample_action(learning_starts, action_noise) + + # Rescale and perform action + new_obs, reward, done, infos = env.step(action) + + self.num_timesteps += 1 + episode_timesteps += 1 + total_steps += 1 + + # Give access to local variables + callback.update_locals(locals()) + # Only stop training if return value is False, not when it is None. + if callback.on_step() is False: + return RolloutReturn(0.0, total_steps, total_episodes, continue_training=False) + + episode_reward += reward + + # Retrieve reward and episode length if using Monitor wrapper + self._update_info_buffer(infos, done) + + # Store data in replay buffer + if replay_buffer is not None: + # Store only the unnormalized version + if self._vec_normalize_env is not None: + new_obs_ = self._vec_normalize_env.get_original_obs() + reward_ = self._vec_normalize_env.get_original_reward() + else: + # Avoid changing the original ones + self._last_original_obs, new_obs_, reward_ = self._last_obs, new_obs, reward + + if not infos[0]['timelimit_reached']: + replay_buffer.add(self._last_original_obs, new_obs_, buffer_action, reward_, done) + + if infos[0]['timelimit_reached']: + # self._last_original_obs = None + self._last_obs = self.env.reset() + else: + self._last_obs = new_obs + # Save the unnormalized observation + if self._vec_normalize_env is not None: + self._last_original_obs = new_obs_ + + self._update_current_progress_remaining(self.num_timesteps, self._total_timesteps) + + # For DQN, check if the target network should be updated + # and update the exploration schedule + # For SAC/TD3, the update is done as the same time as the gradient update + # see https://github.com/hill-a/stable-baselines/issues/900 + self._on_step() + + if 0 < n_steps <= total_steps: + break + + if done: + total_episodes += 1 + self._episode_num += 1 + episode_rewards.append(episode_reward) + total_timesteps.append(episode_timesteps) + + if action_noise is not None: + action_noise.reset() + + # Log training infos + if log_interval is not None and self._episode_num % log_interval == 0: + self._dump_logs() + + mean_reward = np.mean(episode_rewards) if total_episodes > 0 else 0.0 + + callback.on_rollout_end() + + return RolloutReturn(mean_reward, total_steps, total_episodes, continue_training) diff --git a/experiments/hp_tune/agents/my_td3.py b/experiments/hp_tune/agents/my_td3.py new file mode 100644 index 00000000..cc74b8e2 --- /dev/null +++ b/experiments/hp_tune/agents/my_td3.py @@ -0,0 +1,153 @@ +from stable_baselines3 import TD3 + +import numpy as np +import torch as th +from torch.nn import functional as F +from stable_baselines3.common import logger +from stable_baselines3.common.utils import polyak_update + +from 
experiments.hp_tune.agents.my_off_policy_algorithm import myOffPolicyAlgorithm + + +# class myTD3(TD3, myOffPolicyAlgorithm): # so, falls timelimit_reached verwendet werden soll +class myTD3(TD3): + + def train(self, gradient_steps: int, batch_size: int = 100) -> None: + + # Update learning rate according to lr schedule + self._update_learning_rate([self.actor.optimizer, self.critic.optimizer]) + + actor_losses, critic_losses = [], [] + + for gradient_step in range(gradient_steps): + + self._n_updates += 1 + # Sample replay buffer + replay_data = self.replay_buffer.sample(batch_size, env=self._vec_normalize_env) + + with th.no_grad(): + # Select action according to policy and add clipped noise + noise = replay_data.actions.clone().data.normal_(0, self.target_policy_noise) + noise = noise.clamp(-self.target_noise_clip, self.target_noise_clip) + next_actions = (self.actor_target(replay_data.next_observations) + noise).clamp(-1, 1) + + # Compute the next Q-values: min over all critics targets + next_q_values = th.cat(self.critic_target(replay_data.next_observations, next_actions), dim=1) + next_q_values, _ = th.min(next_q_values, dim=1, keepdim=True) + target_q_values = replay_data.rewards + (1 - replay_data.dones) * self.gamma * next_q_values + + # Get current Q-values estimates for each critic network + current_q_values = self.critic(replay_data.observations, replay_data.actions) + + # Compute critic loss + critic_loss = sum([F.mse_loss(current_q, target_q_values) for current_q in current_q_values]) + critic_losses.append(critic_loss.item()) + + # store data for logging - use mean from batch + self.critic_loss_batch_mean.append(critic_loss.item()) + self.critic_estimate_target_diff_mean.append( + (sum(current_q_values[0] - target_q_values) / target_q_values.shape[0]).item()) + self.current_q_estimates_batch_mean.append(current_q_values[0].mean().item()) + self.target_q_batch_mean.append(np.mean(target_q_values.mean().item())) + self.reward_batch_mean.append(np.mean(replay_data.rewards.mean().item())) + + # Optimize the critics + self.critic.optimizer.zero_grad() + critic_loss.backward() + self.critic.optimizer.step() + + # Delayed policy updates + if self._n_updates % self.policy_delay == 0: + # Compute actor loss + actor_loss = -self.critic.q1_forward(replay_data.observations, + self.actor(replay_data.observations)).mean() + actor_losses.append(actor_loss.item()) + + # Optimize the actor + self.actor.optimizer.zero_grad() + actor_loss.backward() + self.actor.optimizer.step() + + polyak_update(self.critic.parameters(), self.critic_target.parameters(), self.tau) + polyak_update(self.actor.parameters(), self.actor_target.parameters(), self.tau) + + # store data for logging - use mean from batch + self.actor_loss_batch_mean.append(np.mean(actor_losses)) + + logger.record("train/n_updates", self._n_updates, exclude="tensorboard") + if len(actor_losses) > 0: + logger.record("train/actor_loss", np.mean(actor_losses)) + logger.record("train/critic_loss", np.mean(critic_losses)) + + """ + def train(self, gradient_steps: int, batch_size: int = 100) -> None: + + # Update learning rate according to lr schedule + self._update_learning_rate([self.actor.optimizer, self.critic.optimizer]) + + actor_losses, critic_losses = [], [] + + for gradient_step in range(gradient_steps): + + # Sample replay buffer + replay_data = self.replay_buffer.sample(batch_size, env=self._vec_normalize_env) + + with th.no_grad(): + # Select action according to policy and add clipped noise + noise = 
replay_data.actions.clone().data.normal_(0, self.target_policy_noise) + noise = noise.clamp(-self.target_noise_clip, self.target_noise_clip) + next_actions = (self.actor_target(replay_data.next_observations) + noise).clamp(-1, 1) + + # Compute the target Q value: min over all critics targets + targets = th.cat(self.critic_target(replay_data.next_observations, next_actions), dim=1) + target_q, _ = th.min(targets, dim=1, keepdim=True) + # toDo: Fusch am Bau + # if timelimit -> reset: use target_q! + # if done = True caused by abort -> do not use target_q + target_q = replay_data.rewards + (1 - replay_data.dones) * self.gamma * target_q + + # Get current Q estimates for each critic network + current_q_estimates = self.critic(replay_data.observations, replay_data.actions) + + # Compute critic loss + critic_loss = sum([F.mse_loss(current_q, target_q) for current_q in current_q_estimates]) + critic_losses.append(critic_loss.item()) + + # store data for logging - use mean from batch + self.critic_loss_batch_mean.append(critic_loss.item()) + self.critic_estimate_target_diff_mean.append( + (sum(current_q_estimates[0] - target_q) / target_q.shape[0]).item()) + self.current_q_estimates_batch_mean.append(current_q_estimates[0].mean().item()) + self.target_q_batch_mean.append(np.mean(target_q.mean().item())) + self.reward_batch_mean.append(np.mean(replay_data.rewards.mean().item())) + + # Optimize the critics + self.critic.optimizer.zero_grad() + critic_loss.backward() + self.critic.optimizer.step() + + # Delayed policy updates + if gradient_step % self.policy_delay == 0: + # Compute actor loss + actor_loss = -self.critic.q1_forward(replay_data.observations, + self.actor(replay_data.observations)).mean() + actor_losses.append(actor_loss.item()) + + # Optimize the actor + self.actor.optimizer.zero_grad() + actor_loss.backward() + self.actor.optimizer.step() + + polyak_update(self.critic.parameters(), self.critic_target.parameters(), self.tau) + polyak_update(self.actor.parameters(), self.actor_target.parameters(), self.tau) + + # store data for logging - use mean from batch + self.actor_loss_batch_mean.append(np.mean(actor_losses)) + + self._n_updates += gradient_steps + # print('new Training function!') + + logger.record("train/n_updates", self._n_updates, exclude="tensorboard") + logger.record("train/actor_loss", np.mean(actor_losses)) + logger.record("train/critic_loss", np.mean(critic_losses)) + """ diff --git a/experiments/hp_tune/comparison_PI_ddpg.py b/experiments/hp_tune/comparison_PI_ddpg.py new file mode 100644 index 00000000..55d935ba --- /dev/null +++ b/experiments/hp_tune/comparison_PI_ddpg.py @@ -0,0 +1,726 @@ +import logging +import os +import platform +import time +from functools import partial + +import GPy +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from stochastic.processes import VasicekProcess +from tqdm import tqdm +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# imports for PIPI +from experiments.hp_tune.env.random_load import RandomLoad +from openmodelica_microgrid_gym.agents import 
SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + +import pandas as pd + +from openmodelica_microgrid_gym.util import abc_to_dq0 + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +import gym + +# np.random.seed(0) + +show_plots = False +balanced_load = False +save_results = False + +folder_name = 'Comparison_PI_DDPG_retrain_oneLoadstep' # cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] + +num_average = 1 +max_episode_steps_list = [10000] # [1000, 5000, 10000, 20000, 50000, 100000] + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +#################PI Config stuff############################################################################## + +current_directory = os.getcwd() +# folder_name = 'Pipi_safeopt_best_run4d' +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_vctrl_single_inv.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +# max_episode_steps = 1002 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from +v_DC = 600 # DC-link voltage / V; will be set as model parameter in the FMU +nomFreq = 60 # nominal grid frequency / Hz +nomVoltPeak = 169.7 # 230 * 1.414 # nominal grid voltage / V +iLimit = 16 # inverter current limit / A +iNominal = 12 # nominal inverter current / A +vNominal = 190 # nominal inverter current / A +vLimit = vNominal * 1.5 # inverter current limit / A +funnelFactor = 0.02 +vFunnel = np.array([vNominal * funnelFactor, vNominal * funnelFactor, vNominal * funnelFactor]) +mu = 400 # factor for barrier function (see below) +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + +##################################### +# Definitions for the GP +prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set +noise_var = 0.001 # ** 2 # measurement noise sigma_omega +prior_var = 2 # prior variance of the GP + +# Choose Kp and Ki (current and voltage controller) as mutable parameters (below) and define bounds and lengthscale +# for both of them +bounds = [(0.000, 0.045), (4, 450)] # bounds on the input variable current-Ki&Kp and voltage-Ki&Kp +lengthscale = [.003, 50.] 
# length scale for the parameter variation [current-Ki&Kp and voltage-Ki&Kp] for the GP + +# The performance should not drop below the safe threshold, which is defined by the factor safe_threshold times +# the initial performance: safe_threshold = 1.2 means: performance measurement for optimization are seen as +# unsafe, if the new measured performance drops below 20 % of the initial performance of the initial safe (!) +# parameter set +safe_threshold = 0 +j_min = -5 # cal min allowed performance + +# The algorithm will not try to expand any points that are below this threshold. This makes the algorithm stop +# expanding points eventually. +# The following variable is multiplied with the first performance of the initial set by the factor below: +explore_threshold = 0 + +# Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of +# limit exceeded +abort_reward = 100 * j_min + +# Definition of the kernel +kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + +##################################### +# Definition of the controllers +# kp_v = 0.002 +# ki_v = 143 +kp_v = 0 # 0.0095 # 0.0 +ki_v = 182 # 173.22 # 200 +# Choose Kp and Ki for the current and voltage controller as mutable parameters +mutable_params = dict(voltageP=MutableFloat(kp_v), voltageI=MutableFloat(ki_v)) # 300Hz +# mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz +voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-iLimit, iLimit)) + +# kp_c = 0.033 +# ki_c = 17.4 # 11.8 + +kp_c = 0.0308 # 0.0404 # 0.04 +ki_c = 13.3584 # 4.065 # 11.8 +current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) # Current controller values + +# Define the droop parameters for the inverter of the active power Watt/Hz (DroopGain), delta_t (0.005) used for the +# filter and the nominal frequency +# Droop controller used to calculate the virtual frequency drop due to load changes +droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) + +# Define the Q-droop parameters for the inverter of the reactive power VAR/Volt, delta_t (0.002) used for the +# filter and the nominal voltage +qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + +# Define a voltage forming inverter using the PIPI and droop parameters from above + +# Controller with observer +# ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, delta_t, droop_param, qdroop_param, +# observer=[Lueneberger(*params) for params in +# repeat((A, B, C, L, delta_t * undersample, v_DC / 2), 3)], undersampling=undersample, +# name='master') + +# Controller without observer +ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + +##################################### +# Definition of the optimization agent +# The agent is using the SafeOpt algorithm by F. 
Berkenkamp (https://arxiv.org/abs/1509.01066) in this example +# Arguments described above +# History is used to store results +agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) +upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 + +################DDPG Config Stuff######################################################################### +gamma = 0.946218 +integrator_weight = 0.311135 +antiwindup_weight = 0.660818 +model_path = 'experiments/hp_tune/trained_models/study_22_run_11534/' +error_exponent = 0.5 +use_gamma_in_rew = 1 +n_trail = 50001 +actor_number_layers = 2 +critic_number_layers = 4 +alpha_relu_actor = 0.208098 +alpha_relu_critic = 0.00678497 + +for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + for ave_run in tqdm(range(num_average), desc='steps', unit='step', leave=False): + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, + i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + ####################################PI Stuff################################################ + R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + rand_load_train = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + + cb = CallbackList() + # set initial = None to reset load random in range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + cb.append(rand_load_train.reset) + + plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + + + # rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + # load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name 
+ experiment_name + n_trail}/Inductor_currents.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + # PlotTmpl([[f'master.I_hat{i}' for i in 'abc'], [f'r_load.resistor{i}.i' for i in '123'], ], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$i_{\mathrm{o estimate,abc}}\,/\,\mathrm{A}$'), + # color=[['b', 'r', 'g'], ['b', 'r', 'g']], + # style=[['-*'], ['--*']] + # ), + # PlotTmpl([[f'master.m{i}' for i in 'dq0']], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$m_{\mathrm{dq0}}\,/\,\mathrm{}$', + # filename='Sim_m_dq0') + # ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + # log_level=logging.INFO, + viz_mode='episode', + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.random_load_step, + 'r_load.resistor2.R': rand_load_train.random_load_step, + 'r_load.resistor3.R': rand_load_train.random_load_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + 
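+                   # Editor's note (assumption): the lambdas in model_params above return a random value only for
+                   # t == -1, which presumably marks the reset call, so capacitor voltages and inductor currents
+                   # are re-initialized uniformly within their nominal range at every episode start; returning
+                   # None during the simulation leaves the FMU states untouched.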
history=FullHistory(), + # on_episode_reset_callback=cb.fire, + action_time_delay=1 * undersample + ) + + return_sum_PI = 0.0 + rew_list_PI = [] + v_d_PI = [] + v_q_PI = [] + v_0_PI = [] + R_load_PI = [] + limit_exceeded_in_test_PI = False + limit_exceeded_penalty_PI = 0 + + ####################################DDPG Stuff############################################## + return_sum = 0.0 + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'], + max_episode_steps=max_episode_steps_list[max_eps_steps], + + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.random_load_step, + 'r_load.resistor2.R': rand_load_train.random_load_step, + 'r_load.resistor3.R': rand_load_train.random_load_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + on_episode_reset_callback=cb.fire + ) + + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, penalty_P_weight=0) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + + env_test.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + # model = DDPG.load(model_path + f'model.zip') # , env=env_test) + model = DDPG.load(model_path + f'model_retrained.zip') # , env=env_test) + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + env_test.action_space = 
gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + rew_list = [] + v_d = [] + v_q = [] + v_0 = [] + R_load = [] + + ####### Run Test ######### + # agent ~ PI Controllerv using env + # model ~ RL Controller using env_test + # Both run in the same loop + + agent.reset() + agent.obs_varnames = env.history.cols + env.history.cols = env.history.structured_cols(None) + agent.measurement_cols + env.measure = agent.measure + agent_fig = None + + obs = env_test.reset() + obs_PI = env.reset() + + for step in tqdm(range(env_test.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + agent.observe(None, False) + act_PI = agent.act(obs_PI) + obs_PI, r_PI, done_PI, info_PI = env.step(act_PI) + rew_list_PI.append(r_PI) + env.render() + return_sum_PI += r_PI + if r_PI == -1 and not limit_exceeded_in_test_PI: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test_PI = True + limit_exceeded_penalty_PI = -1 + + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + + """ + v_a = env_test.history.df['lc.capacitor1.v'].iloc[-1] + v_b = env_test.history.df['lc.capacitor2.v'].iloc[-1] + v_c = env_test.history.df['lc.capacitor3.v'].iloc[-1] + R_load.append(env_test.history.df['r_load.resistor1.R'].iloc[-1]) + + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), env_test.env.net.components[0].phase) + + v_d.append(v_dq0[0]) + v_q.append(v_dq0[1]) + v_0.append(v_dq0[2]) + + v_a_PI = env.history.df['lc.capacitor1.v'].iloc[-1] + v_b_PI = env.history.df['lc.capacitor2.v'].iloc[-1] + v_c_PI = env.history.df['lc.capacitor3.v'].iloc[-1] + R_load_PI.append(env.history.df['r_load.resistor1.R'].iloc[-1]) + + v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), env.net.components[0].phase) + + v_d_PI.append(v_dq0_PI[0]) + v_q_PI.append(v_dq0_PI[1]) + v_0_PI.append(v_dq0_PI[2]) + """ + + if step % 10000 == 0 and step != 0: + print("10%") + """ + env_test.close() + obs = env_test.reset() + + env.close() + agent.reset() + obs_PI = env.reset() + """ + + # print(rewards) + if done: + env_test.close() + + # print(limit_exceeded_in_test) + break + v_a = env_test.history.df['lc.capacitor1.v'] + v_b = env_test.history.df['lc.capacitor2.v'] + v_c = env_test.history.df['lc.capacitor3.v'] + i_a = env_test.history.df['lc.inductor1.i'] + i_b = env_test.history.df['lc.inductor2.i'] + i_c = env_test.history.df['lc.inductor3.i'] + R_load = (env_test.history.df['r_load.resistor1.R'].tolist()) + phase = env_test.history.df['inverter1.phase.0'] # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + + i_d = i_dq0[0].tolist() + i_q = i_dq0[1].tolist() + i_0 = i_dq0[2].tolist() + v_d = (v_dq0[0].tolist()) + v_q = (v_dq0[1].tolist()) + v_0 = (v_dq0[2].tolist()) + + v_a_PI = env.history.df['lc.capacitor1.v'] + v_b_PI = env.history.df['lc.capacitor2.v'] + v_c_PI = env.history.df['lc.capacitor3.v'] + i_a_PI = env.history.df['lc.inductor1.i'] + i_b_PI = env.history.df['lc.inductor2.i'] + i_c_PI = env.history.df['lc.inductor3.i'] + R_load_PI = 
(env.history.df['r_load.resistor1.R'].tolist()) + phase_PI = env.history.df['inverter1.phase.0'] # env.net.components[0].phase + + i_dq0_PI = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) + + i_d_PI = i_dq0_PI[0].tolist() + i_q_PI = i_dq0_PI[1].tolist() + i_0_PI = i_dq0_PI[2].tolist() + v_d_PI = (v_dq0_PI[0].tolist()) + v_q_PI = (v_dq0_PI[1].tolist()) + v_0_PI = (v_dq0_PI[2].tolist()) + + env_test.close() + _, env_fig = env.close() + agent.observe(r_PI, done_PI) + + plt.plot(v_d, 'b') + plt.plot(v_q, 'r') + plt.plot(v_0, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title('DDPG') + plt.show() + + plt.plot(R_load, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('Test') + plt.show() + + plt.plot(v_d_PI, 'b') + plt.plot(v_q_PI, 'r') + plt.plot(v_0_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title('PI') + plt.show() + + plt.plot(R_load_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('Test') + plt.show() + + # return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + print(f'RL: {(return_sum / env_test.max_episode_steps + limit_exceeded_penalty)}') + print(f'PI: {(return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI)}') + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "time": ts, + "PI_Kp_c": kp_c, + "PI_Ki_c": ki_c, + "PI_Kp_v": kp_v, + "PI_Ki_v": ki_v, + "DDPG_model_path": model_path, + "Return PI": (return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI), + "Return DDPG": (return_sum / env_test.max_episode_steps + limit_exceeded_penalty), + "v_d_PI": v_d_PI, + "v_q_PI": v_q_PI, + "v_0_PI": v_0_PI, + "v_d_DDPG": v_d, + "v_q_DDPG": v_q, + "v_0_DDPG": v_0, + "R_load": R_load, + "R_load_PI": R_load_PI, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "best of new 4D unsafe optimization of 300 runs (picard) to figure out the boundaries " + "of the statespace without reset", + "optimization node": 'Thinkpad', + "info2": 'storing the current and modulation indices as well', + "optimization folder name": 'Pipi_new_testcase_opt_4d_undsafe_2' + } + node = platform.uname().node + + # mongo_recorder = Recorder(database_name=folder_name) + + # mongo_recorder.save_to_mongodb('Comparison1' + n_trail, compare_result) + mongo_recorder.save_to_mongodb('Comparison_4D_optimizedPIPI_retrainDDPG', + compare_result) + # mongo_recorder.save_to_mongodb('Comparison_2D_optimizedPIPI', compare_result) + + ret_list.append((return_sum / env_test.max_episode_steps + limit_exceeded_penalty)) + ret_array[ave_run] = (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + # ret_dict[str(ave_run)] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # zipped = zip(max_episode_steps_list[max_eps_steps], ret_list) + # temp_dict = dict(zipped) + temp_dict = {str(max_episode_steps_list[max_eps_steps]): ret_list} + result_list.append(temp_dict) + # ret_dict.append(zipped) + # df = df.append(ret_dict) + + mean_list.append(np.mean(ret_array)) + std_list.append(np.std(ret_array)) + +# df = df.append(temp_list, True) +print(mean_list) +print(std_list) +print(result_list) + +results = { + 'Mean': mean_list, + 'Std': std_list, + 'All results': result_list, + 'max_episode_steps_list': max_episode_steps_list +} + +df = pd.DataFrame(results) +# 
df.to_pickle("DDPG_study18_best_test_varianz.pkl") +asd = 1 +""" +m = np.array(df['Mean']) +s = np.array(df['Std']) +max_episode_steps_list = np.array(df['max_episode_steps_list']) + +plt.plot(max_episode_steps_list, m) +plt.fill_between(max_episode_steps_list, m - s, m + s, facecolor='r') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('DDPG') +plt.show() + +# plt.plot(max_episode_steps_list, m) +# plt.fill_between(max_episode_steps_list, m - s, m + s, facecolor='r') +plt.errorbar(max_episode_steps_list, m, s, fmt='-o') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('DDPG') +plt.show() + +plt.plot(max_episode_steps_list, s) +plt.ylabel('std') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('DDPG') +plt.show() +""" diff --git a/experiments/hp_tune/data/R_load_dessca.pkl b/experiments/hp_tune/data/R_load_dessca.pkl new file mode 100644 index 00000000..6a804f58 Binary files /dev/null and b/experiments/hp_tune/data/R_load_dessca.pkl differ diff --git a/experiments/hp_tune/data/R_load_deterministic_test_case2_1_seconds.pkl b/experiments/hp_tune/data/R_load_deterministic_test_case2_1_seconds.pkl new file mode 100644 index 00000000..3512f7c0 Binary files /dev/null and b/experiments/hp_tune/data/R_load_deterministic_test_case2_1_seconds.pkl differ diff --git a/experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl b/experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl new file mode 100644 index 00000000..4a16afee Binary files /dev/null and b/experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl differ diff --git a/experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl b/experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl new file mode 100644 index 00000000..73659a6f Binary files /dev/null and b/experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl differ diff --git a/experiments/hp_tune/data/R_load_oneLoadstepPerEpisode2881Len_test_case_10_seconds.pkl b/experiments/hp_tune/data/R_load_oneLoadstepPerEpisode2881Len_test_case_10_seconds.pkl new file mode 100644 index 00000000..e6bfbc06 Binary files /dev/null and b/experiments/hp_tune/data/R_load_oneLoadstepPerEpisode2881Len_test_case_10_seconds.pkl differ diff --git a/experiments/hp_tune/data/R_load_test_case_1_second b/experiments/hp_tune/data/R_load_test_case_1_second new file mode 100644 index 00000000..506c8389 Binary files /dev/null and b/experiments/hp_tune/data/R_load_test_case_1_second differ diff --git a/experiments/hp_tune/data/R_load_test_case_2_seconds.pkl b/experiments/hp_tune/data/R_load_test_case_2_seconds.pkl new file mode 100644 index 00000000..756a199d Binary files /dev/null and b/experiments/hp_tune/data/R_load_test_case_2_seconds.pkl differ diff --git a/experiments/hp_tune/env/__init__.py b/experiments/hp_tune/env/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/experiments/hp_tune/env/env_wrapper.py b/experiments/hp_tune/env/env_wrapper.py new file mode 100644 index 00000000..b38bc5e8 --- /dev/null +++ b/experiments/hp_tune/env/env_wrapper.py @@ -0,0 +1,1059 @@ +import platform +from functools import partial +from typing import Union + +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.type_aliases import GymStepReturn +from stochastic.processes import 
VasicekProcess + +from experiments.hp_tune.env.random_load import RandomLoad +from experiments.hp_tune.env.vctrl_single_inv import net +from experiments.hp_tune.util.config import cfg +from openmodelica_microgrid_gym.util import abc_to_alpha_beta, dq0_to_abc, abc_to_dq0, Fastqueue, RandProcess + + +class BaseWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", gamma=0, + number_learing_steps=500000, number_past_vals=0): + """ + Base Env Wrapper to add features to the env-observations and adds information to env.step output which can be + used in case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) + + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + # increase action-space for PI-seperation + # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1)) + + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self._v_pahsor = 0.0 + self.n_episode = 0 + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.reward_episode_mean = [] + self.n_trail = n_trail + self.phase = [] + self.used_P = np.zeros(self.action_space.shape) + self.gamma = gamma + self.number_learing_steps = number_learing_steps + self.delay_queues = [Fastqueue(1, 3) for _ in range(number_past_vals)] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + + if cfg['is_dq0']: + # Action: dq0 -> abc + action_abc = dq0_to_abc(action, self.env.net.components[0].phase) + else: + action_abc = action + + obs, reward, done, info = super().step(action_abc) + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! + obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + super().render() + + self._n_training_steps += 1 + + # if self._n_training_steps % round(self.training_episode_length / 10) == 0: + # self.env.on_episode_reset_callback() + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + super().close() + + # add wanted features here (add appropriate self.observation in init!!) 
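+        # Editor's note: this wrapper extends the raw observation with (i) the dq0 control error
+        # (v_ref - v_meas) / 2, (ii) the previously applied action (self.used_P) so the agent can learn the
+        # one-step actuation delay, and (iii) number_past_vals delayed dq0 voltage measurements that are
+        # shifted through the Fastqueue instances in self.delay_queues.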
+ # calculate magnitude of current phasor abc + # self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + # self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + "Reward function": 'rew.rew_fun_dq0', + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + self.recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + # todo efficiency? 
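+        # Editor's note: np.copy decouples the stored action from the array handed in by the caller,
+        # which may be reused or mutated by the agent; for a 3- or 6-element action the copy is cheap,
+        # so the efficiency concern noted above is presumably negligible.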
+ self.used_P = np.copy(action) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + [x.clear() for x in self.delay_queues] + obs = super().reset() + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! + obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + self._n_training_steps = 0 + self.used_P = np.zeros(self.action_space.shape) + + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + # obs = np.append(obs, delta_i_lim_i_phasor) + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + return obs + + def cal_phasor_magnitude(self, abc: np.array) -> float: + """ + Calculated the magnitude of a phasor in a three phase system. 
M + + :param abc: Due to limit normed currents or voltages in abc frame + :return: magnitude of the current or voltage phasor + """ + # calculate magnitude of current phasor abc-> alpha,beta ->|sqrt(alpha² + beta²)| + i_alpha_beta = abc_to_alpha_beta(abc) + i_phasor_mag = np.sqrt(i_alpha_beta[0] ** 2 + i_alpha_beta[1] ** 2) + + return i_phasor_mag + + def shift_and_append(self, obs): + """ + Takes the observation and shifts throught the queue + every queue output is added to total obs + """ + obs_delay_array = np.array([]) + obs_temp = obs + for queue in self.delay_queues: + obs_temp = queue.shift(obs_temp) + obs_delay_array = np.append(obs_delay_array, obs_temp) + + return obs_delay_array + + +class FeatureWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000): # , use_past_vals=False, number_past_vals=0): + """ + Env Wrapper to add features to the env-observations and adds information to env.step output which can be used in + case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) + + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + # increase action-space for PI-seperation + # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1)) + + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self._v_pahsor = 0.0 + self.n_episode = 0 + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.reward_episode_mean = [] + self.n_trail = n_trail + self.phase = [] + self.integrator_sum = np.zeros(self.action_space.shape) + self.integrator_weight = integrator_weight + self.antiwindup_weight = antiwindup_weight + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + self.gamma = gamma + self.penalty_I_weight = penalty_I_weight + self.penalty_P_weight = penalty_P_weight + self.t_start_penalty_I = t_start_penalty_I + self.t_start_penalty_P = t_start_penalty_P + self.number_learing_steps = number_learing_steps + self.integrator_sum_list0 = [] + self.integrator_sum_list1 = [] + self.integrator_sum_list2 = [] + self.action_P0 = [] + self.action_P1 = [] + self.action_P2 = [] + self.action_I0 = [] + self.action_I1 = [] + self.action_I2 = [] + + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. 
+ Triggers the env to reset without done=True every training_episode_length steps + """ + action_P = action[0:3] + action_I = action[3:6] + + self.integrator_sum += action_I * self.integrator_weight + + action_PI = action_P + self.integrator_sum + + if cfg['is_dq0']: + # Action: dq0 -> abc + action_abc = dq0_to_abc(action_PI, self.env.net.components[0].phase) + + + + # check if m_abc will be clipped + if np.any(abs(action_abc) > 1): + + clipped_action = np.clip(action_abc, -1, 1) + + delta_action = clipped_action - action_abc + # if, reduce integrator by clipped delta + action_delta = abc_to_dq0(delta_action, self.env.net.components[0].phase) + self.integrator_sum += action_delta * self.antiwindup_weight + + # clip_reward = np.clip(np.sum(np.abs(delta_action) * \ + # (-1 / (self.env.net.components[0].v_lim / self.env.net.components[ + # 0].v_DC))) / 3 * (1 - self.gamma), + # -1, 0) + + clip_reward = 0 + # toDo reset clip reward for P10 experiment + + action_abc = clipped_action + + else: + clip_reward = 0 + + obs, reward, done, info = super().step(action_abc) + + reward = reward + clip_reward + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! + obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + super().render() + + integrator_penalty = np.sum(-((np.abs(action_I)) ** 0.5)) * (1 - self.gamma) / 3 + # action_P_penalty = - np.sum((np.abs(action_P - self.used_P)) ** 0.5) * (1 - self.gamma) / 3 + action_P_penalty = np.sum(-((np.abs(action_P)) ** 0.5)) * (1 - self.gamma) / 3 + + # reward_weight is = 1 + + if self.total_steps > self.t_start_penalty_I: + penalty_I_weight_scale = 1 / (self.t_start_penalty_I - self.number_learing_steps) * self.total_steps - \ + self.number_learing_steps / (self.t_start_penalty_I - self.number_learing_steps) + + else: + penalty_I_weight_scale = 1 + + if self.total_steps > self.t_start_penalty_P: + penalty_P_weight_scale = 1 / (self.t_start_penalty_P - self.number_learing_steps) * self.total_steps - \ + self.number_learing_steps / (self.t_start_penalty_P - self.number_learing_steps) + + else: + + penalty_P_weight_scale = 1 + + reward = (reward + (self.penalty_I_weight * penalty_I_weight_scale) * integrator_penalty + + self.penalty_P_weight * penalty_P_weight_scale * action_P_penalty) \ + / (1 + self.penalty_I_weight * penalty_I_weight_scale + self.penalty_P_weight * penalty_P_weight_scale) + + self._n_training_steps += 1 + + # if self._n_training_steps % round(self.training_episode_length / 10) == 0: + # self.env.on_episode_reset_callback() + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + super().close() + + + # add wanted features here (add appropriate self.observation in init!!) 
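+        # Note on the reward shaping above: integrator_penalty and action_P_penalty are blended
+        # into the environment reward with weights that fade linearly over training,
+        #   scale = (total_steps - number_learing_steps) / (t_start_penalty - number_learing_steps),
+        # i.e. scale = 1 up to t_start_penalty and scale -> 0 towards number_learing_steps.
+        # Hypothetical example: with t_start_penalty_I = 1e5, number_learing_steps = 5e5 and
+        # total_steps = 3e5, scale = (3e5 - 5e5) / (1e5 - 5e5) = 0.5.
+        # The weighted sum is divided by (1 + w_I * scale_I + w_P * scale_P) to keep the shaped
+        # reward on a scale comparable to the unshaped one.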
+ # calculate magnitude of current phasor abc + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + self.integrator_sum_list0.append(self.integrator_sum[0]) + self.integrator_sum_list1.append(self.integrator_sum[1]) + self.integrator_sum_list2.append(self.integrator_sum[2]) + self.action_P0.append(np.float64(action_P[0])) + self.action_P1.append(np.float64(action_P[1])) + self.action_P2.append(np.float64(action_P[2])) + self.action_I0.append(np.float64(action_I[0])) + self.action_I1.append(np.float64(action_I[1])) + self.action_I2.append(np.float64(action_I[2])) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, self.used_action) + + # todo efficiency? 
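+        # Besides the dq0 control error, this wrapper feeds back both the last proportional action
+        # (used_P) and the integrator state (used_I); with a 3-phase action space that amounts to
+        # 3 + 3 + 3 = 9 extra observation entries, which the number_of_features passed to __init__
+        # has to cover (plus any past values appended by subclasses such as
+        # FeatureWrapper_pastVals).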
+ self.used_P = np.copy(action_P) + self.used_I = np.copy(self.integrator_sum) + # self.used_P = action_P + # self.used_I = self.integrator_sum + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + "Reward function": 'rew.rew_fun_dq0', + 'Integrator0': self.integrator_sum_list0, + 'Integrator1': self.integrator_sum_list1, + 'Integrator2': self.integrator_sum_list2, + 'actionP0': self.action_P0, + 'actionP1': self.action_P1, + 'actionP2': self.action_P2, + 'actionI0': self.action_I0, + 'actionI1': self.action_I1, + 'actionI2': self.action_I2 + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + self.recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + + # if self._n_training_steps > 500: + super().close() + # plt.plot(self.integrator_sum_list0) + # plt.plot(self.integrator_sum_list1) + # plt.plot(self.integrator_sum_list2) + # plt.ylabel('intergratorzustand') + # plt.show() + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + + obs = super().reset() + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! + obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + self._n_training_steps = 0 + self.integrator_sum = np.zeros(self.action_space.shape) + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! 
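+            # Measurements arrive from the FMU in abc coordinates; currents (obs[0:3]) and
+            # voltages (obs[3:6]) are rotated into dq0 with the inverter's current phase. The
+            # voltage setpoints obs[6:9] are assumed to be given in dq0 already (dq0 network
+            # config), so only the measurements are transformed here.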
+ obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + """ + Features + """ + error = (obs[6:9] - obs[3:6]) / 2 # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + # obs = np.append(obs, delta_i_lim_i_phasor) + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, self.used_action) + + return obs + + def cal_phasor_magnitude(self, abc: np.array) -> float: + """ + Calculated the magnitude of a phasor in a three phase system. M + + :param abc: Due to limit normed currents or voltages in abc frame + :return: magnitude of the current or voltage phasor + """ + # calculate magnitude of current phasor abc-> alpha,beta ->|sqrt(alpha² + beta²)| + i_alpha_beta = abc_to_alpha_beta(abc) + i_phasor_mag = np.sqrt(i_alpha_beta[0] ** 2 + i_alpha_beta[1] ** 2) + + return i_phasor_mag + + +class FeatureWrapper_pastVals(FeatureWrapper): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 500000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000, number_past_vals=10): + """ + Env Wrapper which adds the number_past_vals voltage ([3:6]!!!) observations to the observations. + Initialized with zeros! 
+ """ + super().__init__(env, number_of_features, training_episode_length, + recorder, n_trail, integrator_weight, antiwindup_weight, gamma, + penalty_I_weight, penalty_P_weight, t_start_penalty_I, t_start_penalty_P, + number_learing_steps) + + # self.observation_space = gym.spaces.Box( + # low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + # high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + self.delay_queues = [Fastqueue(1, 3) for _ in range(number_past_vals)] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + obs, reward, done, info = super().step(action) + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + [x.clear() for x in self.delay_queues] + obs = super().reset() + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + + return obs + + def shift_and_append(self, obs): + """ + Takes the observation and shifts throught the queue + every queue output is added to total obs + """ + obs_delay_array = np.array([]) + obs_temp = obs + for queue in self.delay_queues: + obs_temp = queue.shift(obs_temp) + obs_delay_array = np.append(obs_delay_array, obs_temp) + + return obs_delay_array + + +class FeatureWrapper_futureVals(FeatureWrapper): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000, number_future_vals=0, future_data=''): + """ + Env Wrapper which adds the number_future_vals R-values to the observations. + Initialized with zeros! + Therfore it uses the in the init defined pkl + """ + super().__init__(env, number_of_features + number_future_vals, training_episode_length, + recorder, n_trail, integrator_weight, antiwindup_weight, gamma, + penalty_I_weight, penalty_P_weight, t_start_penalty_I, t_start_penalty_P, + number_learing_steps) + + # not needed... toDo Chage in Randload init? + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=50), initial=50, + bounds=(14, 200)) + self.load_curve = RandomLoad(2881, net.ts, gen, + load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl')) + future_data)) + + self.future_vals = [] + self.number_future_vals = number_future_vals + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + obs, reward, done, info = super().step(action) + + self.future_vals = [2 * (self.load_curve.give_dataframe_value(self.env.sim_time_interval[0] + + i * self.env.time_step_size, + col='r_load.resistor' + Rx + '.R') - 14) / ( + 200 - 14) - 1 + # NORMALIZATION! + for i in range(self.number_future_vals) for Rx in ['1']] # , '2', '3']] + # toDo: if Load is not balanced, different values have to be sampled! 
(till now only 1 value per future step is sufficent + + obs = np.append(obs, self.future_vals) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + obs = super().reset() + self.future_vals = [2 * (self.load_curve.give_dataframe_value(self.env.sim_time_interval[0] + + i * self.env.time_step_size, + col='r_load.resistor' + Rx + '.R') - 14) / ( + 200 - 14) - 1 + # NORMALIZATION! + for i in range(self.number_future_vals) for Rx in ['1']] # , '2', '3']] + # toDo: if Load is not balanced, different values have to be sampled! (till now only 1 value per future step is sufficent + + obs = np.append(obs, self.future_vals) + return obs + + +class FeatureWrapper_I_controller(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = 5000000, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts, gamma=0, + penalty_I_weight=1, penalty_P_weight=1, t_start_penalty_I=0, t_start_penalty_P=0, + number_learing_steps=500000, Ki=12, number_past_vals=0): + """ + Env Wrapper to add features to the env-observations and adds information to env.step output which can be used in + case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) + + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + + # increase action-space for PI-seperation + # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1)) + + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self._v_pahsor = 0.0 + self.n_episode = 0 + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.reward_episode_mean = [] + self.n_trail = n_trail + self.phase = [] + self.integrator_sum = np.zeros(self.action_space.shape) + self.integrator_weight = integrator_weight + self.antiwindup_weight = antiwindup_weight + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + self.gamma = gamma + self.penalty_I_weight = penalty_I_weight + self.penalty_P_weight = penalty_P_weight + self.t_start_penalty_I = t_start_penalty_I + self.t_start_penalty_P = t_start_penalty_P + self.number_learing_steps = number_learing_steps + self.Ki = Ki + self.delay_queues = [Fastqueue(1, 3) for _ in range(number_past_vals)] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. 
+ Triggers the env to reset without done=True every training_episode_length steps + """ + + action_PI = action + self.integrator_sum + + if cfg['is_dq0']: + # Action: dq0 -> abc + action_abc = dq0_to_abc(action_PI, self.env.net.components[0].phase) + + # check if m_abc will be clipped + if np.any(abs(action_abc) > 1): + # if, reduce integrator by clipped delta + action_delta = abc_to_dq0(np.clip(action_abc, -1, 1) - action_abc, self.env.net.components[0].phase) + # self.integrator_sum += action_delta * self.antiwindup_weight + self.integrator_sum += action_delta * self.env.time_step_size + + obs, reward, done, info = super().step(action_abc) + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! + obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + super().render() + + self._n_training_steps += 1 + + # if self._n_training_steps % round(self.training_episode_length / 10) == 0: + # self.env.on_episode_reset_callback() + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + super().close() + + # add wanted features here (add appropriate self.observation in init!!) + # calculate magnitude of current phasor abc + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": cfg['STUDY_NAME'], + "Reward function": 'rew.rew_fun_dq0', + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + self.recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! 
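+            # Reminder of this wrapper's split: the agent only provides the proportional part of
+            # the action, while the integral part is a fixed-gain integrator that is advanced
+            # below on the dq0 control error via
+            #   self.integrator_sum += error * self.env.time_step_size * self.Ki
+            # and appended to the observation. Rough illustration with hypothetical values
+            # (time_step_size = 1e-4 s, Ki = 12): a constant normalized error of 0.1 moves the
+            # integrator by 0.1 * 1e-4 * 12 = 1.2e-4 per control step.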
+ obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = obs[6:9] - obs[3:6] # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + # self.integrator_sum += error * self.integrator_weight * self.Ki + self.integrator_sum += error * self.env.time_step_size * self.Ki + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + """ + Add pastvals + """ + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + obs = np.append(obs, self.integrator_sum) + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, self.used_action) + + # todo efficiency? + self.used_P = np.copy(action) + self.used_I = np.copy(self.integrator_sum) + # self.used_P = action_P + # self.used_I = self.integrator_sum + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + + [x.clear() for x in self.delay_queues] + obs = super().reset() + + if len(obs) > 9: + # ASSUME THAT LOADCURRENT is included! + obs[9:12] = obs[9:12] / net['inverter1'].i_lim + + self._n_training_steps = 0 + self.integrator_sum = np.zeros(self.action_space.shape) + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + """ + Features + """ + error = obs[6:9] - obs[3:6] # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + # self.integrator_sum += error * self.integrator_weight * self.Ki + self.integrator_sum += error * self.env.time_step_size * self.Ki + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 
0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + # obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + # obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + + """ + Add pastvals and integrator sum + """ + obs_delay_array = self.shift_and_append(obs[3:6]) + obs = np.append(obs, obs_delay_array) + obs = np.append(obs, self.integrator_sum) + + # obs = np.append(obs, delta_i_lim_i_phasor) + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, self.used_action) + + return obs + + def cal_phasor_magnitude(self, abc: np.array) -> float: + """ + Calculated the magnitude of a phasor in a three phase system. M + + :param abc: Due to limit normed currents or voltages in abc frame + :return: magnitude of the current or voltage phasor + """ + # calculate magnitude of current phasor abc-> alpha,beta ->|sqrt(alpha² + beta²)| + i_alpha_beta = abc_to_alpha_beta(abc) + i_phasor_mag = np.sqrt(i_alpha_beta[0] ** 2 + i_alpha_beta[1] ** 2) + + return i_phasor_mag + + def shift_and_append(self, obs): + """ + Takes the observation and shifts throught the queue + every queue output is added to total obs + """ + obs_delay_array = np.array([]) + obs_temp = obs + for queue in self.delay_queues: + obs_temp = queue.shift(obs_temp) + obs_delay_array = np.append(obs_delay_array, obs_temp) + + return obs_delay_array diff --git a/experiments/hp_tune/env/random_load.py b/experiments/hp_tune/env/random_load.py new file mode 100644 index 00000000..6b13f43b --- /dev/null +++ b/experiments/hp_tune/env/random_load.py @@ -0,0 +1,151 @@ +import numpy as np +import pandas as pd + +from openmodelica_microgrid_gym.util import RandProcess + + +class RandomLoad: + def __init__(self, train_episode_length: int, ts: float, rand_process: RandProcess, loadstep_time: int = None, + load_curve: pd.DataFrame = None, bounds=None, bounds_std=None): + """ + + :param max_episode_steps: number of steps per training episode (can differ from env.max_episode_steps) + :param ts: sampletime of env + :param rand_pocess: Instance of random process defines noise added to load + :param loadstep_time: number of env step where load step should happen + :param load_curve: Stored load data to sample from instead of smaple from distribution + :param bounds: Bounds to clip the sampled load data + :param bounds_std: Chosen bounds are sampled from a distribution with std=bounds_std and mean=bounds + + """ + self.train_episode_length = train_episode_length + self.ts = ts + self.rand_process = rand_process + if loadstep_time is None: + self.loadstep_time = np.random.randint(0, self.train_episode_length) + else: + self.loadstep_time = loadstep_time + self.load_curve = load_curve + if bounds is None: + self.bounds = (-np.inf, np.inf) + else: + self.bounds = bounds + if bounds_std is None: + self.bounds_std = (0, 0) + else: + self.bounds_std = bounds_std + + self.lowerbound_std = 0 + self.upperbound_std = 0 + + def reset(self, loadstep_time=None): + if loadstep_time is None: + self.loadstep_time = np.random.randint(0, self.train_episode_length) + else: + self.loadstep_time = loadstep_time + + def load_step(self, t, gain): + """ + Changes the load parameters + :param t: + :param gain: device parameter + :return: Sample from SP + """ + # Defines a load step after 0.01 s + 
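+        # Clarification of the comment above: the step is applied during the single sample
+        # interval right after loadstep_time * ts. loadstep_time is drawn uniformly from
+        # [0, train_episode_length) at reset() unless passed explicitly, so e.g. with ts = 1e-4 s
+        # and loadstep_time = 100 the step fires for t in (0.01 s, 0.0101 s], which is where the
+        # "0.01 s" above comes from. In that interval the process mean and reserve are set to
+        # 0.55 * gain; for t <= ts the mean is (re)initialized to gain.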
if self.loadstep_time * self.ts < t <= self.loadstep_time * self.ts + self.ts: + self.rand_process.proc.mean = gain * 0.55 + self.rand_process.reserve = gain * 0.55 + elif t <= self.ts: + self.rand_process.proc.mean = gain + + return self.rand_process.sample(t) + + def clipped_step(self, t): + return np.clip(self.rand_process.sample(t), + self.bounds[0] + self.lowerbound_std, + self.bounds[1] + self.upperbound_std + ) + + def one_random_loadstep_per_episode(self, t): + if self.loadstep_time * self.ts < t <= self.loadstep_time * self.ts + self.ts: + # do with 100 percent propability + self.do_change(1002, 102) + # else: + # with 2 permill change drift + # self.do_change(2, 0) + + return np.clip(self.rand_process.sample(t), + self.bounds[0] + self.lowerbound_std, + self.bounds[1] + self.upperbound_std + ) + + def give_dataframe_value(self, t, col): + """ + Gives load values from a stored dataframe (self.load_curve) + :parma t: time - represents here the row of the dataframe + :param col: colon name of the dataframe (typically str) + """ + if t < 0: + # return None + return self.load_curve[col][0] + if self.load_curve is None: + raise ValueError('No dataframe given! Please feed load class (.load_curve) with data') + return self.load_curve[col][int(t / self.ts)] + + def random_load_step(self, t, event_prob: int = 2, step_prob: int = 50): + """ + Changes the load parameters applying a loadstep with 0.2% probability which is a pure step with 50 % + probability otherwise a drift. In every event the random process variance is drawn randomly [1, 150]. + :param t: time + :param event_prob: probability (in pre mill) that the step event is triggered in the current step + :param step_prob: probability (in pre cent) that event is a abrupt step (drift otherwise!, random process speed + not adjustable yet + :return: Sample from SP + """ + # Changes rand process data with probability of 5% and sets new value randomly + if np.random.randint(0, 1001) < 2: + + gain = np.random.randint(self.rand_process.bounds[0], self.rand_process.bounds[1]) + + self.rand_process.proc.mean = gain + self.rand_process.proc.vol = np.random.randint(1, 150) + self.rand_process.proc.speed = np.random.randint(10, 1200) + # define sdt for clipping once every event + # np.maximum to not allow negative values + self.lowerbound_std = np.maximum(np.random.normal(scale=self.bounds_std[0]), 0.0001) + self.upperbound_std = np.random.normal(scale=self.bounds_std[1]) + + # With 50% probability do a step or a drift + if np.random.randint(0, 101) < 50: + # step + self.rand_process.reserve = gain + + else: + # drift -> Lower speed to allow + self.rand_process.proc.speed = np.random.randint(10, 100) + + return np.clip(self.rand_process.sample(t), + self.bounds[0] + self.lowerbound_std, + self.bounds[1] + self.upperbound_std + ) + + def do_change(self, event_prob_permill=2, step_prob_percent=50): + if np.random.randint(0, 1001) < event_prob_permill: + + gain = np.random.randint(self.rand_process.bounds[0], self.rand_process.bounds[1]) + + self.rand_process.proc.mean = gain + self.rand_process.proc.vol = np.random.randint(1, 150) + self.rand_process.proc.speed = np.random.randint(10, 1200) + # define sdt for clipping once every event + self.lowerbound_std = np.random.normal(scale=self.bounds_std[0]) + self.upperbound_std = np.random.normal(scale=self.bounds_std[1]) + + # With 50% probability do a step or a drift + if np.random.randint(0, 101) < step_prob_percent: + # step + self.rand_process.reserve = gain + + else: + # drift -> Lower speed to 
allow + self.rand_process.proc.speed = np.random.randint(10, 100) diff --git a/experiments/hp_tune/env/rewards.py b/experiments/hp_tune/env/rewards.py new file mode 100644 index 00000000..2e6bb435 --- /dev/null +++ b/experiments/hp_tune/env/rewards.py @@ -0,0 +1,552 @@ +import numpy as np +from openmodelica_microgrid_gym.util import nested_map, abc_to_dq0, dq0_to_abc +from typing import List + + +class Reward: + def __init__(self, nom, lim, v_DC, gamma, det_run=False, nom_region: float = 1.1, use_gamma_normalization=1, + error_exponent: float = 1.0, i_lim: float = np.inf, i_nom: float = np.inf, i_exponent: float = 1.0): + """ + + :param nom: Nominal value for the voltage + :param lim: Limit value for the voltage + :param v_DC: DC-Link voltage + :param gamma: Discount factor to map critic values -> [-1, 1] + :param use_gamma_normalization: if 0 normalization depending on gamma is not used + :param nom_region: Defines cliff in the reward landscape where the reward is pulled down because the nominal + value is exceeded. nom_region defines how much the nominal value can be exceeded before + the cliff (e.g. 1.1 -> cliff @ 1.1*self.nom + :param error_exponent: defines the used error-function: E.g.: 1 -> Mean absolute error + 2 -> Mean squared error + 0.5 -> Mean root error + :param i_lim: Limit value for the current + :param i_nom: Nominal value for the current + """ + self._idx = None + self.nom = nom + self.lim = lim + self.v_DC = v_DC + self.use_gamma_normalization = use_gamma_normalization + if self.use_gamma_normalization == 1: + self.gamma = gamma + else: + self.gamma = 0 + self.det_run = det_run + self.nom_region = nom_region + self.exponent = error_exponent + self.i_lim = i_lim + self.i_nom = i_nom + self.i_exponent = i_exponent + + def set_idx(self, obs): + if self._idx is None: + self._idx = nested_map( + lambda n: obs.index(n), + [[f'lc.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012'], + [f'lc.capacitor{k}.v' for k in '123'], [f'inverter1.v_ref.{k}' for k in '012'], + 'inverter1.phase.0']) + + def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + Defines the reward function for the environment. Uses the observations and set-points to evaluate the quality of + the used parameters. + Takes current and voltage measurements and set-points to calculate the mean-root control error and uses a + logarithmic barrier function in case of violating the current limit. Barrier function is adjustable using + parameter mu. + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. 
currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + iabc_master = data[idx[0]] # 3 phase currents at LC inductors + vabc_master = data[idx[2]] # 3 phase currents at LC inductors + + # set points (sp) + isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + vsp_abc_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + SP = vsp_abc_master * self.lim + mess = vabc_master * self.lim + + if all(np.abs(mess) <= self.nom * 1.1): + # if all(np.abs(mess) <= self.lim*self.nom_region): + """ + 1st area - inside wanted (nom) operation range + -v_nom -> + v_nom + rew = 1; if mess = SP + rew = 1/3; if error = SP-mess = 2*v_nom (worst case without braking out from nom area) + """ + # devided by 3 because of sums up all 3 phases + rew = np.sum((1 - (np.abs(SP - mess) / (2 * self.nom)) ** self.exponent) * 2 * 1 / 3 + + 1 / 3) / 3 + + + elif any(np.abs(mess) > self.lim): + """ + 3rd area - outside valid area - above lim - possible if enough v_DC - DANGEROUS + +-v_lim -> +-v_DC + + V1: + @ SP = +v_nom AND mess = -v_DC: + rew = -1; if error = v_DC + v_nom -> Worst case, +v_nom wanted BUT -v_DC measured + @ SP = -v_nom AND mess = -v_lim + rew ~ -1/3 - f[(lim-nom)/(nom+v_DC)] + rew -> -1 - 2/3*(1 - |lim - nom| / (nom+v_DC)) + The latter fraction is quite small but leads to depending on the system less then 2/3 is + substracted and we have a gap to the 2nd area! :) + + V2: None is returned to stop the episode (hint: in the env env.abort_reward is given back as reward(?) + + V3: rew = -1 + """ + + # V1: + # rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.v_DC)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma)) / 3 + + # V2: + # if return -> rew = None and in env abort_reward is given to agent + if self.det_run: + return -(1 - self.gamma) + else: + return + + # V3: + # rew = (1 - gamma) + + + else: + """ + 2nd area + +-v_nom -> +- v_lim + + @ SP = v_nom AND mess = v_nom (-µV), da if mess > v_nom (hier noch Sicherheitsabstand?) + rew = 1/3 + @ SP = v_nom AND mess = -v_lim + rew = -1/3 + + """ + rew = np.sum( + (1 - np.abs(SP - mess) / (self.nom + self.lim)) * 2 * 1 / 3 - 1 / 3) / 3 + + return rew * (1 - self.gamma) + # return -np.clip(error.squeeze(), 0, 1e5) + + def rew_fun_include_current(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + uses the same reward for voltage like defined above but also includes reward depending on the current + If i_nom is exceeded r_current: f(i_mess) -> [0, 1] is multiplied to the r_voltage + Before r_voltage is scaled to the region [0,1]: + - r_voltage = (r_voltage+1)/2 + - r = r_voltage * r_current + - r = r-1 + + If v_lim or i_lim are exceeded, episode abort -> env.abort_reward (should be -1) is given back + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. 
currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + iabc_master = data[idx[0]] # 3 phase currents at LC inductors + vabc_master = data[idx[2]] # 3 phase currents at LC inductors + + # set points (sp) + isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + vsp_abc_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + i_mess = iabc_master * self.i_lim + + SP = vsp_abc_master * self.lim + mess = vabc_master * self.lim + + if any(np.abs(mess) > self.lim) or any(np.abs(i_mess) > self.i_lim): + """ + 3rd area - outside valid area - above lim - possible if enough v_DC - DANGEROUS + +-v_lim -> +-v_DC + Valid for v_lim OR i_lim exceeded + + V1: + @ SP = +v_nom AND mess = -v_DC: + rew = -1; if error = v_DC + v_nom -> Worst case, +v_nom wanted BUT -v_DC measured + @ SP = -v_nom AND mess = -v_lim + rew ~ -1/3 - f[(lim-nom)/(nom+v_DC)] + rew -> -1 - 2/3*(1 - |lim - nom| / (nom+v_DC)) + The latter fraction is quite small but leads to depending on the system less then 2/3 is + substracted and we have a gap to the 2nd area! :) + + V2: None is returned to stop the episode (hint: in the env env.abort_reward is given back as reward(?) + + V3: rew = -1 + """ + + # V1: + # rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.v_DC)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma)) / 3 + + # V2: + # if return -> rew = None and in env abort_reward is given to agent + if self.det_run: + return -(1 - self.gamma) + else: + return + + # V3: + # rew = (1 - gamma) + + elif all(np.abs(mess) <= self.nom * 1.1): + # if all(np.abs(mess) <= self.lim*self.nom_region): + """ + 1st area - inside wanted (nom) operation range + -v_nom -> + v_nom + rew = 1; if mess = SP + rew = 1/3; if error = SP-mess = 2*v_nom (worst case without braking out from nom area) + """ + # devided by 3 because of sums up all 3 phases + # rew = np.sum((1 - (np.abs(SP - mess) / (2 * self.nom)) ** self.exponent) * 2 * (1 - self.gamma) / 3 + ( + # 1 - self.gamma) / 3) / 3 + + rew = np.sum((1 - (np.abs(SP - mess) / (2 * self.nom)) ** self.exponent) * (1 - self.gamma)) / 3 + + + + + else: + """ + 2nd area + +-v_nom -> +- v_lim + + @ SP = v_nom AND mess = v_nom (-µV), da if mess > v_nom (hier noch Sicherheitsabstand?) 
+ rew = 1/3 + @ SP = v_nom AND mess = -v_lim + rew = -1/3 + + """ + # rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.lim)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma) / 3) / 3 + rew = (1 - np.max(np.abs(SP - mess)) / (self.nom + self.lim)) * (1 - self.gamma) / 2 - (1 - self.gamma) / 2 + + if any(abs(i_mess) > ((self.i_nom + self.i_lim) / 2)): + rew = (rew + 1) / 2 # map rew_voltage -> [0,1] + + # Scale rew_voltage with rew_current + # rew = rew * np.sum((((self.i_nom - i_mess) / (self.i_lim - self.i_nom))+1) ** self.i_exponent) / 3 + rew = rew * (((self.i_nom - max(abs(i_mess))) / (self.i_lim - self.i_nom)) + 1) ** self.i_exponent + + rew = rew * 2 - 1 # map rew -> [-1, 1] + + if rew < -1: + asd = 1 + return rew # * (1-0.9) + # return -np.clip(error.squeeze(), 0, 1e5) + + def rew_fun_include_current_dq0(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + uses the same reward for voltage like defined above but also includes reward depending on the current + If i_nom is exceeded r_current: f(i_mess) -> [0, 1] is multiplied to the r_voltage + Before r_voltage is scaled to the region [0,1]: + - r_voltage = (r_voltage+1)/2 + - r = r_voltage * r_current + - r = r-1 + + If v_lim or i_lim are exceeded, episode abort -> env.abort_reward (should be -1) is given back + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + phase = data[idx[4]] + + idq0_master = abc_to_dq0(data[idx[0]], phase) # 3 phase currents at LC inductors + vdq0_master = abc_to_dq0(data[idx[2]], phase) # 3 phase currents at LC inductors + + # set points (sp) + # isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + vsp_dq0_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + vsp_abc = dq0_to_abc(data[idx[3]], phase) + + i_mess = idq0_master * self.i_lim + i_mess_abc = data[idx[0]] * self.i_lim + + SP = vsp_abc * self.lim + mess = data[idx[2]] * self.lim + + mess_abc = data[idx[2]] * self.lim + + if any(np.abs(mess_abc) > self.lim) or any(np.abs(i_mess_abc) > self.i_lim): + """ + 3rd area - outside valid area - above lim - possible if enough v_DC - DANGEROUS + +-v_lim -> +-v_DC + Valid for v_lim OR i_lim exceeded + + V1: + @ SP = +v_nom AND mess = -v_DC: + rew = -1; if error = v_DC + v_nom -> Worst case, +v_nom wanted BUT -v_DC measured + @ SP = -v_nom AND mess = -v_lim + rew ~ -1/3 - f[(lim-nom)/(nom+v_DC)] + rew -> -1 - 2/3*(1 - |lim - nom| / (nom+v_DC)) + The latter fraction is quite small but leads to depending on the system less then 2/3 is + substracted and we have a gap to the 2nd area! :) + + V2: None is returned to stop the episode (hint: in the env env.abort_reward is given back as reward(?) 
+ + V3: rew = -1 + """ + + # V1: + # rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.v_DC)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma)) / 3 + + # V2: + # if return -> rew = None and in env abort_reward is given to agent + if self.det_run: + return -(1 - self.gamma) + else: + return + + # V3: + # rew = (1 - gamma) + + elif all(np.abs(mess_abc) <= self.nom * 1.1): + # if all(np.abs(mess) <= self.lim*self.nom_region): + """ + 1st area - inside wanted (nom) operation range + -v_nom -> + v_nom + rew = 1; if mess = SP + rew = 1/3; if error = SP-mess = 2*v_nom (worst case without braking out from nom area) + """ + # devided by 3 because of sums up all 3 phases + #rew = np.sum((1 - (np.abs(SP - mess) / (2 * self.nom)) ** self.exponent) * 2 * (1 - self.gamma) / 3 + ( + # 1 - self.gamma) / 3) / 3 + + rew = np.sum((1 - (np.abs(SP - mess) / (2 * self.nom)) ** self.exponent) * (1 - self.gamma) ) / 3 + + + + + else: + """ + 2nd area + +-v_nom -> +- v_lim + + @ SP = v_nom AND mess = v_nom (-µV), da if mess > v_nom (hier noch Sicherheitsabstand?) + rew = 1/3 + @ SP = v_nom AND mess = -v_lim + rew = -1/3 + + """ + #rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.lim)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma) / 3) / 3 + rew = (1 - np.max(np.abs(SP - mess)) / (self.nom + self.lim)) * (1 - self.gamma) / 2 - (1 - self.gamma) / 2 + + if any(abs(i_mess_abc) > ((self.i_nom + self.i_lim) / 2)): + rew = (rew + 1) / 2 # map rew_voltage -> [0,1] + + # Scale rew_voltage with rew_current + # rew = rew * np.sum((((self.i_nom - i_mess) / (self.i_lim - self.i_nom))+1) ** self.i_exponent) / 3 + rew = rew * (((self.i_nom - max(abs(i_mess_abc))) / (self.i_lim - self.i_nom)) + 1) ** self.i_exponent + + rew = rew * 2 - 1 # map rew -> [-1, 1] + + if rew < -1: + asd = 1 + return rew # * (1-0.9) + # return -np.clip(error.squeeze(), 0, 1e5) + + def rew_fun_dq0(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + uses the same reward for voltage like defined above + + If v_lim is exceeded, episode abort -> env.abort_reward (should be -1) is given back + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. 
currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + phase = data[idx[4]] + + vdq0_master = abc_to_dq0(data[idx[2]], phase) # 3 phase currents at LC inductors + + # set points (sp) + vsp_dq0_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + # SP = vsp_dq0_master * self.lim + # mess = vdq0_master * self.lim + """ + if any(np.abs(data[idx[2]]) > 1): + if self.det_run: + return -(1 - self.gamma) + else: + return + else: + rew = np.sum(1 - (2 * (np.abs(vsp_dq0_master - vdq0_master)) ** self.exponent)) * (1 - self.gamma) / 3 # /2 + """ + rew = np.sum(-((np.abs(vsp_dq0_master - vdq0_master)) ** self.exponent)) * (1 - self.gamma) / 3 # /2 + + return rew + + def rew_fun_PIPI_MRE(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + uses the same reward for voltage like defined above but also includes reward depending on the current + If i_nom is exceeded r_current: f(i_mess) -> [0, 1] is multiplied to the r_voltage + Before r_voltage is scaled to the region [0,1]: + - r_voltage = (r_voltage+1)/2 + - r = r_voltage * r_current + - r = r-1 + + If v_lim or i_lim are exceeded, episode abort -> env.abort_reward (should be -1) is given back + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + i_mess = data[idx[0]] # 3 phase currents at LC inductors + mess = data[idx[2]] # 3 phase currents at LC inductors + + # set points (sp) + isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + SP = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + # SP = vsp_dq0_master * self.lim + # mess = vdq0_master * self.lim + + # rew = np.sum(-((np.abs(SP - mess)) ** 0.5)) * (1 - self.gamma) / 3 + + phase = data[idx[4]] + + vdq0_master = abc_to_dq0(data[idx[2]], phase) / self.lim # 3 phase currents at LC inductors + + # set points (sp) + vsp_dq0_master = abc_to_dq0(data[idx[3]], + phase) / self.lim # convert dq set-points into three-phase abc coordinates + + # SP = vsp_dq0_master * self.lim + # mess = vdq0_master * self.lim + + rew = np.sum(-((np.abs(vsp_dq0_master - vdq0_master)) ** self.exponent)) * (1 - self.gamma) / 3 + + # rew = np.sum(1 - (2 * (np.abs(vsp_dq0_master - vdq0_master)) ** self.exponent)) * (1 - self.gamma) / 3 + + return rew + + def rew_fun_PIPI(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + uses the same reward for voltage like defined above but also includes reward depending on the current + If i_nom is exceeded r_current: f(i_mess) -> [0, 1] is multiplied to the r_voltage + Before r_voltage is scaled to the region [0,1]: + - r_voltage = (r_voltage+1)/2 + - r = r_voltage * r_current + - r = r-1 + + If v_lim or i_lim are exceeded, episode abort -> env.abort_reward (should be -1) is given back + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. 
currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + i_mess = data[idx[0]] # 3 phase currents at LC inductors + mess = data[idx[2]] # 3 phase currents at LC inductors + + # set points (sp) + isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + SP = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + # i_mess = iabc_master * self.i_lim + + # SP = vsp_abc_master * self.lim + # mess = vabc_master * self.lim + + if any(np.abs(mess) > self.lim) or any(np.abs(i_mess) > self.i_lim): + """ + 3rd area - outside valid area - above lim - possible if enough v_DC - DANGEROUS + +-v_lim -> +-v_DC + Valid for v_lim OR i_lim exceeded + + V1: + @ SP = +v_nom AND mess = -v_DC: + rew = -1; if error = v_DC + v_nom -> Worst case, +v_nom wanted BUT -v_DC measured + @ SP = -v_nom AND mess = -v_lim + rew ~ -1/3 - f[(lim-nom)/(nom+v_DC)] + rew -> -1 - 2/3*(1 - |lim - nom| / (nom+v_DC)) + The latter fraction is quite small but leads to depending on the system less then 2/3 is + substracted and we have a gap to the 2nd area! :) + + V2: None is returned to stop the episode (hint: in the env env.abort_reward is given back as reward(?) + + V3: rew = -1 + """ + + # V1: + # rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.v_DC)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma)) / 3 + + # V2: + # if return -> rew = None and in env abort_reward is given to agent + if self.det_run: + return -(1 - self.gamma) + else: + return + + # V3: + # rew = (1 - gamma) + + elif all(np.abs(mess) <= self.nom * 1.1): + # if all(np.abs(mess) <= self.lim*self.nom_region): + """ + 1st area - inside wanted (nom) operation range + -v_nom -> + v_nom + rew = 1; if mess = SP + rew = 1/3; if error = SP-mess = 2*v_nom (worst case without braking out from nom area) + """ + # devided by 3 because of sums up all 3 phases + # rew = np.sum((1 - (np.abs(SP - mess) / (2 * self.nom)) ** self.exponent) * 2 * (1 - self.gamma) / 3 + ( + # 1 - self.gamma) / 3) / 3 + rew = np.sum((1 - (np.abs(SP - mess) / (2 * self.nom)) ** self.exponent) * (1 - self.gamma)) / 3 + + + + else: + """ + 2nd area + +-v_nom -> +- v_lim + + @ SP = v_nom AND mess = v_nom (-µV), da if mess > v_nom (hier noch Sicherheitsabstand?) 
+ rew = 1/3 + @ SP = v_nom AND mess = -v_lim + rew = -1/3 + + """ + # rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.lim)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma) / 3) / 3 + rew = (1 - np.max(np.abs(SP - mess)) / (self.nom + self.lim)) * (1 - self.gamma) / 2 - (1 - self.gamma) / 2 + if any(abs(i_mess) > self.i_nom): + rew = (rew + 1) / 2 # map rew_voltage -> [0,1] + + # Scale rew_voltage with rew_current + # rew = rew * np.sum((((self.i_nom - i_mess) / (self.i_lim - self.i_nom))+1) ** self.i_exponent) / 3 + rew = rew * (((self.i_nom - max(abs(i_mess))) / (self.i_lim - self.i_nom)) + 1) ** self.i_exponent + + rew = rew * 2 - 1 # map rew -> [-1, 1] + + if rew < -1: + asd = 1 + return rew # * (1-0.9) + # return -np.clip(error.squeeze(), 0, 1e5) diff --git a/experiments/hp_tune/env/vctrl_single_inv.py b/experiments/hp_tune/env/vctrl_single_inv.py new file mode 100644 index 00000000..a108dc18 --- /dev/null +++ b/experiments/hp_tune/env/vctrl_single_inv.py @@ -0,0 +1,419 @@ +from datetime import datetime, time +from functools import partial +from itertools import accumulate +from os import makedirs + +import time + +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from stochastic.processes import VasicekProcess + +from experiments.hp_tune.env.random_load import RandomLoad +from experiments.hp_tune.env.rewards import Reward +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network + +from openmodelica_microgrid_gym.util import RandProcess +from gym.envs.registration import register +from experiments.hp_tune.util.config import cfg + +folder_name = cfg['STUDY_NAME'] # 'DDPG_MRE_sqlite_PC2' +# experiment_name = 'DDPG_VC_Reward_MRE_reward_NOT_NORMED' +experiment_name = 'plots' +timestamp = datetime.now().strftime(f'_%Y.%b.%d_%X') + +makedirs(folder_name, exist_ok=True) +# makedirs(folder_name + experiment_name, exist_ok=True) + + +# Simulation definitions +if not cfg['is_dq0']: + # load net using abc reference values + net = Network.load('net/net_vctrl_single_inv.yaml') +else: + # load net using dq0 reference values + net = Network.load('net/net_vctrl_single_inv_dq0.yaml') + # net = Network.load('net/net_p10.yaml') + +# set high to not terminate env! Termination should be done in wrapper by env after episode-length-HP +max_episode_steps = 1500000 # net.max_episode_steps # number of simulation steps per episode + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC + +# plant +""" +print('Using P10 setting') +L_filter = 70e-6 # / H +R_filter = 1.1e-3 # / Ohm +C_filter = 250e-6 # / F +lower_bound_load_clip = 1 # to allow maximal load that draws i_limit (let exceed?) +lower_bound_load_clip_std = 1 +""" +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F +# R = 40 # nomVoltPeak / 7.5 # / Ohm +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) 
+upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 +R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + +gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +# if save needed in dependence of trial ( -> foldername) shift to executive file? +def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + plt.close() + + +def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + +def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + # ts = time.gmtime() + # fig.savefig(f'{folder_name + experiment_name}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + +rand_load_train = RandomLoad(round(cfg['train_episode_length'] / 10), net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + +cb = CallbackList() +# set initial = None to reset load random in range of bounds +cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) +cb.append(rand_load_train.reset) + +register(id='vctrl_single_inv_train-v0', + entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.random_load_step, + 'r_load.resistor2.R': rand_load_train.clipped_step, + 'r_load.resistor3.R': rand_load_train.clipped_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: 
np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) + +register(id='vctrl_single_inv_train-v1', + entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.one_random_loadstep_per_episode, + 'r_load.resistor2.R': rand_load_train.clipped_step, + 'r_load.resistor3.R': rand_load_train.clipped_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) + +rand_train2 = RandomLoad(2881, net.ts, gen, + load_curve=pd.read_pickle( + 'experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl')) + +register(id='vctrl_single_inv_train-v2', + entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + 
PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_train2.give_dataframe_value, col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_train2.give_dataframe_value, col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_train2.give_dataframe_value, col='r_load.resistor3.R'), + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) + +rand_load_test = RandomLoad(2881, net.ts, gen, + # load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_oneLoadstepPerEpisode2881Len_test_case_10_seconds.pkl')) + load_curve=pd.read_pickle( + 'experiments/hp_tune/data/R_load_deterministic_test_case2_1_seconds.pkl')) +# load_curve=pd.read_pickle('experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl')) + +register(id='vctrl_single_inv_test-v0', + entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=20000, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor3.R') + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) + +register(id='vctrl_single_inv_test-v1', + 
entry_point='openmodelica_microgrid_gym.env:ModelicaEnv', + kwargs=dict( # reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=100001, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.random_load_step, + # 'r_load.resistor1.R': rand_load_train.one_random_loadstep_per_episode, + 'r_load.resistor2.R': rand_load_train.clipped_step, + 'r_load.resistor3.R': rand_load_train.clipped_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + ) diff --git a/experiments/hp_tune/examples/DDPG_init_execution.py b/experiments/hp_tune/examples/DDPG_init_execution.py new file mode 100644 index 00000000..96318547 --- /dev/null +++ b/experiments/hp_tune/examples/DDPG_init_execution.py @@ -0,0 +1,517 @@ +import platform +from typing import Union + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from stable_baselines3.common.monitor import Monitor +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise +from stable_baselines3.common.type_aliases import GymStepReturn + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder +from experiments.hp_tune.util.scheduler import linear_schedule +from openmodelica_microgrid_gym.util import abc_to_alpha_beta, dq0_to_abc, abc_to_dq0 + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + 
database_name=folder_name)  # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder]
+
+
+class FeatureWrapper(Monitor):
+
+    def __init__(self, env, number_of_features: int = 0, training_episode_length: int = np.inf,
+                 recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts):
+        """
+        Env wrapper that adds features to the env observations and extra information to the env.step output, which can
+        be used in case of a continuing (non-episodic) task to reset the environment without being terminated by done.
+
+        Hint: is_dq0: if the control is done in dq0; if True, the action is transformed to the abc system using the
+        env phase and the observation is transformed back to dq0 using the next phase
+
+        :param env: Gym environment to wrap
+        :param number_of_features: Number of features added to the env observations in the wrapped step method
+        :param training_episode_length: (For non-episodic environments) number of training steps after which the env is
+            reset by the agent for training purposes (set to inf in the test env!)
+        """
+        super().__init__(env)
+        self.observation_space = gym.spaces.Box(
+            low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf),
+            high=np.full(env.observation_space.shape[0] + number_of_features, np.inf))
+
+        # increase action space for PI separation
+        # self.action_space=gym.spaces.Box(low=np.full(d_i, -1), high=np.full(d_i, 1))
+
+        self.training_episode_length = training_episode_length
+        self.recorder = recorder
+        self._n_training_steps = 0
+        self._i_phasor = 0.0
+        self.i_a = []
+        self.i_b = []
+        self.i_c = []
+        self.v_a = []
+        self.v_b = []
+        self.v_c = []
+        self._v_pahsor = 0.0
+        self.n_episode = 0
+        self.R_training = []
+        self.i_phasor_training = []
+        self.v_phasor_training = []
+        self.reward_episode_mean = []
+        self.n_trail = n_trail
+        self.phase = []
+        self.integrator_sum = np.zeros(self.action_space.shape)
+        self.integrator_weight = integrator_weight
+        self.antiwindup_weight = antiwindup_weight
+        self.used_P = np.zeros(self.action_space.shape)
+        self.used_I = np.zeros(self.action_space.shape)
+        self.used_I_action = np.zeros(self.action_space.shape)
+
+    def step(self, action: Union[np.ndarray, int]) -> GymStepReturn:
+        """
+        Adds additional features and info after the gym env.step() function is executed.
+        Triggers the env to reset (without the env itself terminating) every training_episode_length steps.
+        """
+        action_P = action[0:3]
+        action_I = action[3:6]
+
+        self.used_I_action = np.copy(action_I)
+
+        self.integrator_sum += action_I * self.integrator_weight
+
+        action_PI = action_P + self.integrator_sum
+
+        if cfg['is_dq0']:
+            # Action: dq0 -> abc
+            action_abc = dq0_to_abc(action_PI, self.env.net.components[0].phase)
+
+            # check if m_abc will be clipped
+            if np.any(abs(action_abc) > 1):
+                # if so, reduce the integrator by the clipped delta (back-calculation anti-windup)
+                action_delta = abc_to_dq0(np.clip(action_abc, -1, 1) - action_abc, self.env.net.components[0].phase)
+                self.integrator_sum += action_delta * self.antiwindup_weight
+        else:
+            # control already acts in abc coordinates, use the combined PI action directly
+            action_abc = action_PI
+
+        obs, reward, done, info = super().step(action_abc)
+        self._n_training_steps += 1
+
+        if self._n_training_steps % self.training_episode_length == 0:
+            # info["timelimit_reached"] = True
+            done = True
+
+        # add wanted features here (add the appropriate self.observation handling in __init__!)
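+        # Note on the action handling at the top of this method: the agent outputs separate P and I
+        # channels (6 actions in total). The I channel is accumulated in integrator_sum (scaled by
+        # integrator_weight), added to the P channel and only then transformed to abc. If the resulting
+        # modulation index would clip at +-1, the clipped difference is transformed back to dq0 and fed
+        # into the integrator via antiwindup_weight - a back-calculation anti-windup that keeps the
+        # integrator from winding up while the actuator is saturated.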
+ # calculate magnitude of current phasor abc + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": folder_name, + "Reward function": 'rew.rew_fun_dq0', + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + mongo_recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = obs[6:9] - obs[3:6] # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, delta_i_lim_i_phasor) + + """ + Add used action to the NN input to learn delay + """ + # obs = np.append(obs, self.used_action) + + # todo efficiency? 
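+        # Feature bookkeeping: the entries appended above are the dq0 control error (3), sin/cos of the
+        # component phase (2), the previously used P action (3) and the integrator state (3) - eleven
+        # extra values in total, matching number_of_features=11 when this wrapper is instantiated below.
+        # used_P and used_I are stored next so that the following step can feed them back as features.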
+ self.used_P = np.copy(action_P) + self.used_I = np.copy(self.integrator_sum) + # self.used_P = action_P + # self.used_I = self.integrator_sum + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + obs = super().reset() + self._n_training_steps = 0 + self.integrator_sum = np.zeros(self.action_space.shape) + self.used_P = np.zeros(self.action_space.shape) + self.used_I = np.zeros(self.action_space.shape) + + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + """ + Features + """ + error = obs[6:9] - obs[3:6] # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + obs = np.append(obs, self.used_P) + obs = np.append(obs, self.used_I) + # obs = np.append(obs, delta_i_lim_i_phasor) + """ + Add used action to the NN input to learn delay + """ + # obs = np.append(obs, self.used_action) + + return obs + + def cal_phasor_magnitude(self, abc: np.array) -> float: + """ + Calculated the magnitude of a phasor in a three phase system. 
M + + :param abc: Due to limit normed currents or voltages in abc frame + :return: magnitude of the current or voltage phasor + """ + # calculate magnitude of current phasor abc-> alpha,beta ->|sqrt(alpha² + beta²)| + i_alpha_beta = abc_to_alpha_beta(abc) + i_phasor_mag = np.sqrt(i_alpha_beta[0] ** 2 + i_alpha_beta[1] ** 2) + + return i_phasor_mag + + +number_learning_steps = 10000 +integrator_weight = 0.216 # trial.suggest_loguniform("integrator_weight", 1 / 20, 20) +# integrator_weight = trial.suggest_loguniform("integrator_weight", 1e-6, 1e-0) +# antiwindup_weight = trial.suggest_loguniform("antiwindup_weight", 50e-6, 50e-3) +antiwindup_weight = 0.96 # trial.suggest_float("antiwindup_weight", 0.00001, 1) + +learning_rate = 1.42e-5 # trial.suggest_loguniform("learning_rate", 100e-9, 100e-6) # 0.0002# + +lr_decay_start = 0.88 # trial.suggest_float("lr_decay_start", 0.00001, 1) # 3000 # 0.2 * number_learning_steps? +lr_decay_duration = 0.064 # trial.suggest_float("lr_decay_duration", 0.00001, +# 1) # 3000 # 0.2 * number_learning_steps? +t_start = int(lr_decay_start * number_learning_steps) +t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps, + number_learning_steps)) +final_lr = 0.3 # trial.suggest_float("final_lr", 0.00001, 1) + +gamma = 0.927 # trial.suggest_float("gamma", 0.8, 0.99) +weight_scale = 0.000132 # trial.suggest_loguniform("weight_scale", 5e-5, 0.2) # 0.005 + +bias_scale = 0.1 # trial.suggest_loguniform("bias_scale", 5e-4, 0.1) # 0.005 +alpha_relu_actor = 0.1 # trial.suggest_loguniform("alpha_relu_actor", 0.0001, 0.5) # 0.005 +alpha_relu_critic = 0.1 # trial.suggest_loguniform("alpha_relu_critic", 0.0001, 0.5) # 0.005 + +batch_size = 1024 # trial.suggest_int("batch_size", 32, 1024) # 128 +buffer_size = int(1e6) # trial.suggest_int("buffer_size", 10, 1000000) # 128 + +actor_hidden_size = 131 # trial.suggest_int("actor_hidden_size", 10, 200) # 100 # Using LeakyReLU +actor_number_layers = 3 # trial.suggest_int("actor_number_layers", 1, 5) + +critic_hidden_size = 324 # trial.suggest_int("critic_hidden_size", 10, 500) # 100 +critic_number_layers = 3 # trial.suggest_int("critic_number_layers", 1, 4) + +n_trail = str(9999999) +use_gamma_in_rew = 1 +noise_var = 0.012 # trial.suggest_loguniform("noise_var", 0.01, 1) # 2 +# min var, action noise is reduced to (depends on noise_var) +noise_var_min = 0.0013 # trial.suggest_loguniform("noise_var_min", 0.0000001, 2) +# min var, action noise is reduced to (depends on training_episode_length) +noise_steps_annealing = int( + 0.25 * number_learning_steps) # trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), +# number_learning_steps) +noise_theta = 1.74 # trial.suggest_loguniform("noise_theta", 1, 50) # 25 # stiffness of OU +error_exponent = 0.5 # trial.suggest_loguniform("error_exponent", 0.01, 4) + +training_episode_length = 2000 # trial.suggest_int("training_episode_length", 200, 5000) # 128 +learning_starts = 0.32 # trial.suggest_loguniform("learning_starts", 0.1, 2) # 128 +tau = 0.005 # trial.suggest_loguniform("tau", 0.0001, 0.2) # 2 + +learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr, + t_start=t_start, + t_end=t_end, + total_timesteps=number_learning_steps) + +rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + +env = 
gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + +env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight) + +# todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... +env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + +n_actions = env.action_space.shape[-1] +noise_var = noise_var # 20#0.2 +noise_theta = noise_theta # 50 # stiffness of OU +action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + +# action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, +# sigma_min=noise_var * np.ones(n_actions) * noise_var_min, +# mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), +# sigma=noise_var * np.ones(n_actions), dt=net.ts) + +policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers)) + +model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log='test', + # model = myDDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name}/{n_trail}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=buffer_size, + learning_starts=int(learning_starts * training_episode_length), + batch_size=batch_size, tau=tau, gamma=gamma, action_noise=action_noise, + train_freq=(1, "episode"), gradient_steps=- 1, + optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + +# Adjust network -> maybe change to Costume net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html +# adn scale weights and biases +count = 0 +for kk in range(actor_number_layers + 1): + + model.actor.mu._modules[str(count)].weight.data = model.actor.mu._modules[str(count)].weight.data * weight_scale + model.actor_target.mu._modules[str(count)].weight.data = model.actor_target.mu._modules[ + str(count)].weight.data * weight_scale + + model.actor.mu._modules[str(count)].bias.data = model.actor.mu._modules[str(count)].bias.data * bias_scale + model.actor_target.mu._modules[str(count)].bias.data = model.actor.mu._modules[ + str(count)].bias.data * bias_scale + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + +count = 0 + +for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + +# todo: Downscale actionspace - lessulgy possible? Interaction pytorch... +env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + +for ex_run in range(10): + # todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... 
+    env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1))
+
+    model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log='test',
+                 # model = myDDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name}/{n_trail}/',
+                 policy_kwargs=policy_kwargs,
+                 learning_rate=learning_rate, buffer_size=buffer_size,
+                 learning_starts=int(learning_starts * training_episode_length),
+                 batch_size=batch_size, tau=tau, gamma=gamma, action_noise=action_noise,
+                 train_freq=(1, "episode"), gradient_steps=- 1,
+                 optimize_memory_usage=False,
+                 create_eval_env=False, seed=None, device='auto', _init_setup_model=True)
+
+    # Adjust network -> maybe change to a custom net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html
+    # and scale weights and biases
+    count = 0
+    for kk in range(actor_number_layers + 1):
+
+        model.actor.mu._modules[str(count)].weight.data = model.actor.mu._modules[str(count)].weight.data * weight_scale
+        model.actor_target.mu._modules[str(count)].weight.data = model.actor_target.mu._modules[
+                                                                      str(count)].weight.data * weight_scale
+
+        model.actor.mu._modules[str(count)].bias.data = model.actor.mu._modules[str(count)].bias.data * bias_scale
+        model.actor_target.mu._modules[str(count)].bias.data = model.actor.mu._modules[
+                                                                   str(count)].bias.data * bias_scale
+
+        if kk < actor_number_layers:
+            model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor
+            model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor
+
+        count = count + 2
+
+    count = 0
+
+    for kk in range(critic_number_layers + 1):
+
+        if kk < critic_number_layers:
+            model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic
+            model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic
+
+        count = count + 2
+
+    # todo: Downscale action space - can the pytorch interaction be done in a less ugly way?
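# The two loops above rescale the freshly initialised actor and set the LeakyReLU slopes by indexing
# into the Sequential that stable-baselines3 builds for actor.mu (Linear layers at even indices,
# activations at odd indices). A self-contained sketch of the same rescaling on a plain torch
# Sequential; the layer sizes and scale factors below are illustrative, not the exact SB3 objects:

import torch as th

mu_sketch = th.nn.Sequential(th.nn.Linear(20, 131), th.nn.LeakyReLU(),
                             th.nn.Linear(131, 131), th.nn.LeakyReLU(),
                             th.nn.Linear(131, 6), th.nn.Tanh())
with th.no_grad():
    for module in mu_sketch:
        if isinstance(module, th.nn.Linear):
            module.weight.mul_(0.000132)   # weight_scale
            module.bias.mul_(0.1)          # bias_scale
        elif isinstance(module, th.nn.LeakyReLU):
            module.negative_slope = 0.1    # alpha_relu_actor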
+ env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + I_list0 = [] + I_list1 = [] + I_list2 = [] + I_action_list0 = [] + I_action_list1 = [] + I_action_list2 = [] + P_list0 = [] + P_list1 = [] + P_list2 = [] + return_sum = 0.0 + obs = env.reset() + while True: + + action, _states = model.predict(obs) + obs, rewards, done, info = env.step(action) + # I_list.append(env.used_I) + I_list0.append(env.used_I[0]) + I_list1.append(env.used_I[1]) + I_list2.append(env.used_I[2]) + P_list0.append(env.used_P[0]) + P_list1.append(env.used_P[1]) + P_list2.append(env.used_P[2]) + I_action_list0.append(env.used_I_action[0]) + I_action_list1.append(env.used_I_action[1]) + I_action_list2.append(env.used_I_action[2]) + env.render() + return_sum += rewards + if done: + break + env.close() + + plt.plot(P_list0, 'b') + plt.plot(P_list1, 'r') + plt.plot(P_list2, 'g') + # plt.xlim([0, 0.01]) + plt.grid() + plt.xlabel("time") + plt.ylabel("action_P") + plt.title('Test') + plt.show() + + plt.plot(I_list0, 'b') + plt.plot(I_list1, 'r') + plt.plot(I_list2, 'g') + # plt.xlim([0, 0.01]) + plt.grid() + plt.xlabel("time") + plt.ylabel("Integratorzustand") + plt.title('Test') + plt.show() + + plt.plot(I_action_list0, 'b') + plt.plot(I_action_list1, 'r') + plt.plot(I_action_list2, 'g') + # plt.xlim([0, 0.01]) + plt.grid() + plt.xlabel("time") + plt.ylabel("action_I") + plt.title('Test') + plt.show() diff --git a/experiments/hp_tune/examples/Pipi_testcase_eval.py b/experiments/hp_tune/examples/Pipi_testcase_eval.py new file mode 100644 index 00000000..396b44a4 --- /dev/null +++ b/experiments/hp_tune/examples/Pipi_testcase_eval.py @@ -0,0 +1,434 @@ +##################################### +# Experiment : Single voltage forming inverter supplying an RL-load via an LC-filter +# Controller: Cascaded PI-PI voltage and current controller gain parameters are optimized by SafeOpt +# a) FMU by OpenModelica and SafeOpt algorithm to find optimal controller parameters +# b) connecting via ssh to a testbench to perform real-world measurement +import logging +import os +import time +from functools import partial + +import GPy +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from pymongo import MongoClient +from stochastic.processes import VasicekProcess +from tqdm import tqdm + +from experiments.hp_tune.env.random_load import RandomLoad +from experiments.hp_tune.env.rewards import Reward +from openmodelica_microgrid_gym.agents import SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +show_plots = False +balanced_load = False +save_results = False + +num_average = 25 +max_episode_steps_list = [1000, 5000, 10000, 20000, 50000, 100000] + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +# Files saves results and resulting plots to the folder saves_VI_control_safeopt in the current directory +current_directory = os.getcwd() +folder_name = 'Pipi_safeopt_best_run4d' +save_folder = 
os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +np.random.seed(1) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_vctrl_single_inv.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +max_episode_steps = 1000 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from +v_DC = 600 # DC-link voltage / V; will be set as model parameter in the FMU +nomFreq = 60 # nominal grid frequency / Hz +nomVoltPeak = 169.7 # 230 * 1.414 # nominal grid voltage / V +iLimit = 16 # inverter current limit / A +iNominal = 12 # nominal inverter current / A +vNominal = 190 # nominal inverter current / A +vLimit = vNominal * 1.5 # inverter current limit / A +funnelFactor = 0.02 +vFunnel = np.array([vNominal * funnelFactor, vNominal * funnelFactor, vNominal * funnelFactor]) +mu = 400 # factor for barrier function (see below) +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + + +class Recorder: + + def __init__(self, URI: str = 'mongodb://localhost:27017/', database_name: str = 'OMG', ): + self.client = MongoClient(URI) + self.db = self.client[database_name] + + def save_to_mongodb(self, col: str = ' trails', data=None): + trial_coll = self.db[col] # get collection named col + if data is None: + raise ValueError('No data given to store in database!') + trial_coll.insert_one(data) + + +rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma=0, + use_gamma_normalization=1, error_exponent=0.5, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + +##################################### +# Definitions for the GP +prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set +noise_var = 0.001 # ** 2 # measurement noise sigma_omega +prior_var = 2 # prior variance of the GP + +# Choose Kp and Ki (current and voltage controller) as mutable parameters (below) and define bounds and lengthscale +# for both of them +bounds = [(0.000, 0.045), (4, 450)] # bounds on the input variable current-Ki&Kp and voltage-Ki&Kp +lengthscale = [.003, 50.] # length scale for the parameter variation [current-Ki&Kp and voltage-Ki&Kp] for the GP + +# The performance should not drop below the safe threshold, which is defined by the factor safe_threshold times +# the initial performance: safe_threshold = 1.2 means: performance measurement for optimization are seen as +# unsafe, if the new measured performance drops below 20 % of the initial performance of the initial safe (!) +# parameter set +safe_threshold = 0 +j_min = -5 # cal min allowed performance + +# The algorithm will not try to expand any points that are below this threshold. This makes the algorithm stop +# expanding points eventually. 
+# The following variable is multiplied with the first performance of the initial set by the factor below: +explore_threshold = 0 + +# Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of +# limit exceeded +abort_reward = 100 * j_min + +# Definition of the kernel +kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + +##################################### +# Definition of the controllers +# Choose Kp and Ki for the current and voltage controller as mutable parameters +mutable_params = dict(voltageP=MutableFloat(0.002), voltageI=MutableFloat(143)) # 300Hz +# mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz +voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-iLimit, iLimit)) + +kp_c = 0.033 +ki_c = 17.4 # 11.8 +current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) # Current controller values + +# Define the droop parameters for the inverter of the active power Watt/Hz (DroopGain), delta_t (0.005) used for the +# filter and the nominal frequency +# Droop controller used to calculate the virtual frequency drop due to load changes +droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) + +# Define the Q-droop parameters for the inverter of the reactive power VAR/Volt, delta_t (0.002) used for the +# filter and the nominal voltage +qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + +# Define a voltage forming inverter using the PIPI and droop parameters from above + +# Controller with observer +# ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, delta_t, droop_param, qdroop_param, +# observer=[Lueneberger(*params) for params in +# repeat((A, B, C, L, delta_t * undersample, v_DC / 2), 3)], undersampling=undersample, +# name='master') + +# Controller without observer +ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + +##################################### +# Definition of the optimization agent +# The agent is using the SafeOpt algorithm by F. Berkenkamp (https://arxiv.org/abs/1509.01066) in this example +# Arguments described above +# History is used to store results +agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +# plant + +# toDo: shift this to net?! +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) 
+upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 + +for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + for ave_run in tqdm(range(num_average), desc='steps', unit='step', leave=False): + R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + rand_load_train = RandomLoad(max_episode_steps, net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + + cb = CallbackList() + # set initial = None to reset load random in range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + cb.append(rand_load_train.reset) + + plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + + rand_load_test = RandomLoad(max_episode_steps, net.ts, gen, + load_curve=pd.read_pickle( + 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + # PlotTmpl([[f'master.I_hat{i}' for i in 'abc'], [f'r_load.resistor{i}.i' for i in '123'], ], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$i_{\mathrm{o estimate,abc}}\,/\,\mathrm{A}$'), + # color=[['b', 'r', 'g'], ['b', 'r', 'g']], + # style=[['-*'], ['--*']] + # ), + # PlotTmpl([[f'master.m{i}' for i in 'dq0']], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$m_{\mathrm{dq0}}\,/\,\mathrm{}$', + # filename='Sim_m_dq0') + # ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + log_level=logging.INFO, + viz_mode='episode', + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 
'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.random_load_step, + 'r_load.resistor2.R': rand_load_train.random_load_step, + 'r_load.resistor3.R': rand_load_train.random_load_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + history=FullHistory(), + on_episode_reset_callback=cb.fire, + action_time_delay=1 * undersample + ) + + return_sum = 0.0 + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + agent.reset() + agent.obs_varnames = env.history.cols + env.history.cols = env.history.structured_cols(None) + agent.measurement_cols + env.measure = agent.measure + + reward_list = [] + + agent_fig = None + obs = env.reset() + # for step in tqdm(range(env.max_episode_steps), desc='steps', unit='step', leave=False): + for step in range(env.max_episode_steps): + # for _ in tqdm(range(1000), desc='steps', unit='step', leave=False): + + done, r = False, None + + if len(reward_list) > 10000: + asd = 1 + + agent.observe(r, done) + act = agent.act(obs) + obs, r, done, info = env.step(act) + reward_list.append(r) + env.render() + return_sum += r + if r == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + + if step % 1000 == 0 and step != 0: + env.close() + agent.reset() + obs = env.reset() + + # break + # close env before calling final agent observe to see plots even if agent crashes + _, env_fig = env.close() + agent.observe(r, done) + print(limit_exceeded_in_test) + + # return (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + ret_list.append((return_sum / env.max_episode_steps + limit_exceeded_penalty)) + ret_array[ave_run] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # ret_dict[str(ave_run)] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # zipped = zip(max_episode_steps_list[max_eps_steps], ret_list) + # temp_dict = dict(zipped) + temp_dict = {str(max_episode_steps_list[max_eps_steps]): ret_list} + result_list.append(temp_dict) + # ret_dict.append(zipped) + # df = df.append(ret_dict) + + mean_list.append(np.mean(ret_array)) + std_list.append(np.std(ret_array)) + +# df = df.append(temp_list, True) +print(mean_list) +print(std_list) +print(result_list) + +results = { + 'Mean': mean_list, + 'Std': std_list, + 'All results': result_list, + 'max_episode_steps_list': max_episode_steps_list +} + +df = pd.DataFrame(results) +df.to_pickle("Pipi.pkl") +asd = 1 + +m = np.array(df['Mean']) +s = np.array(df['Std']) +max_episode_steps_list = 
np.array(df['max_episode_steps_list']) + +plt.plot(max_episode_steps_list, m) +plt.fill_between(max_episode_steps_list, m - s, m + s, facecolor='r') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('') +plt.show() + +# plt.plot(max_episode_steps_list, m) +# plt.fill_between(max_episode_steps_list, m - s, m + s, facecolor='r') +plt.errorbar(max_episode_steps_list, m, s, fmt='-o') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('') +plt.show() + +plt.plot(max_episode_steps_list, s) +plt.ylabel('std') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('') +plt.show() diff --git a/experiments/hp_tune/examples/Pipi_testcase_optimization.py b/experiments/hp_tune/examples/Pipi_testcase_optimization.py new file mode 100644 index 00000000..7d46e116 --- /dev/null +++ b/experiments/hp_tune/examples/Pipi_testcase_optimization.py @@ -0,0 +1,375 @@ +##################################### +# Experiment : Single voltage forming inverter supplying an RL-load via an LC-filter +# Controller: Cascaded PI-PI voltage and current controller gain parameters are optimized by SafeOpt +# a) FMU by OpenModelica and SafeOpt algorithm to find optimal controller parameters +# b) connecting via ssh to a testbench to perform real-world measurement +import logging +import os +import time +from functools import partial + +import GPy +import gym +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from pymongo import MongoClient +from stochastic.processes import VasicekProcess + +from experiments.hp_tune.env.random_load import RandomLoad +from experiments.hp_tune.env.rewards import Reward +# from experiments.model_validation.execution.monte_carlo_runner import MonteCarloRunner +from experiments.hp_tune.execution.reset_runner import MonteCarloRunner +from experiments.hp_tune.execution.runner import Runner +from openmodelica_microgrid_gym.agents import SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +show_plots = False +balanced_load = False +save_results = True +PIPI = True + +num_average = 25 +max_episode_steps_list = [1000, 5000, 10000, 20000, 50000, 100000] + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +# Files saves results and resulting plots to the folder saves_VI_control_safeopt in the current directory +current_directory = os.getcwd() +folder_name = 'Pipi_new_testcase_opt_4D_reset2' +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +np.random.seed(1) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_vctrl_single_inv.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +max_episode_steps = 99999#10000 # number of simulation steps per episode +num_episodes = 100 # number of simulation episodes (i.e. 
SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from +v_DC = 600 # DC-link voltage / V; will be set as model parameter in the FMU +nomFreq = 60 # nominal grid frequency / Hz +nomVoltPeak = 169.7 # 230 * 1.414 # nominal grid voltage / V +iLimit = 16 # inverter current limit / A +iNominal = 12 # nominal inverter current / A +vNominal = 190 # nominal inverter current / A +vLimit = vNominal * 1.5 # inverter current limit / A +funnelFactor = 0.02 +vFunnel = np.array([vNominal * funnelFactor, vNominal * funnelFactor, vNominal * funnelFactor]) +mu = 400 # factor for barrier function (see below) +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + + +class Recorder: + + def __init__(self, URI: str = 'mongodb://localhost:27017/', database_name: str = 'OMG', ): + self.client = MongoClient(URI) + self.db = self.client[database_name] + + def save_to_mongodb(self, col: str = ' trails', data=None): + trial_coll = self.db[col] # get collection named col + if data is None: + raise ValueError('No data given to store in database!') + trial_coll.insert_one(data) + + +rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma=0, + use_gamma_normalization=1, error_exponent=0.5, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + +##################################### +# Definitions for the GP +prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set +noise_var = 0.001 # ** 2 # measurement noise sigma_omega +prior_var = 2 # prior variance of the GP + +# Choose Kp and Ki (current and voltage controller) as mutable parameters (below) and define bounds and lengthscale +# for both of them +if PIPI: + # bounds = [(0.001, 0.07), (2, 150), (0.000, 0.045), (4, 450)] + # lengthscale = [0.005, 25., 0.008, 150] # .003, 50.] + bounds = [(0.001, 0.07), (2, 150), (0.000, 0.05), (4, 600)] + lengthscale = [0.01, 35., 0.01, 175] # .003, 50.] + mutable_params = dict(currentP=MutableFloat(0.04), currentI=MutableFloat(11.8), + voltageP=MutableFloat(0.0175), voltageI=MutableFloat(12)) + current_dqp_iparams = PI_params(kP=mutable_params['currentP'], kI=mutable_params['currentI'], + limits=(-1, 1)) # Best set from paper III-D + +else: + bounds = [(0.000, 0.045), (4, 450)] + lengthscale = [0.01, 150] # [0.003, 50] + mutable_params = dict(voltageP=MutableFloat(0.0175), voltageI=MutableFloat(12)) # 300Hz + kp_c = 0.04 + ki_c = 11.8 # 11.8 + current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) + +# The performance should not drop below the safe threshold, which is defined by the factor safe_threshold times +# the initial performance: safe_threshold = 1.2 means: performance measurement for optimization are seen as +# unsafe, if the new measured performance drops below 20 % of the initial performance of the initial safe (!) +# parameter set +safe_threshold = 0 +j_min = -50000 # -5 # 15000? # cal min allowed performance + +# The algorithm will not try to expand any points that are below this threshold. This makes the algorithm stop +# expanding points eventually. 
+# The following variable is multiplied with the first performance of the initial set by the factor below: +explore_threshold = -200000 + +# Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of +# limit exceeded +abort_reward = 100 * j_min + +# Definition of the kernel +kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + +##################################### +# Definition of the controllers +# Choose Kp and Ki for the current and voltage controller as mutable parameters + +# mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz +voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-iLimit, iLimit)) + +# Current controller values + +# Define the droop parameters for the inverter of the active power Watt/Hz (DroopGain), delta_t (0.005) used for the +# filter and the nominal frequency +# Droop controller used to calculate the virtual frequency drop due to load changes +droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) + +# Define the Q-droop parameters for the inverter of the reactive power VAR/Volt, delta_t (0.002) used for the +# filter and the nominal voltage +qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + +# Define a voltage forming inverter using the PIPI and droop parameters from above + +# Controller with observer +# ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, delta_t, droop_param, qdroop_param, +# observer=[Lueneberger(*params) for params in +# repeat((A, B, C, L, delta_t * undersample, v_DC / 2), 3)], undersampling=undersample, +# name='master') + +# Controller without observer +ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + +##################################### +# Definition of the optimization agent +# The agent is using the SafeOpt algorithm by F. Berkenkamp (https://arxiv.org/abs/1509.01066) in this example +# Arguments described above +# History is used to store results +agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +# plant + +# toDo: shift this to net?! +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) 
+upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 + +R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + +gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + +rand_load_train = RandomLoad(max_episode_steps, net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + +cb = CallbackList() +# set initial = None to reset load random in range of bounds +cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) +cb.append(rand_load_train.reset) + +plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + +rand_load_test = RandomLoad(max_episode_steps, net.ts, gen, + load_curve=pd.read_pickle( + 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + +def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + +env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + # PlotTmpl([[f'master.I_hat{i}' for i in 'abc'], [f'r_load.resistor{i}.i' for i in '123'], ], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$i_{\mathrm{o estimate,abc}}\,/\,\mathrm{A}$'), + # color=[['b', 'r', 'g'], ['b', 'r', 'g']], + # style=[['-*'], ['--*']] + # ), + # PlotTmpl([[f'master.m{i}' for i in 'dq0']], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$m_{\mathrm{dq0}}\,/\,\mathrm{}$', + # filename='Sim_m_dq0') + # ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + log_level=logging.INFO, + viz_mode='episode', + max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor1.R'), + # 'r_load.resistor2.R': 
partial(rand_load_test.give_dataframe_value, col='r_load.resistor2.R'), + # 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, col='r_load.resistor3.R'), + 'r_load.resistor1.R': rand_load_train.random_load_step, + 'r_load.resistor2.R': rand_load_train.random_load_step, + 'r_load.resistor3.R': rand_load_train.random_load_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstepWIN.fmu', + history=FullHistory(), + on_episode_reset_callback=cb.fire, + action_time_delay=1 * undersample + ) + +runner = MonteCarloRunner(agent, env) +runner.run(num_episodes, n_mc=n_MC, visualise=True, # prepare_mc_experiment=reset_loads, + return_gradient_extend=False) + +df_len = pd.DataFrame({'lengthscale': lengthscale, + 'bounds': bounds, + 'balanced_load': balanced_load, + 'barrier_param_mu': mu, + 'J_min': j_min}) + +if save_results: + agent.history.df.to_csv(save_folder + '/_result.csv') + df_len.to_csv(save_folder + '/_params.csv') +if not PIPI: + best_agent_plt = runner.run_data['last_agent_plt'] + ax = best_agent_plt.axes[0] + ax.grid(which='both') + ax.set_axisbelow(True) + + agent.params.reset() + ax.set_ylabel(r'$K_\mathrm{i}\,/\,\mathrm{(AV^{-1}s^{-1})}$') + ax.set_xlabel(r'$K_\mathrm{p}\,/\,\mathrm{(AV^{-1})}$') + ax.get_figure().axes[1].set_ylabel(r'$J$') + plt.title('Lengthscale = {}; balanced = '.format(lengthscale, balanced_load)) + # ax.plot([0.01, 0.01], [0, 250], 'k') + # ax.plot([mutable_params['currentP'].val, mutable_params['currentP'].val], bounds[1], 'k-', zorder=1, + # lw=4, + # alpha=.5) + best_agent_plt.show() + if save_results: + best_agent_plt.savefig(save_folder + '/_agent_plt.pdf') + #best_agent_plt.savefig(save_folder + '/_agent_plt.pgf') + agent.history.df.to_csv(save_folder + '/_result.csv') + +print('\n Experiment finished with best set: \n\n {}'.format(agent.history.df.round({'J': 4, 'Params': 4}))) +print('\n Experiment finished with best set: \n') +print('\n Current-Ki&Kp and voltage-Ki&Kp = {}'.format( + agent.history.df.at[np.argmax(agent.history.df['J']), 'Params'])) +print(' Resulting in a performance of J = {}'.format(np.max(agent.history.df['J']))) +print('\n\nBest experiment results are plotted in the following:') diff --git a/experiments/hp_tune/examples/Stoch_load_test.py b/experiments/hp_tune/examples/Stoch_load_test.py new file mode 100644 index 00000000..a1eaa3f4 --- /dev/null +++ b/experiments/hp_tune/examples/Stoch_load_test.py @@ -0,0 +1,46 @@ +from functools import partial + +import gym +import matplotlib.pyplot as plt +import pandas as pd +from stochastic.processes import VasicekProcess + +from experiments.hp_tune.env.random_load import RandomLoad +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import RandProcess + +load = 55 # 28 +upper_bound_load = 200 +lower_bound_load = -10 +net = Network.load('net/net_vctrl_single_inv.yaml') +max_episode_steps = 10000 # int(2 / net.ts) 
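Here, as in the training setup above, the load resistance is drawn from a mean-reverting Vasicek (Ornstein-Uhlenbeck-type) process that RandProcess clips to the given bounds. The following self-contained numpy sketch of the same dynamics, dR = speed * (mean - R) * dt + vol * dW, is illustrative only; the scripts delegate the actual sampling to stochastic.processes.VasicekProcess, and the names ending in _demo are not part of the code above.

import numpy as np

def simulate_vasicek_demo(r0, speed, mean, vol, dt, n_steps, bounds, seed=0):
    """Euler-Maruyama discretization of dR = speed*(mean - R)*dt + vol*dW, clipped to bounds."""
    rng = np.random.default_rng(seed)
    r = np.empty(n_steps)
    r[0] = r0
    for k in range(1, n_steps):
        dw = rng.normal(0.0, np.sqrt(dt))
        r[k] = r[k - 1] + speed * (mean - r[k - 1]) * dt + vol * dw
        r[k] = np.clip(r[k], *bounds)  # hard clipping, similar in spirit to the RandProcess bounds
    return r

# e.g. 2000 steps with an illustrative step size of 1e-4 s, reverting towards 55 Ohm:
# R_demo = simulate_vasicek_demo(55, speed=1000, vol=200, mean=55, dt=1e-4, n_steps=2000, bounds=(-10, 200))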
+ +if __name__ == '__main__': + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=1000, vol=200, mean=load), initial=load, + bounds=(lower_bound_load, upper_bound_load)) + + rand_load = RandomLoad(max_episode_steps, net.ts, gen, bounds=(14, 200), bounds_std=(2, 0)) + + R_load = [] + t_vec = [] + t = 0 + + for ii in range(2000): + # if ii % 1000 == 0: + # gen.reset() + + R_load.append(rand_load.random_load_step(t)) + + t += net.ts + + t_vec.append(t) + + plt.plot(t_vec, R_load) + # plt.ylim([5,20]) + plt.show() + + df = pd.DataFrame(R_load) + + hist = df.hist(bins=100) + plt.show() diff --git a/experiments/hp_tune/examples/__init__.py b/experiments/hp_tune/examples/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/experiments/hp_tune/examples/action_noise_example.py b/experiments/hp_tune/examples/action_noise_example.py new file mode 100644 index 00000000..74733cd1 --- /dev/null +++ b/experiments/hp_tune/examples/action_noise_example.py @@ -0,0 +1,51 @@ +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise +import numpy as np +import matplotlib.pyplot as plt + +from experiments.hp_tune.util.action_noise_wrapper import myOrnsteinUhlenbeckActionNoise + +noise_var = 2. +noise_theta = 25 # stiffness of OU + +action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(3), theta=noise_theta * np.ones(3), + sigma=noise_var * np.ones(3), dt=1e-4) + +action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=1000, sigma_min=np.zeros(3), mean=np.zeros(3), + theta=noise_theta * np.ones(3), + sigma=noise_var * np.ones(3), dt=1e-4) + +noise = np.zeros([3, 1000]) +noise2 = np.zeros([3, 1000]) +noise3 = np.zeros([3, 1000]) + +for i in range(1000): + noise[:, i] = action_noise.__call__() + +action_noise.reset() # does not reset the noise reduction! 
Reduction not per episode but per learing, since action noise +# is redifiend then, no reset of annealing needed +for i in range(1000): + noise2[:, i] = action_noise.__call__() + +action_noise3 = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=1000, sigma_min=np.zeros(3), mean=np.zeros(3), + theta=noise_theta * np.ones(3), + sigma=noise_var * np.ones(3), dt=1e-4) +for i in range(1000): + noise3[:, i] = action_noise3.__call__() + +plt.plot(noise[0, :]) +plt.plot(noise[1, :]) +plt.plot(noise[2, :]) +plt.title(f'Stiffness theta = {noise_theta}, Varianz = {noise_var}') +plt.show() + +plt.plot(noise2[0, :]) +plt.plot(noise2[1, :]) +plt.plot(noise2[2, :]) +plt.title(f'Stiffness theta = {noise_theta}, Varianz = {noise_var}') +plt.show() + +plt.plot(noise3[0, :]) +plt.plot(noise3[1, :]) +plt.plot(noise3[2, :]) +plt.title(f'Stiffness theta = {noise_theta}, Varianz = {noise_var}') +plt.show() diff --git a/experiments/hp_tune/examples/critic_gamma_investigation.py b/experiments/hp_tune/examples/critic_gamma_investigation.py new file mode 100644 index 00000000..4a7eb3b6 --- /dev/null +++ b/experiments/hp_tune/examples/critic_gamma_investigation.py @@ -0,0 +1,294 @@ +import time +from typing import Union + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from pymongo import MongoClient +from stable_baselines3 import DDPG +from stable_baselines3.common.callbacks import BaseCallback, EveryNTimesteps +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.type_aliases import GymStepReturn + +from experiments.hp_tune.agents.my_ddpg import myDDPG +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net, folder_name, max_episode_steps +from experiments.hp_tune.util.record_env import RecordEnvCallback +from openmodelica_microgrid_gym.env import PlotTmpl + +np.random.seed(0) + +# toDo: what to store: +""" +Alle importieren vom Recorder der in DB speichert und interagieren an den richtungen stellen mit dem env/agent... + +after training: -> like: SaveOnBestTrainingRewardCallback(BaseCallback): after training + hyperopt-data + weights + model / net-architecture + +Each step: -> StepRecorder (ggf. StepMonitor?) + training_reward + messdaten? (aus der net.yaml die outs?) + + training_return -> if episode done: store return(-> sollte der Monitor kennen) + +config +skriptname +start- und endzeit stempel +Computername +Architektur des Netzes (mit model.to_json() ) +Gewichte des Netzes (mit model.get_layer('layer_name').weights) +Prädiktion (für jede Zielgröße eine längere Liste) +Testset (profilnummern von den messschrieben die prädiziert wurden) + +""" + + +class Recorder: + + def __init__(self, URI: str = 'mongodb://localhost:27017/', database_name: str = 'OMG', ): + self.client = MongoClient(URI) + self.db = self.client[database_name] + + def save_to_mongodb(self, col: str = ' trails', data=None): + trial_coll = self.db[col] + if data is None: + raise ValueError('No data given to store in database!') + trial_coll.insert_one(data) + + +class StepRecorder(Monitor): + + def __init__(self, env): + super().__init__(env) + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + observation, reward, done, info = super().step(action) + # print(reward) + + # hier vll noch die Messung loggen? aus der obs die richtigen suchen? 
wie figure out die augmented states? + + return observation, reward, done, info + + +class TrainRecorder(BaseCallback): + + def __init__(self, verbose=1): + super(TrainRecorder, self).__init__(verbose) + + def _on_training_end(self) -> None: + """ + This event is triggered before exiting the `learn()` method. + """ + # asd = 1 + # ads = 2 + pass + + def _on_step(self) -> bool: + # asd = 1 + return True + + def _on_rollout_end(self) -> None: + # asd = 1 + pass + + +def experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, batch_size, + actor_hidden_size, critic_hidden_size, noise_var, noise_theta, error_exponent, n_trail): + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent) + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + fig.savefig( + f'{folder_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + fig.savefig(f'{folder_name}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun, + abort_reward=-(1 - gamma), + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + # on_episode_reset_callback=cb.fire # needed? + ) + + env = StepRecorder(env) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size], qf=[critic_hidden_size, + critic_hidden_size, + critic_hidden_size])) + + callback = TrainRecorder() + + # instead of new agent load trained one and train on + # toDo: load optuna data from model + + model = myDDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name}/{n_trail}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=5000, learning_starts=100, + batch_size=batch_size, tau=0.005, gamma=gamma, action_noise=action_noise, + train_freq=- 1, gradient_steps=- 1, n_episodes_rollout=1, optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + model.actor.mu._modules['0'].weight.data = model.actor.mu._modules['0'].weight.data * weight_scale + model.actor.mu._modules['2'].weight.data = model.actor.mu._modules['2'].weight.data * weight_scale + model.actor_target.mu._modules['0'].weight.data = model.actor_target.mu._modules['0'].weight.data * weight_scale + model.actor_target.mu._modules['2'].weight.data = model.actor_target.mu._modules['2'].weight.data * weight_scale + + # todo: instead /here? store reward per step?! 
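The weight scaling applied to the actor modules above is meant to keep the untrained policy's outputs, and hence the applied actions, close to zero at the start of training. A standalone torch sketch of the same idea on a generic MLP actor; it does not touch the SB3 internals used above, and the layer sizes and _demo names are only illustrative.

import torch as th
import torch.nn as nn

actor_demo = nn.Sequential(nn.Linear(6, 100), nn.LeakyReLU(), nn.Linear(100, 3), nn.Tanh())
weight_scale_demo = 0.1

with th.no_grad():
    for layer in actor_demo:
        if isinstance(layer, nn.Linear):
            layer.weight.mul_(weight_scale_demo)  # shrink the initial weights in place

print(actor_demo(th.randn(1, 6)))  # outputs stay close to zero for typical observations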
+ plot_callback = EveryNTimesteps(n_steps=1000, callback=RecordEnvCallback(env, model, max_episode_steps)) + model.learn(total_timesteps=10000, callback=[callback, plot_callback]) + # model.learn(total_timesteps=1000, callback=callback) + + plt.plot(model.critic_loss_batch_mean) + plt.ylabel('Critic_loss (mean per batch)') + plt.xlabel('Training-step') + plt.title(f'gamma = {gamma}') + plt.show() + + plt.plot(model.actor_loss_batch_mean) + plt.ylabel('Actor_loss (mean per batch)') + plt.xlabel('Training-step') + plt.title(f'gamma = {gamma}') + plt.show() + + plt.plot(model.critic_estimate_target_diff_mean) + plt.ylabel('sum(Q_estimat - target)/N_batch_size') + plt.xlabel('Training-step') + plt.title(f'gamma = {gamma}') + plt.show() + + plt.plot(model.current_q_estimates_batch_mean) + plt.ylabel('Q_estimat (mean per batch)') + plt.xlabel('Training-step') + plt.title(f'gamma = {gamma}') + plt.show() + + plt.plot(model.target_q_batch_mean) + plt.ylabel('target (mean per batch)') + plt.xlabel('Training-step') + plt.title(f'gamma = {gamma}') + plt.show() + + plt.plot(model.reward_batch_mean) + plt.ylabel('reward (mean per batch)') + plt.xlabel('Training-step') + plt.title(f'gamma = {gamma}') + plt.show() + + monitor_rewards = env.get_episode_rewards() + print(monitor_rewards) + # todo: instead: store model(/weights+bias?) to database + model.save(f'{folder_name}/{n_trail}/model.zip') + + return_sum = 0.0 + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + ) + + obs = env_test.reset() + + # toDo: - Use other Test-episode + # - Rückgabewert = (Summe der üblichen Rewards) / (Anzahl steps Validierung) + (Penalty i.H.v. 
-1) + while True: + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + +learning_rate = 0.00018 # trail.suggest_loguniform("lr", 1e-5, 5e-3) # 0.0002# +gamma = 0 # trail.suggest_loguniform("gamma", 0.1, 0.99) +weight_scale = 0.1 # trail.suggest_loguniform("weight_scale", 5e-4, 1) # 0.005 +batch_size = 150 # trail.suggest_int("batch_size", 32, 1024) # 128 +actor_hidden_size = 100 # trail.suggest_int("actor_hidden_size", 10, 500) # 100 # Using LeakyReLU +critic_hidden_size = 100 # trail.suggest_int("actor_hidden_size", 10, 500) # 100 +n_trail = str(0) +use_gamma_in_rew = 0 +noise_var = 1 # trail.suggest_loguniform("noise_var", 0.01, 10) # 2 +noise_theta = 25 # trail.suggest_loguniform("noise_theta", 1, 50) # 25 # stiffness of OU + +# toDo: +error_exponent = 0.15 # trail.suggest_loguniform("error_exponent", 0.01, 4) +# alpha_lRelu = trail.suggest_loguniform("alpha_lRelu", 0.0001, 0.5) #0.1 +# memory_interval = 1 +# weigth_regularizer = 0.5 +# memory_lim = 5000 # = buffersize? +# warm_up_steps_actor = 2048 +# warm_up_steps_critic = 1024 # learning starts? +# target_model_update = 1000 + + +episode_return = experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, batch_size, + actor_hidden_size, critic_hidden_size, noise_var, noise_theta, error_exponent, + n_trail) + +print(episode_return) diff --git a/experiments/hp_tune/examples/ddpg_testcase_eval.py b/experiments/hp_tune/examples/ddpg_testcase_eval.py new file mode 100644 index 00000000..d9b70bb5 --- /dev/null +++ b/experiments/hp_tune/examples/ddpg_testcase_eval.py @@ -0,0 +1,285 @@ +import platform +import time + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from tqdm import tqdm +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +import pandas as pd + +# np.random.seed(0) +from openmodelica_microgrid_gym.util import abc_to_dq0 + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] + +num_average = 25 +max_episode_steps_list = [1000, 5000, 10000, 20000, 50000, 100000] + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +# def run_testcase_DDPG(gamma=0.8003175741091463, integrator_weight=0.6618979905182214, +# antiwindup_weight=0.9197062574269099, +# model_path='experiments/hp_tune/trained_models/study_18_run_6462/', +# error_exponent=0.3663140388100587, 
use_gamma_in_rew=1, n_trail=50000, +# actor_number_layers=2, critic_number_layers=3, +# alpha_relu_actor=0.04768952563400553, +# alpha_relu_critic=0.00019026593928712137 +# ): +gamma = 0.8003175741091463 +integrator_weight = 0.6618979905182214 +antiwindup_weight = 0.9197062574269099 +model_path = 'experiments/hp_tune/trained_models/study_18_run_6462_new/' +error_exponent = 0.3663140388100587 +use_gamma_in_rew = 1 +n_trail = 50000 +actor_number_layers = 2 +critic_number_layers = 3 +alpha_relu_actor = 0.04768952563400553 +alpha_relu_critic = 0.00019026593928712137 + +for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + for ave_run in tqdm(range(num_average), desc='steps', unit='step', leave=False): + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, + i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'], + max_episode_steps=max_episode_steps_list[max_eps_steps] + ) + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, penalty_P_weight=0) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + + env_test.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + model = DDPG.load(model_path + f'model.zip') # , env=env_test) + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + env_test.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + v_d = [] + v_q = [] + v_0 = [] + + for step in range(env_test.max_episode_steps): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + 
integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + v_a = env_test.history.df['lc.capacitor1.v'].iloc[-1] + v_b = env_test.history.df['lc.capacitor2.v'].iloc[-1] + v_c = env_test.history.df['lc.capacitor3.v'].iloc[-1] + + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), env_test.env.net.components[0].phase) + + v_d.append(v_dq0[0]) + v_q.append(v_dq0[1]) + v_0.append(v_dq0[2]) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + + if step % 1000 == 0 and step != 0: + env_test.close() + + obs = env_test.reset() + + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! (risk=0 manullay in env); only voltage taken into account in reward!"} + + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + # mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + plt.plot(v_d) + plt.plot(v_q) + plt.plot(v_0) + plt.xlabel("") + plt.ylabel("v_dq0") + plt.title('Test') + plt.show() + + # return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + ret_list.append((return_sum / env_test.max_episode_steps + limit_exceeded_penalty)) + ret_array[ave_run] = (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + # ret_dict[str(ave_run)] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # zipped = zip(max_episode_steps_list[max_eps_steps], ret_list) + # temp_dict = dict(zipped) + temp_dict = {str(max_episode_steps_list[max_eps_steps]): ret_list} + result_list.append(temp_dict) + # ret_dict.append(zipped) + # df = df.append(ret_dict) + + mean_list.append(np.mean(ret_array)) + std_list.append(np.std(ret_array)) + +# df = df.append(temp_list, True) +print(mean_list) +print(std_list) +print(result_list) + +results = { + 'Mean': mean_list, + 'Std': std_list, + 'All results': result_list, + 'max_episode_steps_list': max_episode_steps_list +} + +df = pd.DataFrame(results) +df.to_pickle("DDPG_study18_best_test_varianz.pkl") +asd = 1 + +m = np.array(df['Mean']) +s = np.array(df['Std']) +max_episode_steps_list = np.array(df['max_episode_steps_list']) + 
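The v_dq0 traces collected in the evaluation loop above come from a Park transform of the instantaneous capacitor voltages at the current phase angle. For reference, a textbook amplitude-invariant sketch of that mapping; the script itself uses openmodelica_microgrid_gym.util.abc_to_dq0, whose sign convention may differ, and the names ending in _sketch/_demo are illustrative.

import numpy as np

def abc_to_dq0_sketch(x_abc, theta):
    """Amplitude-invariant Park transform (textbook convention)."""
    d = 2 / 3 * (x_abc[0] * np.cos(theta)
                 + x_abc[1] * np.cos(theta - 2 * np.pi / 3)
                 + x_abc[2] * np.cos(theta + 2 * np.pi / 3))
    q = -2 / 3 * (x_abc[0] * np.sin(theta)
                  + x_abc[1] * np.sin(theta - 2 * np.pi / 3)
                  + x_abc[2] * np.sin(theta + 2 * np.pi / 3))
    zero = (x_abc[0] + x_abc[1] + x_abc[2]) / 3
    return np.array([d, q, zero])

theta_demo = 0.1
v_abc_demo = 169.7 * np.cos(theta_demo - np.array([0, 2 * np.pi / 3, -2 * np.pi / 3]))
print(abc_to_dq0_sketch(v_abc_demo, theta_demo))  # ~[169.7, 0, 0] for a balanced set aligned with theta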
+plt.plot(max_episode_steps_list, m) +plt.fill_between(max_episode_steps_list, m - s, m + s, facecolor='r') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('DDPG') +plt.show() + +# plt.plot(max_episode_steps_list, m) +# plt.fill_between(max_episode_steps_list, m - s, m + s, facecolor='r') +plt.errorbar(max_episode_steps_list, m, s, fmt='-o') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('DDPG') +plt.show() + +plt.plot(max_episode_steps_list, s) +plt.ylabel('std') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title('DDPG') +plt.show() diff --git a/experiments/hp_tune/examples/experiment_vctrl_single_inv_optuna_standalone.py b/experiments/hp_tune/examples/experiment_vctrl_single_inv_optuna_standalone.py new file mode 100644 index 00000000..e89b9e2c --- /dev/null +++ b/experiments/hp_tune/examples/experiment_vctrl_single_inv_optuna_standalone.py @@ -0,0 +1,551 @@ +import itertools +import time +from typing import Union + +import gym +import matplotlib.pyplot as plt +import numpy as np +import optuna +import torch as th +from stable_baselines3 import DDPG +from stable_baselines3.common.callbacks import EveryNTimesteps +from stable_baselines3.common.monitor import Monitor +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise +from stable_baselines3.common.type_aliases import GymStepReturn + +from experiments.hp_tune.agents.my_ddpg import myDDPG +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net, folder_name +from experiments.hp_tune.util.action_noise_wrapper import myOrnsteinUhlenbeckActionNoise +from experiments.hp_tune.util.record_env import RecordEnvCallback +from experiments.hp_tune.util.recorder import Recorder +from experiments.hp_tune.util.training_recorder import TrainRecorder +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.util import abc_to_alpha_beta, dq0_to_abc + +np.random.seed(0) + +print('!!!!!!!!!!!!!') +print('Old examplefile for standalone!') +print('Still needs to be refactored using config -> is_dq0,...') +print('Better use hp_tune + sqlite if local wanted') +print('Here some examples for features in abc....') + +number_learning_steps1 = 500000 +number_plotting_steps = 100000 +number_trails = 10 + +params_change = [] + +mongo_recorder = Recorder(database_name=folder_name) + + +class FeatureWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = np.inf, recorder=None, + n_trail=""): + """ + Env Wrapper to add features to the env-observations and adds information to env.step output which can be used in + case of an continuing (non-episodic) task to reset the environment without being terminated by done + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) 
+ : + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + self._v_pahsor = 0.0 + self.n_episode = 0 + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.reward_episode_mean = [] + self.n_trail = n_trail + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + + #action_abc = dq0_to_abc(action, self.env.net.components[0].phase) + + # clipping? + obs, reward, done, info = super().step(action) + + self._n_training_steps += 1 + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + + # log measurement here? + + # add wanted features here (add appropriate self.observation in init!!) + # calculate magnitude of current phasor abc + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + # todo: delta (ref-mess), letzte aktion, beides, delta i_phasor zur stromgrenze + + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + mongo_recorder.save_to_mongodb('Trail_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + self.n_episode += 1 + + # if setpoint in dq: Transform measurement to dq0!!!! 
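The i_phasor value appended to the observation above condenses the three per-unit phase currents into a single magnitude feature. A standalone sketch of that computation, a Clarke (alpha-beta) transform followed by the norm of (alpha, beta); the wrapper itself calls openmodelica_microgrid_gym.util.abc_to_alpha_beta via cal_phasor_magnitude, and the names ending in _sketch/_demo are illustrative only.

import numpy as np

def phasor_magnitude_sketch(abc):
    """Amplitude-invariant Clarke transform followed by the Euclidean norm of (alpha, beta)."""
    alpha = 2 / 3 * (abc[0] - 0.5 * abc[1] - 0.5 * abc[2])
    beta = (abc[1] - abc[2]) / np.sqrt(3)
    return np.sqrt(alpha ** 2 + beta ** 2)

i_abc_demo = 0.7 * np.cos(0.3 - np.array([0, 2 * np.pi / 3, -2 * np.pi / 3]))  # balanced currents at 0.7 p.u.
print(phasor_magnitude_sketch(i_abc_demo))  # ~0.7, independent of the instantaneous angle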
+ #obs[3:6] = dq0_to_abc(obs[3:6], self.env.net.components[0].phase) + #obs[0:3] = dq0_to_abc(obs[0:3], self.env.net.components[0].phase) + + """ + Feature control error: v_setpoint - v_mess + """ + error = obs[6:9] - obs[3:6] + + """ + Feature delta to current limit + """ + delta_i_lim_i_phasor = 1 - self.i_phasor + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + obs = np.append(obs, delta_i_lim_i_phasor) + + + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_action) + + # add sin/cos of phase to obs + obs = np.append(obs, 0.1*np.sin(self.env.net.components[0].phase)) + obs = np.append(obs, 0.1*np.cos(self.env.net.components[0].phase)) + obs = np.append(obs, (self.env.net.components[0].phase) / (2 * np.pi)) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + obs = super().reset() + self._n_training_steps = 0 + + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = dq0_to_abc(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = dq0_to_abc(obs[0:3], self.env.net.components[0].phase) + """ + Feature control error: v_setpoint - v_mess + """ + error = obs[6:9] - obs[3:6] + + """ + Feature delta to current limit + """ + delta_i_lim_i_phasor = 1 - self.i_phasor + + obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + obs = np.append(obs, delta_i_lim_i_phasor) + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_action) + + # add sin/cos of phase to obs + obs = np.append(obs, 0.1 * np.sin(self.env.net.components[0].phase)) + obs = np.append(obs, 0.1 * np.cos(self.env.net.components[0].phase)) + obs = np.append(obs, (self.env.net.components[0].phase)/(2*np.pi)) + + return obs + + def cal_phasor_magnitude(self, abc: np.array) -> float: + """ + Calculated the magnitude of a phasor in a three phase system. 
M + + :param abc: Due to limit normed currents or voltages in abc frame + :return: magnitude of the current or voltage phasor + """ + # calculate magnitude of current phasor abc-> alpha,beta ->|sqrt(alpha² + beta²)| + i_alpha_beta = abc_to_alpha_beta(abc) + i_phasor_mag = np.sqrt(i_alpha_beta[0] ** 2 + i_alpha_beta[1] ** 2) + + return i_phasor_mag + + +def experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, + learning_starts, tau, number_learning_steps, activation_function, n_trail): + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + fig.savefig( + f'{folder_name}/{n_trail}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + ts = time.gmtime() + fig.savefig( + f'{folder_name}/{n_trail}/Inductor_currents{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + fig.savefig(f'{folder_name}/{n_trail}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_include_current, + # reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + + env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + # action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, + # sigma_min=noise_var * np.ones(n_actions) * noise_var_min, + # mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + # sigma=noise_var * 
np.ones(n_actions), dt=net.ts) + + if activation_function == "LeakyReLU": + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[ + critic_hidden_size] * critic_number_layers)) + + if activation_function == "Tanh": + policy_kwargs = dict(activation_fn=th.nn.Tanh, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers)) + + callback = TrainRecorder() + + # model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name}/{n_trail}/', + model = myDDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name}/{n_trail}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=buffer_size, + learning_starts=int(learning_starts * training_episode_length), + batch_size=batch_size, tau=tau, gamma=gamma, action_noise=action_noise, + train_freq=(1, "episode"), gradient_steps=- 1, + optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + count = 0 + + for kk in range(actor_number_layers + 1): + + model.actor.mu._modules[str(count)].weight.data = model.actor.mu._modules[str(count)].weight.data * weight_scale + model.actor_target.mu._modules[str(count)].weight.data = model.actor_target.mu._modules[ + str(count)].weight.data * weight_scale + + model.actor.mu._modules[str(count)].bias.data = model.actor.mu._modules[str(count)].bias.data * bias_scale + model.actor_target.mu._modules[str(count)].bias.data = model.actor.mu._modules[ + str(count)].bias.data * bias_scale + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + # Plotting tests during Training (no training in here!) + plot_callback = EveryNTimesteps(n_steps=number_plotting_steps, + callback=RecordEnvCallback(env, model, 1000, mongo_recorder, + n_trail)) + model.learn(total_timesteps=number_learning_steps, callback=[callback, plot_callback]) + + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Sum_eps_reward": env.get_episode_rewards() + } + + mongo_recorder.save_to_mongodb('Trail_number_' + n_trail, train_data) + + model.save(f'{folder_name}/{n_trail}/model.zip') + + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_include_current, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? 
+ viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + )], + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + env_test = FeatureWrapper(env_test, number_of_features=11) + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + + while True: + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "Phase": env_test.phase} + + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + mongo_recorder.save_to_mongodb('Trail_number_' + n_trail, test_after_training) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + +def objective(trail): + number_learning_steps = number_learning_steps1 # trail.suggest_int("number_learning_steps", 1000, 1000000) + + learning_rate = trail.suggest_loguniform("lr", 1e-9, 1e-3) # 0.0002# + gamma = trail.suggest_loguniform("gamma", 0.6, 0.99) + weight_scale = 0.05 # trail.suggest_loguniform("weight_scale", 5e-4, 0.1) # 0.005 + + bias_scale = 0.05 # trail.suggest_loguniform("bias_scale", 5e-4, 0.1) # 0.005 + alpha_relu_actor = 0.1 # trail.suggest_loguniform("alpha_relu_actor", 0.0001, 0.5) # 0.005 + alpha_relu_critic = 0.1 # trail.suggest_loguniform("alpha_relu_critic", 0.0001, 0.5) # 0.005 + + batch_size = 1024 # trail.suggest_int("batch_size", 32, 1024) # 128 + buffer_size = int(1e6) # trail.suggest_int("buffer_size", 10, 20000) # 128 + + activation_function = trail.suggest_categorical("activation_functions", ["LeakyReLU", "Tanh"]) + # activation_function = trail.suggest_categorical('activation_functions', ['linear', 'poly', 'rbf']) + + actor_hidden_size = trail.suggest_int("actor_hidden_size", 10, 500) # 100 # Using LeakyReLU + actor_number_layers = trail.suggest_int("actor_number_layers", 1, 3) + + critic_hidden_size = trail.suggest_int("critic_hidden_size", 10, 600) # 100 + critic_number_layers = trail.suggest_int("critic_number_layers", 1, 4) + + n_trail = str(trail.number) + use_gamma_in_rew = 1 + noise_var = trail.suggest_loguniform("noise_var", 0.01, 4) # 2 + # min var, action 
noise is reduced to (depends on noise_var) + noise_var_min = trail.suggest_loguniform("noise_var_min", 0.0000001, 2) + # min var, action noise is reduced to (depends on training_episode_length) + noise_steps_annealing = trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), + number_learning_steps) + noise_theta = trail.suggest_loguniform("noise_theta", 1, 50) # 25 # stiffness of OU + error_exponent = 0.5 # trail.suggest_loguniform("error_exponent", 0.01, 0.5) + + training_episode_length = 2000 # trail.suggest_int("training_episode_length", 200, 5000) # 128 + learning_starts = 0.32 # trail.suggest_loguniform("learning_starts", 0.1, 2) # 128 + tau = 0.005 # trail.suggest_loguniform("tau", 0.0001, 0.2) # 2 + + trail_config_mongo = {"Name": "Config"} + trail_config_mongo.update(trail.params) + mongo_recorder.save_to_mongodb('Trail_number_' + n_trail, trail_config_mongo) + + return experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, + learning_starts, tau, number_learning_steps, activation_function, n_trail) + + +# for gamma grid search: +# gamma_list = list(itertools.chain(*[[0.001]*5, [0.25]*5, [0.5]*5, [0.75]*5, [0.99]*5])) +# search_space = {'gamma': gamma_list} + +# number_learning_steps_list = list(itertools.chain(*[[100000] * 3, [300000] * 3, [600000] * 3, [1000000] * 3])) +# number_learning_steps_list = list(itertools.chain(*[[2000] * 3, [30000] * 3, [60000] * 3, [100000] * 3])) +# search_space = {'number_learning_steps': number_learning_steps_list} + +# toDo: postgresql instead of sqlite +study = optuna.create_study(study_name=folder_name, + direction='maximize', + storage=f'sqlite:///optuna_sqlite.sqlite', + load_if_exists=True, + # sampler=optuna.samplers.GridSampler(search_space) + ) + +study.optimize(objective, n_trials=number_trails, n_jobs=1) diff --git a/experiments/hp_tune/examples/generate_testcases.py b/experiments/hp_tune/examples/generate_testcases.py new file mode 100644 index 00000000..4267bd07 --- /dev/null +++ b/experiments/hp_tune/examples/generate_testcases.py @@ -0,0 +1,280 @@ +from functools import partial +import numpy as np +import pandas as pd +import gym +import matplotlib.pyplot as plt +from stochastic.processes import VasicekProcess +from tqdm import tqdm + +from experiments.hp_tune.env.random_load import RandomLoad +from experiments.hp_tune.env.vctrl_single_inv import CallbackList +from experiments.hp_tune.util.config import cfg +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import RandProcess + +# load = 55 # 28 +# net = Network.load('net/net_vctrl_single_inv.yaml') +# max_episode_steps = int(2 / net.ts) + + +# Simulation definitions +if not cfg['is_dq0']: + # load net using abc reference values + net = Network.load('net/net_vctrl_single_inv.yaml') +else: + # load net using dq0 reference values + net = Network.load('net/net_vctrl_single_inv_dq0.yaml') + +# set high to not terminate env! 
Termination should be done in wrapper by env after episode-length-HP +max_episode_steps = 10000 # net.max_episode_steps # number of simulation steps per episode + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC + +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F +# R = 40 # nomVoltPeak / 7.5 # / Ohm +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) +upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 +R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + +gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + +""" + Tescases need to have: + - Full load + - (nearly) No load + - Step up/down + - Drift up/down +1 second, start at nominal power +""" +time_to_nomPower = 0.1 +time_nomPower_drift = 0.32 +time_loadshading = 0.587 +time_power_ramp_up = 0.741 +time_power_ramp_down = 0.985 +time_power_Ramp_stop = 1.3 +time_drift_down2 = 1.52 +time_step_up2 = 1.66 +time_drift_down3 = 1.72 + +R_load = [] + + +def load_step_deterministic(t): + if -2 < t <= 0.1: + return 100.0 + if 0.1 < t <= 0.2: + return 50.0 + if 0.2 < t <= 0.3: + return 100.0 + if 0.3 < t <= 0.4: + return 50.0 + if 0.4 < t <= 0.5: + return 200.0 + if 0.5 < t <= 0.6: + return 50.0 + if 0.7 < t <= 0.7: + return 14.0 + if 0.7 < t <= 0.8: + return 200.0 + else: + return 14 + + +def load_step(t): + """ + Doubles the load parameters + :param t: + :param gain: device parameter + :return: Dictionary with load parameters + """ + # Defines a load step after 0.01 s + if time_to_nomPower < t <= time_to_nomPower + net.ts: + # step to p_nom + gen.proc.mean = 14 + gen.reserve = 14 + + elif time_nomPower_drift < t <= time_nomPower_drift + net.ts: + # drift + gen.proc.mean = 40 + gen.proc.speed = 40 + # gen.reserve = 40 + + + elif time_loadshading < t <= time_loadshading + net.ts: + # loadshading + gen.proc.mean = upper_bound_load + gen.reserve = upper_bound_load + gen.proc.vol = 25 + + elif time_power_ramp_up < t <= time_power_ramp_up + net.ts: + # drift + gen.proc.mean = 80 + gen.proc.speed = 10 + # gen.reserve = 40 + + + elif time_power_ramp_down < t <= time_power_ramp_down + net.ts: + gen.proc.mean = 30 + gen.proc.speed = 80 + gen.proc.vol = 10 + # gen.reserve = 40 + + elif time_power_Ramp_stop < t <= time_power_Ramp_stop + net.ts: + gen.proc.mean = 30 + gen.proc.speed = 1000 + gen.proc.vol = 100 + # gen.reserve = 40 + + elif time_drift_down2 < t <= time_drift_down2 + net.ts: + gen.proc.mean = 100 + gen.proc.speed = 100 + # gen.reserve = 40 + + elif time_step_up2 < t <= time_step_up2 + net.ts: + gen.proc.mean = 20 + gen.proc.speed = 1000 + gen.reserve = 20 + + elif time_drift_down3 < t <= time_drift_down3 + net.ts: + gen.proc.mean = 50 + gen.proc.speed = 60 + gen.proc.vol = 2 + # gen.reserve = 40 + + R_load_sample = gen.sample(t) + R_load.append(R_load_sample) + + return R_load_sample + + +R_L_dessca = [0.9383603849247186, 0.01370747099315972, 0.436663566297538, 0.2261261999434656, 0.6485002895059251, + 0.11839290006977787, 0.5463985295511345, 0.7530789892142805, 0.32964132905168747, 0.9944504372633558, + 0.49206163189268537, 
0.8079144275290111, 0.3794638365771582, 0.17277441320360834, 0.0658824263134536, + 0.7017633533405172, 0.2733925217683726, 0.5982364701739138, 0.9008471432389613, 0.0034681769531965667, + 0.9875802744573191] + + +class Load_runner(): + def __init__(self): + self.count = 0 + self.cc = 0 + + def load_step_dessca(self, t): + self.cc += 1 + + if self.cc % 500 == 0: + self.count += 1 + + return R_L_dessca[self.count] * (200 - 14) + 14 + + def give_val(self, t): + return R_L_dessca[self.count] * (200 - 14) + 14 + + +Load_runner_dessca = Load_runner() + +if __name__ == '__main__': + # gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=1000, vol=10, mean=load), initial=load, + # bounds=(lower_bound_load, upper_bound_load)) + + # rand_load = RandomLoad(max_episode_steps, net.ts, gen) + + rand_load = RandomLoad(round(cfg['train_episode_length'] / 10), net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + + rand_load_train = RandomLoad(cfg['train_episode_length'], net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + + cb = CallbackList() + # set initial = None to reset load random in range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + cb.append(rand_load_train.reset) + + + def xylables(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{load}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + # plt.title('Load example drawn from Ornstein-Uhlenbeck process \n- Clipping outside the shown y-range') + plt.legend() + fig.show() + + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + net=net, + # model_params={'r_load.resistor1.R': rand_load.random_load_step, # For use upper function + # model_params={'r_load.resistor1.R': rand_load_train.one_random_loadstep_per_episode, + # model_params={'r_load.resistor1.R': rand_load_train.random_load_step, + # # For use upper function + # 'r_load.resistor2.R': rand_load.clipped_step, + # 'r_load.resistor3.R': rand_load.clipped_step}, + # model_params={'r_load.resistor1.R': load_step_deterministic, # for check train-random + # 'r_load.resistor2.R': load_step_deterministic, # loadstep + # 'r_load.resistor3.R': load_step_deterministic}, + # model_params={'r_load.resistor1.R': 25, # for check train-random + # 'r_load.resistor2.R': 25, # loadstep + # 'r_load.resistor3.R': 25}, + model_params={'r_load.resistor1.R': Load_runner_dessca.load_step_dessca, # for check train-random + 'r_load.resistor2.R': Load_runner_dessca.give_val, # loadstep + 'r_load.resistor3.R': Load_runner_dessca.give_val}, + viz_cols=[ + PlotTmpl([f'r_load.resistor{i}.R' for i in '123'], + callback=xylables + )], + model_path='omg_grid/grid.paper_loadstep.fmu', + max_episode_steps=max_episode_steps, + on_episode_reset_callback=cb.fire, ) + + env.reset() + R_load1 = [] + R_load2 = [] + R_load3 = [] + # for _ in range(max_episode_steps): + for current_step in tqdm(range(max_episode_steps), desc='steps', unit='step', leave=False): + env.render() + + obs, rew, done, info = env.step(env.action_space.sample()) # take a random action + + # If env is reset for several loadsteps, store env.df + """ + if current_step % round(cfg['train_episode_length'] / 10) == 0 and current_step != 0: + 
R_load1.extend(env.history.df['r_load.resistor1.R'].copy().values.tolist()) + R_load2.extend(env.history.df['r_load.resistor2.R'].copy().values.tolist()) + R_load3.extend(env.history.df['r_load.resistor3.R'].copy().values.tolist()) + + # obs = env.reset() + env.on_episode_reset_callback() + """ + if done: + break + env.close() + R_load1.extend(env.history.df['r_load.resistor1.R'].copy().values.tolist()) + R_load2.extend(env.history.df['r_load.resistor2.R'].copy().values.tolist()) + R_load3.extend(env.history.df['r_load.resistor3.R'].copy().values.tolist()) + + df_store = pd.DataFrame(list(zip(R_load1, R_load2, R_load3)), + columns=['r_load.resistor1.R', 'r_load.resistor2.R', 'r_load.resistor3.R']) + + # df_store = env.history.df[['r_load.resistor1.R', 'r_load.resistor2.R', 'r_load.resistor3.R']] + # df_store.to_pickle('R_load_tenLoadstepPerEpisode2881Len_test_case_10_seconds.pkl') + # df_store.to_pickle('R_load_deterministic_test_case_25_ohm_1_seconds.pkl') + df_store.to_pickle('R_load_dess' + ',322' + ',23,,' + 'ca.pkl') diff --git a/experiments/hp_tune/examples/reset_loop.py b/experiments/hp_tune/examples/reset_loop.py new file mode 100644 index 00000000..88560916 --- /dev/null +++ b/experiments/hp_tune/examples/reset_loop.py @@ -0,0 +1,238 @@ +import time +from typing import Union + +import gym +import matplotlib.pyplot as plt +import numpy as np +from pymongo import MongoClient +from stable_baselines3.common.callbacks import BaseCallback +from stable_baselines3.common.monitor import Monitor +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.type_aliases import GymStepReturn + +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net, folder_name +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.util import abc_to_alpha_beta + +np.random.seed(0) + +# toDo: what to store: +""" +Alle importieren vom Recorder der in DB speichert und interagieren an den richtungen stellen mit dem env/agent... + +after training: -> like: SaveOnBestTrainingRewardCallback(BaseCallback): after training + hyperopt-data + weights + model / net-architecture + +Each step: -> StepRecorder (ggf. StepMonitor?) + training_reward + messdaten? (aus der net.yaml die outs?) 
+ + training_return -> if episode done: store return(-> sollte der Monitor kennen) + +config +skriptname +start- und endzeit stempel +Computername +Architektur des Netzes (mit model.to_json() ) +Gewichte des Netzes (mit model.get_layer('layer_name').weights) +Prädiktion (für jede Zielgröße eine längere Liste) +Testset (profilnummern von den messschrieben die prädiziert wurden) + +""" + + +class Recorder: + + def __init__(self, URI: str = 'mongodb://localhost:27017/', database_name: str = 'OMG', ): + self.client = MongoClient(URI) + self.db = self.client[database_name] + + def save_to_mongodb(self, col: str = ' trails', data=None): + trial_coll = self.db[col] # get collection named col + if data is None: + raise ValueError('No data given to store in database!') + trial_coll.insert_one(data) + + +class FeatureWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = np.inf): + """ + Env Wrapper to add features to the env-observations and adds information to env.step output which can be used in + case of an continuing (non-episodic) task to reset the environment without being terminated by done + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) + : + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + self.training_episode_length = training_episode_length + self._n_training_steps = 0 + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + + obs, reward, done, info = super().step(action) + + self._n_training_steps += 1 + + if self._n_training_steps % self.training_episode_length == 0: + info["timelimit_reached"] = True + + # log measurement here? + + # add wanted features here (add appropriate self.observation in init!!) + # calculate magnitude of current phasor abc + feature_diff_imax_iphasor = self.cal_phasor_magnitude(obs[0:3]) + + obs = np.append(obs, feature_diff_imax_iphasor) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + obs = super().reset() + self._n_training_steps = 0 + + # reset timelimit_reached flag + # self.info["timelimit_reached"] = False + + feature_diff_imax_iphasor = self.cal_phasor_magnitude(obs[0:3]) + obs = np.append(obs, feature_diff_imax_iphasor) + + return obs + + def cal_phasor_magnitude(self, abc: np.array) -> float: + """ + Calculated the magnitude of a phasor in a three phase system. Maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (1 - phasor_mag) - 0.5. 
-0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + + :param abc: Due to limit normed currents or voltages in abc frame + :return: magnitude of the current or voltage phasor + """ + # calculate magnitude of current phasor abc-> alpha,beta ->|sqrt(alpha² + beta²)| + i_alpha_beta = abc_to_alpha_beta(abc) + i_phasor_mag = np.sqrt(i_alpha_beta[0] ** 2 + i_alpha_beta[1] ** 2) + + # mapping [0,1+] + # feature_diff_imax_iphasor = 1 - (1 - i_phasor_mag) + + # mapping [-0.5 -,0.5] (can be < 0.5 if phasor exceeds lim) + feature_diff_imax_iphasor = (1 - i_phasor_mag) - 0.5 + + return feature_diff_imax_iphasor + + +class TrainRecorder(BaseCallback): + + def __init__(self, verbose=1): + super(TrainRecorder, self).__init__(verbose) + + def _on_training_end(self) -> None: + """ + This event is triggered before exiting the `learn()` method. + """ + # asd = 1 + # ads = 2 + pass + + def _on_step(self) -> bool: + asd = 1 + + # nach env.step() + + return True + + def _on_rollout_end(self) -> None: + # asd = 1 + pass + + +mongo_recorder = Recorder(database_name=folder_name) + +rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, 1, + use_gamma_normalization=1, error_exponent=1, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + +def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + fig.savefig( + f'{folder_name}/{n_trail}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + +def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + ts = time.gmtime() + fig.savefig( + f'{folder_name}/{n_trail}/Inductor_currents{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + +def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + fig.savefig(f'{folder_name}/{n_trail}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + +env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + # reward_fun=rew.rew_fun, + reward_fun=rew.rew_fun_include_current, + # reward_fun=rew.rew_fun, + abort_reward=-(1 - rew.gamma), + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + +env = FeatureWrapper(env, number_of_features=1, training_episode_length=1000) + +while True: + obs = env.reset() + + asd = 1 diff --git a/experiments/hp_tune/examples/single_inverter_voltage_current_control_PIPI.py b/experiments/hp_tune/examples/single_inverter_voltage_current_control_PIPI.py 
new file mode 100644 index 00000000..8ca6cacf --- /dev/null +++ b/experiments/hp_tune/examples/single_inverter_voltage_current_control_PIPI.py @@ -0,0 +1,392 @@ +##################################### +# Experiment : Single voltage forming inverter supplying an RL-load via an LC-filter +# Controller: Cascaded PI-PI voltage and current controller gain parameters are optimized by SafeOpt +# a) FMU by OpenModelica and SafeOpt algorithm to find optimal controller parameters +# b) connecting via ssh to a testbench to perform real-world measurement +import time +import logging +import os +from functools import partial +from itertools import tee + +import GPy +import gym +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +from pymongo import MongoClient +from stochastic.processes import VasicekProcess +from tqdm import tqdm + +from experiments.hp_tune.env.random_load import RandomLoad +from experiments.model_validation.env.testbench_voltage_ctrl import TestbenchEnvVoltage +from experiments.model_validation.execution.monte_carlo_runner import MonteCarloRunner +from experiments.model_validation.execution.runner_hardware import RunnerHardwareGradient +from openmodelica_microgrid_gym.agents import SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from experiments.hp_tune.env.rewards import Reward +from experiments.model_validation.env.stochastic_components import Load +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + +# Plot setting +params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 8, # fontsize for x and y labels (was 10) + 'axes.titlesize': 8, + 'font.size': 8, # was 10 + 'legend.fontsize': 8, # was 10 + 'xtick.labelsize': 8, + 'ytick.labelsize': 8, + 'text.usetex': True, + 'figure.figsize': [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } +matplotlib.rcParams.update(params) + +include_simulate = True +show_plots = True +balanced_load = False +do_measurement = False +save_results = True + +# Files saves results and resulting plots to the folder saves_VI_control_safeopt in the current directory +current_directory = os.getcwd() +folder_name = 'Pipi_safeopt_best_run4d' +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +np.random.seed(1) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_vctrl_single_inv.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +max_episode_steps = 2000 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. 
L,R, noise) from +v_DC = 600 # DC-link voltage / V; will be set as model parameter in the FMU +nomFreq = 60 # nominal grid frequency / Hz +nomVoltPeak = 169.7 # 230 * 1.414 # nominal grid voltage / V +iLimit = 16 # inverter current limit / A +iNominal = 12 # nominal inverter current / A +vNominal = 190 # nominal inverter current / A +vLimit = vNominal * 1.5 # inverter current limit / A +funnelFactor = 0.02 +vFunnel = np.array([vNominal * funnelFactor, vNominal * funnelFactor, vNominal * funnelFactor]) +mu = 400 # factor for barrier function (see below) +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + + +class Recorder: + + def __init__(self, URI: str = 'mongodb://localhost:27017/', database_name: str = 'OMG', ): + self.client = MongoClient(URI) + self.db = self.client[database_name] + + def save_to_mongodb(self, col: str = ' trails', data=None): + trial_coll = self.db[col] # get collection named col + if data is None: + raise ValueError('No data given to store in database!') + trial_coll.insert_one(data) + + +def run_experiment(): + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma=0, + use_gamma_normalization=1, error_exponent=0.5, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + ##################################### + # Definitions for the GP + prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set + noise_var = 0.001 # ** 2 # measurement noise sigma_omega + prior_var = 2 # prior variance of the GP + + # Choose Kp and Ki (current and voltage controller) as mutable parameters (below) and define bounds and lengthscale + # for both of them + bounds = [(0.000, 0.045), (4, 450)] # bounds on the input variable current-Ki&Kp and voltage-Ki&Kp + lengthscale = [.003, 50.] # length scale for the parameter variation [current-Ki&Kp and voltage-Ki&Kp] for the GP + + # The performance should not drop below the safe threshold, which is defined by the factor safe_threshold times + # the initial performance: safe_threshold = 1.2 means: performance measurement for optimization are seen as + # unsafe, if the new measured performance drops below 20 % of the initial performance of the initial safe (!) + # parameter set + safe_threshold = 0 + j_min = -5 # cal min allowed performance + + # The algorithm will not try to expand any points that are below this threshold. This makes the algorithm stop + # expanding points eventually. 
+ # The following variable is multiplied with the first performance of the initial set by the factor below: + explore_threshold = 0 + + # Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of + # limit exceeded + abort_reward = 100 * j_min + + # Definition of the kernel + kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + + ##################################### + # Definition of the controllers + # Choose Kp and Ki for the current and voltage controller as mutable parameters + mutable_params = dict(voltageP=MutableFloat(0.002), voltageI=MutableFloat(143)) # 300Hz + # mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz + voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-iLimit, iLimit)) + + kp_c = 0.033 + ki_c = 17.4 # 11.8 + current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) # Current controller values + + # Define the droop parameters for the inverter of the active power Watt/Hz (DroopGain), delta_t (0.005) used for the + # filter and the nominal frequency + # Droop controller used to calculate the virtual frequency drop due to load changes + droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) + + # Define the Q-droop parameters for the inverter of the reactive power VAR/Volt, delta_t (0.002) used for the + # filter and the nominal voltage + qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + + # Define a voltage forming inverter using the PIPI and droop parameters from above + + # Controller with observer + # ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, delta_t, droop_param, qdroop_param, + # observer=[Lueneberger(*params) for params in + # repeat((A, B, C, L, delta_t * undersample, v_DC / 2), 3)], undersampling=undersample, + # name='master') + + # Controller without observer + ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + + ##################################### + # Definition of the optimization agent + # The agent is using the SafeOpt algorithm by F. Berkenkamp (https://arxiv.org/abs/1509.01066) in this example + # Arguments described above + # History is used to store results + agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + + if include_simulate: + + i_lim = net['inverter1'].i_lim # inverter current limit / A + i_nom = net['inverter1'].i_nom # nominal inverter current / A + v_nom = net.v_nom + v_lim = net['inverter1'].v_lim + v_DC = net['inverter1'].v_DC + # plant + + # toDo: shift this to net?! + L_filter = 2.3e-3 # / H + R_filter = 400e-3 # / Ohm + C_filter = 10e-6 # / F + + lower_bound_load = 11 # to allow maximal load that draws i_limit (toDo: let exceed?) 
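        # Note: the 11 Ohm lower bound corresponds roughly to nomVoltPeak / iLimit = 169.7 V / 16 A ~ 10.6 Ohm,
        # i.e. the smallest resistive load for which the drawn phase current stays just below the inverter
        # current limit defined above.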
+ upper_bound_load = 160 # to apply symmetrical load bounds + + R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + + loadstep_timestep = max_episode_steps / 2 + + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + + rand_load_test = RandomLoad(max_episode_steps, net.ts, gen, + load_curve=pd.read_pickle( + 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + # PlotTmpl([[f'master.I_hat{i}' for i in 'abc'], [f'r_load.resistor{i}.i' for i in '123'], ], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$i_{\mathrm{o estimate,abc}}\,/\,\mathrm{A}$'), + # color=[['b', 'r', 'g'], ['b', 'r', 'g']], + # style=[['-*'], ['--*']] + # ), + # PlotTmpl([[f'master.m{i}' for i in 'dq0']], + # callback=lambda fig: plotter.update_axes(fig, title='Simulation', + # ylabel='$m_{\mathrm{dq0}}\,/\,\mathrm{}$', + # filename='Sim_m_dq0') + # ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + log_level=logging.INFO, + viz_mode='episode', + max_episode_steps=20000, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R') + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + history=FullHistory(), + action_time_delay=1 * undersample + ) + + return_sum = 0.0 + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + + """ + # 
toDo: - Use other Test-episode + # - Rückgabewert = (Summe der üblichen Rewards) / (Anzahl steps Validierung) + (Penalty i.H.v. -1) + while True: + action = agent.act(obs) + obs, rewards, done, info = env_test.step(action) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + """ + + agent.reset() + agent.obs_varnames = env.history.cols + env.history.cols = env.history.structured_cols(None) + agent.measurement_cols + env.measure = agent.measure + + reward_list = [] + + agent_fig = None + obs = env.reset() + for _ in tqdm(range(env.max_episode_steps), desc='steps', unit='step', leave=False): + # for _ in tqdm(range(1000), desc='steps', unit='step', leave=False): + + done, r = False, None + + if len(reward_list) > 10000: + asd = 1 + + agent.observe(r, done) + act = agent.act(obs) + obs, r, done, info = env.step(act) + reward_list.append(r) + env.render() + return_sum += r + if r == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + + # if done: + # break + # close env before calling final agent observe to see plots even if agent crashes + _, env_fig = env.close() + agent.observe(r, done) + print(limit_exceeded_in_test) + ret = return_sum / env.max_episode_steps + limit_exceeded_penalty + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": reward_list} + + # Add v-measurements + test_after_training.update({env.viz_col_tmpls[j].vars[i].replace(".", "_"): env.history[ + env.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + + test_after_training.update({env.viz_col_tmpls[2].vars[i].replace(".", "_"): env.history[ + env.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + # va = self.env.env.history[self.env.env.viz_col_tmpls[0].vars[0]].copy() + # mongo_recorder = Recorder(database_name=folder_name) + # mongo_recorder.save_to_mongodb('Trail_number_3', test_after_training) + + return (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + +ret = run_experiment() + +print(ret) diff --git a/experiments/hp_tune/execution/reset_runner.py b/experiments/hp_tune/execution/reset_runner.py new file mode 100644 index 00000000..6d5be09b --- /dev/null +++ b/experiments/hp_tune/execution/reset_runner.py @@ -0,0 +1,168 @@ +import numpy as np +from typing import Dict, Any +from tqdm import tqdm +from openmodelica_microgrid_gym.agents.episodic import EpisodicLearnerAgent +from openmodelica_microgrid_gym.env import ModelicaEnv + + +class MonteCarloRunner: + """ + This class will execute an agent on the environment. + It handles communication between agent and environment and handles the execution of multiple epochs + Additionally to runner, the Monte-Carlo runner has an additional loop to perform n_MC experiments using one + (controller) parameter set before update the (controller) parameters. + Therefore, the agent.observe function is used. + Inside the MC-loop the observe function is called with terminated = False to only update the return. + The return is stored in an array at the end of the MC-loop. 
+ After finishing the MC-loop, the average of the return-array is used to update the (controller) parameters. + Therefore, the agent-observe function is called with terminated = True + """ + + def __init__(self, agent: EpisodicLearnerAgent, env: ModelicaEnv): + """ + + :param agent: Agent that acts on the environment + :param env: Environment tha Agent acts on + """ + self.env = env + self.agent = agent + self.agent.env = env + self.run_data = dict() # type: Dict[str,Any] + """ + Dictionary storing information about the experiment. + + - "best_env_plt": environment best plots + - "best_episode_idx": index of best episode + - "agent_plt": last agent plot + """ + + def run(self, n_episodes: int = 10, n_mc: int = 5, visualise: bool = False, prepare_mc_experiment=lambda: True, + return_gradient_extend: bool = False): + """ + Trains/executes the agent on the environment for a number of epochs + + :param n_episodes: number of epochs to play + :param n_mc: number of Monte-Carlo experiments using the same parameter set before updating the latter + :param visualise: turns on visualization of the environment + :param prepare_mc_experiment: prepares experiment by resetting stochastic components + :param return_gradient_extend: calculates gradient extension for return if return_gradient_extend + """ + t = np.linspace(0, self.env.max_episode_steps * self.env.net.ts, self.env.max_episode_steps + 1) + self.agent.reset() + self.env.history.cols = self.env.history.structured_cols(None) + self.agent.measurement_cols + self.agent.obs_varnames = self.env.history.cols + self.env.measure = self.agent.measure + + initial_performance_mc = np.zeros(n_mc) + performance_mc = np.zeros(n_mc) + + if not visualise: + self.env.viz_mode = None + agent_fig = None + + for i in tqdm(range(n_episodes), desc='episodes', unit='epoch'): + done, r = False, None + np.random.seed(0) + for m in tqdm(range(n_mc), desc='monte_carlo_run', unit='epoch', leave=False): + prepare_mc_experiment() # reset stoch components + + r_vec = np.zeros(self.env.max_episode_steps + 1) + + obs = self.env.reset() + + for p in tqdm(range(self.env.max_episode_steps + 1), desc='steps', unit='step', leave=False): + self.agent.observe(r, False) + act = self.agent.act(obs) + if p % 1000 == 0 and p > 0: + asd = 1 + obs = self.env.reset() + self.agent.controllers['master'].reset() + obs, r, done, info = self.env.step(act) + r_vec[p] = r + self.env.render() + if p == self.env.max_episode_steps: + self.agent.observe(r, False) + + if return_gradient_extend: + w = self.env.history['master.CVVd'].values + w1 = self.env.history['master.CVVq'].values + w2 = self.env.history['master.CVV0'].values + v = self.env.history['master.SPVd'].values + + SP_sattle = (abs(w - v) < v * 0.12).astype(int) # 0.12 -> +-20V setpoint + + dw = np.gradient(w) + dw1 = np.gradient(w1) + dw2 = np.gradient(w2) + + dev_return = (np.mean(abs(SP_sattle * dw)) + np.mean(abs(SP_sattle * dw1)) + np.mean( + abs(SP_sattle * dw2))) + else: + dev_return = 0 + print('NO DEV RETURN!!!!') + + dev_fac = 0.5 # 3 + + print(self.agent.episode_return) + print(dev_return) + + self.agent.performance = (( + self.agent.episode_return - dev_return * dev_fac) - self.agent.min_performance) \ + / (self.agent.initial_performance - self.agent.min_performance) + + if m == 0 and i == 0: + self.agent.initial_performance = self.agent.episode_return - dev_return * dev_fac + self.agent.performance = (( + self.agent.episode_return - dev_return * dev_fac) - self.agent.min_performance) \ + / ( + self.agent.initial_performance - 
self.agent.min_performance) # instead of perf/initial_perf + self.agent.last_best_performance = self.agent.performance + self.agent.last_worst_performance = self.agent.performance + + self.agent.best_episode = self.agent.history.df.shape[0] + self.agent.last_best_performance = self.agent.performance + self.agent.worst_episode = self.agent.history.df.shape[0] + self.agent.last_worst_performance = self.agent.performance + + self.agent.performance = (( + self.agent.episode_return - dev_return * dev_fac) - self.agent.min_performance) \ + / (self.agent.initial_performance - self.agent.min_performance) + + performance_mc[m] = self.agent.performance + initial_performance_mc[m] = self.agent.episode_return + # set iterations and episode return = 0 + self.agent.prepare_episode() + + break + + _, env_fig = self.env.close() + + # vor break? + if (m == 0 and i == 0): # and self.agent.has_improved: + self.run_data['best_env_plt'] = env_fig + self.run_data['best_episode_idx'] = i + self.agent.last_best_performance = self.agent.performance + + if (m == 0 and i == 0): # and self.agent.has_worsened: + self.run_data['worst_env_plt'] = env_fig + self.run_data['worst_episode_idx'] = i + self.agent.last_worst_performance = self.agent.performance + + if i == 0: + # performance was normalized to first run -> use average of first episode so that J_initial for first + # is 1 + eps_ret = performance_mc * ( + self.agent.initial_performance - self.agent.min_performance) + self.agent.min_performance + self.agent.initial_performance = np.mean(eps_ret) + performance_mc = (eps_ret - self.agent.min_performance) \ + / (self.agent.initial_performance - self.agent.min_performance) + + self.agent.performance = np.mean(performance_mc) + if self.agent.performance > 1: + asd = 1 + self.agent.update_params() + + if visualise: + agent_fig = self.agent.render() + + self.run_data['last_agent_plt'] = agent_fig diff --git a/experiments/hp_tune/execution/runner.py b/experiments/hp_tune/execution/runner.py new file mode 100644 index 00000000..55cf1736 --- /dev/null +++ b/experiments/hp_tune/execution/runner.py @@ -0,0 +1,84 @@ +from typing import Dict, Any, Optional + +from tqdm import tqdm + +from openmodelica_microgrid_gym.agents import Agent +from openmodelica_microgrid_gym.env import ModelicaEnv +from openmodelica_microgrid_gym.execution.callbacks import Callback + + +class Runner: + """ + This class will execute an agent on the environment. + It handles communication between agent and environment and handles the execution of multiple epochs + """ + + def __init__(self, agent: Agent, env: ModelicaEnv, callback: Optional[Callback] = None): + """ + + :param agent: Agent that acts on the environment + :param env: Environment tha Agent acts on + """ + self.env = env + self.agent = agent + self.agent.env = env + self.run_data = dict() # type: Dict[str,Any] + self.callback = callback + """ + Dictionary storing information about the experiment. 
+ + - "best_env_plt": environment best plots + - "best_episode_idx": index of best episode + - "agent_plt": last agent plot + """ + + def run(self, n_episodes: int = 10, visualise: bool = False): + """ + Trains/executes the agent on the environment for a number of epochs + + :param n_episodes: number of epochs to play + :param visualise: turns on visualization of the environment + """ + self.agent.reset() + self.agent.obs_varnames = self.env.history.cols + self.env.history.cols = self.env.history.structured_cols(None) + self.agent.measurement_cols + self.env.measure = self.agent.measure + + reward_list = [] + + agent_fig = None + + for i in tqdm(range(n_episodes), desc='episodes', unit='epoch'): + obs = self.env.reset() + if self.callback is not None: + self.callback.reset() + done, r = False, None + for _ in tqdm(range(self.env.max_episode_steps), desc='steps', unit='step', leave=False): + self.agent.observe(r, done) + act = self.agent.act(obs) + obs, r, done, info = self.env.step(act) + reward_list.append(r) + if self.callback is not None: + self.callback(self.env.history.cols, self.env.history.last()) + if visualise: + self.env.render() + if done: + break + # close env before calling final agent observe to see plots even if agent crashes + _, env_fig = self.env.close() + self.agent.observe(r, done) + + if visualise: + agent_fig = self.agent.render() + + self.run_data['last_agent_plt'] = agent_fig + + if i == 0 or self.agent.has_improved: + self.run_data['best_env_plt'] = env_fig + self.run_data['best_episode_idx'] = i + + if i == 0 or self.agent.has_worsened: + self.run_data['worst_env_plt'] = env_fig + self.run_data['worst_episode_idx'] = i + + return reward_list diff --git a/experiments/hp_tune/experiement_custom_td3.py b/experiments/hp_tune/experiement_custom_td3.py new file mode 100644 index 00000000..24955690 --- /dev/null +++ b/experiments/hp_tune/experiement_custom_td3.py @@ -0,0 +1,273 @@ +import platform +import time +from collections import OrderedDict + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG, TD3 +from experiments.hp_tune.policies.split_actor import CustomTD3Policy +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.configTD3 import cfg +from experiments.hp_tune.util.recorder import Recorder + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] + + +def experiment_fit_Custom_TD3(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, antiwindup_weight, + penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, 
n_trail, + policy_delay, target_policy_noise, target_noise_clip + ): + if node not in cfg['lea_vpn_nodes']: + # assume we are on pc2 + log_path = f'/scratch/hpc-prf-reinfl/weber/OMG/{folder_name}/{n_trail}/' + else: + log_path = f'{folder_name}/{n_trail}/' + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + + env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps) + + # todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... + env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + # action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, + # sigma_min=noise_var * np.ones(n_actions) * noise_var_min, + # mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + # sigma=noise_var * np.ones(n_actions), dt=net.ts) + print(optimizer) + if optimizer == 'SGD': + used_optimzer = th.optim.SGD + elif optimizer == 'RMSprop': + used_optimzer = th.optim.RMSprop + # elif optimizer == 'LBFGS': + # needs in step additional argument + # used_optimzer = th.optim.LBFGS + else: + used_optimzer = th.optim.Adam + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers), + optimizer_class=used_optimzer) + + model = TD3(policy='CustomTD3Policy', + env=env, + learning_rate=learning_rate, + buffer_size=buffer_size, + learning_starts=100, + batch_size=batch_size, + tau=tau, + gamma=gamma, + train_freq=(train_freq, train_freq_type), + gradient_steps=-1, + action_noise=action_noise, + optimize_memory_usage=False, + policy_delay=policy_delay, + target_policy_noise=target_policy_noise, + target_noise_clip=target_noise_clip, + tensorboard_log=log_path, + create_eval_env=False, + policy_kwargs=policy_kwargs, + verbose=0, + seed=None, + device="auto", + _init_setup_model=True + ) + + # Adjust network -> maybe change to Costume net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html + # adn scale weights and biases + param_dict_scaled = OrderedDict() + param_dict = model.get_parameters()['policy'] + for param in param_dict.items(): + print(param) + param_dict_scaled[param[0]] = param[1] * weight_scale + + new_param_dict = {'policy': param_dict_scaled, + 'actor.optimizer': model.get_parameters()['actor.optimizer'], + 'critic.optimizer': model.get_parameters()['critic.optimizer']} + + # 
new_param_dict = OrderedDict() + + model.set_parameters(new_param_dict) + + # print('alphaRelu fehlt noch!') + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor.I._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.I._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic.qf1._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf1._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + # todo: Downscale actionspace - lessulgy possible? Interaction pytorch... + env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Trial number": n_trail, + "Database name": folder_name, + "Sum_eps_reward": env.get_episode_rewards() + } + mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + + model.save(log_path + f'model.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? 
+ obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, penalty_P_weight=0) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + + while True: + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! 
(risk=0 manullay in env); only voltage taken into account in reward!"} + + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) diff --git a/experiments/hp_tune/experiment_fit_DDPG_custom.py b/experiments/hp_tune/experiment_fit_DDPG_custom.py new file mode 100644 index 00000000..06c496c3 --- /dev/null +++ b/experiments/hp_tune/experiment_fit_DDPG_custom.py @@ -0,0 +1,272 @@ +import platform +import time +from collections import OrderedDict + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG, TD3 +from experiments.hp_tune.policies.split_actor import CustomTD3Policy +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] + + +def experiment_fit_DDPG_custom(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, antiwindup_weight, + penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, n_trail + ): + if node not in cfg['lea_vpn_nodes']: + # assume we are on pc2 + log_path = f'/scratch/hpc-prf-reinfl/weber/OMG/{folder_name}/{n_trail}/' + else: + log_path = f'{folder_name}/{n_trail}/' + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + + env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + 
t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps) + + # todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... + env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + # action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, + # sigma_min=noise_var * np.ones(n_actions) * noise_var_min, + # mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + # sigma=noise_var * np.ones(n_actions), dt=net.ts) + print(optimizer) + if optimizer == 'SGD': + used_optimzer = th.optim.SGD + elif optimizer == 'RMSprop': + used_optimzer = th.optim.RMSprop + # elif optimizer == 'LBFGS': + # needs in step additional argument + # used_optimzer = th.optim.LBFGS + else: + used_optimzer = th.optim.Adam + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers), + optimizer_class=used_optimzer) + + model = DDPG(policy='CustomTD3Policy', + env=env, + learning_rate=learning_rate, + buffer_size=buffer_size, + learning_starts=100, + batch_size=batch_size, + tau=tau, + gamma=gamma, + train_freq=(train_freq, train_freq_type), + gradient_steps=-1, + action_noise=action_noise, + optimize_memory_usage=False, + # policy_delay=policy_delay, + # target_policy_noise=target_policy_noise, + # target_noise_clip=target_noise_clip, + tensorboard_log=log_path, + create_eval_env=False, + policy_kwargs=policy_kwargs, + verbose=0, + seed=None, + device="auto", + _init_setup_model=True + ) + + # Adjust network -> maybe change to Costume net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html + # adn scale weights and biases + param_dict_scaled = OrderedDict() + param_dict = model.get_parameters()['policy'] + for param in param_dict.items(): + print(param) + param_dict_scaled[param[0]] = param[1] * weight_scale + + new_param_dict = {'policy': param_dict_scaled, + 'actor.optimizer': model.get_parameters()['actor.optimizer'], + 'critic.optimizer': model.get_parameters()['critic.optimizer']} + + # new_param_dict = OrderedDict() + + model.set_parameters(new_param_dict) + + # print('alphaRelu fehlt noch!') + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor.I._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.I._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + # model.critic.qf1._modules[str(count + 1)].negative_slope = alpha_relu_critic + # model.critic_target.qf1._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + # todo: Downscale actionspace - lessulgy possible? Interaction pytorch... 
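    # The FeatureWrapper exposes a 6-dimensional action to the agent (a proportional and an integral part
    # per phase), while the underlying FMU environment only accepts the three modulation indices. The Box
    # is widened to 6 above so that SB3 builds the actor, critic and replay buffer for 6-dimensional
    # actions; it is set back to 3 below before interacting with the environment. A rough sketch of the
    # recombination the wrapper is assumed to perform (names illustrative, not the actual implementation):
    #
    #   action_P, action_I = action[:3], action[3:]
    #   integrator_sum += action_I * antiwindup_weight                 # integrator state, anti-windup weighted
    #   action_to_env = action_P + integrator_weight * integrator_sum  # applied P-part plus integral part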
+ env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Trial number": n_trail, + "Database name": folder_name, + "Sum_eps_reward": env.get_episode_rewards() + } + mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + + model.save(log_path + f'model.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, penalty_P_weight=0) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + + while True: + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! 
(risk=0 manullay in env); only voltage taken into account in reward!"} + + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) diff --git a/experiments/hp_tune/experiment_vctrl_single_inv.py b/experiments/hp_tune/experiment_vctrl_single_inv.py new file mode 100644 index 00000000..03ea17b7 --- /dev/null +++ b/experiments/hp_tune/experiment_vctrl_single_inv.py @@ -0,0 +1,375 @@ +import platform +import time + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper, FeatureWrapper_pastVals, FeatureWrapper_futureVals, \ + FeatureWrapper_I_controller, BaseWrapper +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] +Ki_ddpg_combi = 182 + +def experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, antiwindup_weight, + penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, n_trail, + number_past_vals=0): + if node not in cfg['lea_vpn_nodes']: + # assume we are on pc2 + log_path = f'/scratch/hpc-prf-reinfl/weber/OMG/{folder_name}/{n_trail}/' + else: + log_path = f'{folder_name}/{n_trail}/' + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + # , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'] + ) + + if cfg['env_wrapper'] == 'past': + env = FeatureWrapper_pastVals(env, number_of_features=9 + number_past_vals * 3, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, 
integrator_weight=integrator_weight,
+                                      antiwindup_weight=antiwindup_weight, gamma=gamma,
+                                      penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight,
+                                      t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P,
+                                      number_learing_steps=number_learning_steps, number_past_vals=number_past_vals)
+
+    elif cfg['env_wrapper'] == 'future':
+        env = FeatureWrapper_futureVals(env, number_of_features=9, training_episode_length=training_episode_length,
+                                        recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight,
+                                        antiwindup_weight=antiwindup_weight, gamma=gamma,
+                                        penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight,
+                                        t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P,
+                                        number_learing_steps=number_learning_steps, number_future_vals=10)
+
+    elif cfg['env_wrapper'] == 'I-controller':
+        env = FeatureWrapper_I_controller(env, number_of_features=12 + number_past_vals * 3,  # including integrator_sum
+                                          training_episode_length=training_episode_length,
+                                          recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight,
+                                          antiwindup_weight=antiwindup_weight, gamma=gamma,
+                                          penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight,
+                                          t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P,
+                                          number_learing_steps=number_learning_steps, Ki=Ki_ddpg_combi,
+                                          number_past_vals=number_past_vals)
+
+    elif cfg['env_wrapper'] == 'no-I-term':
+        env = BaseWrapper(env, number_of_features=6 + number_past_vals * 3,
+                          training_episode_length=training_episode_length,
+                          recorder=mongo_recorder, n_trail=n_trail, gamma=gamma,
+                          number_learing_steps=number_learning_steps, number_past_vals=number_past_vals)
+
+    else:
+        env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length,
+                             recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight,
+                             antiwindup_weight=antiwindup_weight, gamma=gamma,
+                             penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight,
+                             t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P,
+                             number_learing_steps=number_learning_steps)  # , use_past_vals=True, number_past_vals=30)
+
+    # todo: upscale action space - is a less ugly way possible? Interaction with PyTorch...
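+    # Descriptive note (added for clarity): for every wrapper variant except
+    # 'no-I-term' and 'I-controller' the agent outputs a P part and an I part per
+    # phase (2 * 3 = 6 values), while the wrapped env itself only accepts the 3
+    # modulation indices the wrapper computes from them. The Box is therefore
+    # widened to 6 dimensions before the DDPG model is created, so that
+    # stable-baselines3 builds the actor/critic with 6 action outputs; it is set
+    # back to 3 dimensions after model creation (see the matching block below).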
+ if cfg['env_wrapper'] not in ['no-I-term', 'I-controller']: + env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + # action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, + # sigma_min=noise_var * np.ones(n_actions) * noise_var_min, + # mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + # sigma=noise_var * np.ones(n_actions), dt=net.ts) + print(optimizer) + if optimizer == 'SGD': + used_optimzer = th.optim.SGD + elif optimizer == 'RMSprop': + used_optimzer = th.optim.RMSprop + # elif optimizer == 'LBFGS': + # needs in step additional argument + # used_optimzer = th.optim.LBFGS + else: + used_optimzer = th.optim.Adam + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers), + optimizer_class=used_optimzer) + + model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=log_path, + # model = myDDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name}/{n_trail}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=buffer_size, + # learning_starts=int(learning_starts * training_episode_length), + batch_size=batch_size, tau=tau, gamma=gamma, action_noise=action_noise, + train_freq=(train_freq, train_freq_type), gradient_steps=- 1, + optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + # Adjust network -> maybe change to Costume net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html + # adn scale weights and biases + count = 0 + for kk in range(actor_number_layers + 1): + + model.actor.mu._modules[str(count)].weight.data = model.actor.mu._modules[str(count)].weight.data * weight_scale + model.actor_target.mu._modules[str(count)].weight.data = model.actor_target.mu._modules[ + str(count)].weight.data * weight_scale + + model.actor.mu._modules[str(count)].bias.data = model.actor.mu._modules[str(count)].bias.data * bias_scale + model.actor_target.mu._modules[str(count)].bias.data = model.actor.mu._modules[ + str(count)].bias.data * bias_scale + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + # todo: Downscale actionspace - lessulgy possible? Interaction pytorch... 
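+    # Counterpart to the upscaling above: now that the networks have been built
+    # with 6 outputs, the Box is reset to the 3 actions the wrapped env actually
+    # consumes. Rough sketch of what the feature wrapper is assumed to do with the
+    # 6-dimensional agent action (its implementation is not part of this diff,
+    # names are illustrative):
+    #   action_P, action_I = action[:3], action[3:]
+    #   integrator_sum += integrator_weight * action_I  # plus an anti-windup correction
+    #   env_action = action_P + integrator_sum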
+ if cfg['env_wrapper'] not in ['no-I-term', 'I-controller']: + env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Trial number": n_trail, + "Database name": folder_name, + "Sum_eps_reward": env.get_episode_rewards() + } + mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + + model.save(log_path + f'model.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + # , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'] + ) + + if cfg['env_wrapper'] == 'past': + env_test = FeatureWrapper_pastVals(env_test, number_of_features=9 + number_past_vals * 3, + integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, + penalty_P_weight=0, number_past_vals=number_past_vals, + training_episode_length=training_episode_length, ) + elif cfg['env_wrapper'] == 'future': + env_test = FeatureWrapper_futureVals(env_test, number_of_features=9, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps, number_future_vals=10) + elif cfg['env_wrapper'] == 'I-controller': + env_test = FeatureWrapper_I_controller(env_test, number_of_features=12 + number_past_vals * 3, + # including integrator_sum + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps, Ki=Ki_ddpg_combi, + number_past_vals=number_past_vals) + + elif cfg['env_wrapper'] == 'no-I-term': + env_test = BaseWrapper(env_test, number_of_features=6 + number_past_vals * 3, + training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, gamma=gamma, + number_learing_steps=number_learning_steps, number_past_vals=number_past_vals) + + else: + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, + penalty_P_weight=0, + training_episode_length=training_episode_length, ) # , use_past_vals=True, number_past_vals=30) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + phase_list = [] + 
phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + va = [] + vb = [] + vc = [] + v_ref0 = [] + v_ref1 = [] + v_ref2 = [] + ia = [] + ib = [] + ic = [] + R_load = [] + + for step in range(env_test.max_episode_steps): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + if cfg['env_wrapper'] not in ['no-I-term']: + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + + if step % 1000 == 0 and step != 0: + # if step % cfg['train_episode_length'] == 0 and step != 0: + va.extend(env_test.history[env_test.viz_col_tmpls[0].vars[0]].copy().values.tolist()) + vb.extend(env_test.history[env_test.viz_col_tmpls[0].vars[1]].copy().values.tolist()) + vc.extend(env_test.history[env_test.viz_col_tmpls[0].vars[2]].copy().values.tolist()) + v_ref0.extend(env_test.history[env_test.viz_col_tmpls[0].vars[3]].copy().values.tolist()) + v_ref1.extend(env_test.history[env_test.viz_col_tmpls[0].vars[4]].copy().values.tolist()) + v_ref2.extend(env_test.history[env_test.viz_col_tmpls[0].vars[5]].copy().values.tolist()) + ia.extend(env_test.history[env_test.viz_col_tmpls[1].vars[0]].copy().values.tolist()) + ib.extend(env_test.history[env_test.viz_col_tmpls[1].vars[1]].copy().values.tolist()) + ic.extend(env_test.history[env_test.viz_col_tmpls[1].vars[2]].copy().values.tolist()) + R_load.extend(env_test.history[env_test.viz_col_tmpls[2].vars[1]].copy().values.tolist()) + + env_test.close() + obs = env_test.reset() + phase_list.append(env_test.env.net.components[0].phase) + + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + reward_test_after_training = {"Name": "Test_Reward", + "time": ts, + "Reward": rew_list, + "Return": (return_sum / env_test.max_episode_steps + limit_exceeded_penalty), + "Trial number": n_trail, + "Database name": folder_name, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime())} + + mongo_recorder.save_to_json('Trial_number_' + n_trail, reward_test_after_training) + + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "lc_capacitor1_v": va, + "lc_capacitor2_v": vb, + "lc_capacitor3_v": vc, + "inverter1_v_ref_0": v_ref0, + "inverter1_v_ref_1": v_ref1, + "inverter1_v_ref_2": v_ref2, + "lc_inductor1_i": ia, + "lc_inductor2_i": ib, + "lc_inductor3_i": ic, + "r_load_resistor1_R": R_load, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": 
time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! (risk=0 manullay in env); only voltage taken into account in reward!"} + + """ + In new testenv not used, because then only the last episode is stored + """ + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + # mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) diff --git a/experiments/hp_tune/experiment_vctrl_single_inv_TD3.py b/experiments/hp_tune/experiment_vctrl_single_inv_TD3.py new file mode 100644 index 00000000..aa5cd210 --- /dev/null +++ b/experiments/hp_tune/experiment_vctrl_single_inv_TD3.py @@ -0,0 +1,315 @@ +import platform +import time +from collections import OrderedDict + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG, TD3 +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.configTD3 import cfg +from experiments.hp_tune.util.recorder import Recorder + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] + + +def experiment_fit_TD3(learning_rate, gamma, use_gamma_in_rew, weight_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, antiwindup_weight, + penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, n_trail, + policy_delay, target_policy_noise, target_noise_clip + ): + if node not in cfg['lea_vpn_nodes']: + # assume we are on pc2 + log_path = f'/scratch/hpc-prf-reinfl/weber/OMG/{folder_name}/{n_trail}/' + else: + log_path = f'{folder_name}/{n_trail}/' + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 
'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + + env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps) + + # todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... + env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + n_actions = env.action_space.shape[-1] + noise_var = noise_var # 20#0.2 + noise_theta = noise_theta # 50 # stiffness of OU + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + # action_noise = myOrnsteinUhlenbeckActionNoise(n_steps_annealing=noise_steps_annealing, + # sigma_min=noise_var * np.ones(n_actions) * noise_var_min, + # mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + # sigma=noise_var * np.ones(n_actions), dt=net.ts) + print(optimizer) + if optimizer == 'SGD': + used_optimzer = th.optim.SGD + elif optimizer == 'RMSprop': + used_optimzer = th.optim.RMSprop + # elif optimizer == 'LBFGS': + # needs in step additional argument + # used_optimzer = th.optim.LBFGS + else: + used_optimzer = th.optim.Adam + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size] * actor_number_layers + , qf=[critic_hidden_size] * critic_number_layers), + optimizer_class=used_optimzer) + + model = TD3(policy='MlpPolicy', + env=env, + learning_rate=learning_rate, + buffer_size=buffer_size, + learning_starts=100, + batch_size=batch_size, + tau=tau, + gamma=gamma, + train_freq=(train_freq, train_freq_type), + gradient_steps=-1, + action_noise=action_noise, + optimize_memory_usage=False, + policy_delay=policy_delay, + target_policy_noise=target_policy_noise, + target_noise_clip=target_noise_clip, + tensorboard_log=log_path, + create_eval_env=False, + policy_kwargs=policy_kwargs, + verbose=0, + seed=None, + device="auto", + _init_setup_model=True + ) + + # Adjust network -> maybe change to Costume net like https://stable-baselines3.readthedocs.io/en/master/guide/custom_policy.html + # adn scale weights and biases + param_dict_scaled = OrderedDict() + param_dict = model.get_parameters()['policy'] + for param in param_dict.items(): + # print(param) + param_dict_scaled[param[0]] = param[1] * weight_scale + + new_param_dict = {'policy': param_dict_scaled, + 'actor.optimizer': model.get_parameters()['actor.optimizer'], + 'critic.optimizer': model.get_parameters()['critic.optimizer']} + + # new_param_dict = OrderedDict() + """ + new_param_dict["policy"] = param_dict_scaled + new_param_dict["actor.optimizer"] = model.get_parameters()['actor.optimizer'] + new_param_dict["critic.optimizer"] = model.get_parameters()['critic.optimizer'] + """ + + model.set_parameters(new_param_dict) + + # print('alphaRelu fehlt noch!') + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < 
critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic.qf1._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf1._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + # todo: Downscale actionspace - lessulgy possible? Interaction pytorch... + env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Trial number": n_trail, + "Database name": folder_name, + "Sum_eps_reward": env.get_episode_rewards() + } + mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + + model.save(log_path + f'model.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, penalty_P_weight=0) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + va = [] + vb = [] + vc = [] + v_ref0 = [] + v_ref1 = [] + v_ref2 = [] + ia = [] + ib = [] + ic = [] + R_load = [] + + for step in range(env_test.max_episode_steps): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + + if step % 1000 == 0 and step != 0: + va.extend(env_test.history[env_test.viz_col_tmpls[0].vars[0]].copy().values.tolist()) + vb.extend(env_test.history[env_test.viz_col_tmpls[0].vars[1]].copy().values.tolist()) + vc.extend(env_test.history[env_test.viz_col_tmpls[0].vars[2]].copy().values.tolist()) + 
v_ref0.extend(env_test.history[env_test.viz_col_tmpls[0].vars[3]].copy().values.tolist()) + v_ref1.extend(env_test.history[env_test.viz_col_tmpls[0].vars[4]].copy().values.tolist()) + v_ref2.extend(env_test.history[env_test.viz_col_tmpls[0].vars[5]].copy().values.tolist()) + ia.extend(env_test.history[env_test.viz_col_tmpls[1].vars[0]].copy().values.tolist()) + ib.extend(env_test.history[env_test.viz_col_tmpls[1].vars[1]].copy().values.tolist()) + ic.extend(env_test.history[env_test.viz_col_tmpls[1].vars[2]].copy().values.tolist()) + R_load.extend(env_test.history[env_test.viz_col_tmpls[2].vars[1]].copy().values.tolist()) + + env_test.close() + obs = env_test.reset() + phase_list.append(env_test.env.net.components[0].phase) + + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "lc_capacitor1_v": va, + "lc_capacitor2_v": vb, + "lc_capacitor3_v": vc, + "inverter1_v_ref_0": v_ref0, + "inverter1_v_ref_1": v_ref1, + "inverter1_v_ref_2": v_ref2, + "lc_inductor1_i": ia, + "lc_inductor2_i": ib, + "lc_inductor3_i": ic, + "r_load_resistor1_R": R_load, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! 
(risk=0 manullay in env); only voltage taken into account in reward!"} + + """ + In new testenv not used, because then only the last episode is stored + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + """ + + mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) diff --git a/experiments/hp_tune/hp_tune_ddpg_objective.py b/experiments/hp_tune/hp_tune_ddpg_objective.py new file mode 100644 index 00000000..96f0e688 --- /dev/null +++ b/experiments/hp_tune/hp_tune_ddpg_objective.py @@ -0,0 +1,470 @@ +import json +import os +import time + +import sqlalchemy +from optuna.samplers import TPESampler + + +os.environ['PGOPTIONS'] = '-c statement_timeout=1000' + +import optuna +import platform +import argparse +import sshtunnel +import numpy as np +# np.random.seed(0) +from experiments.hp_tune.util.config import cfg + +from experiments.hp_tune.experiment_vctrl_single_inv import mongo_recorder, experiment_fit_DDPG +from experiments.hp_tune.util.scheduler import linear_schedule + +model_path = 'experiments/hp_tune/trained_models/study_22_run_11534/' + +PC2_LOCAL_PORT2PSQL = 11999 +SERVER_LOCAL_PORT2PSQL = 6432 +DB_NAME = 'optuna' +PC2_LOCAL_PORT2MYSQL = 11998 +SERVER_LOCAL_PORT2MYSQL = 3306 +STUDY_NAME = cfg['STUDY_NAME'] # 'DDPG_MRE_sqlite_PC2' + +node = platform.uname().node + + +def ddpg_objective_fix_params(trial): + file_congfig = open(model_path + + 'PC2_DDPG_Vctrl_single_inv_22_newTestcase_Trial_number_11534_0.json', ) + trial_config = json.load(file_congfig) + + number_learning_steps = 500000 # trial.suggest_int("number_learning_steps", 100000, 1000000) + # rew_weigth = trial.suggest_float("rew_weigth", 0.1, 5) + # rew_penalty_distribution = trial.suggest_float("antiwindup_weight", 0.1, 5) + penalty_I_weight = trial_config["penalty_I_weight"] # trial.suggest_float("penalty_I_weight", 100e-6, 2) + penalty_P_weight = trial_config["penalty_P_weight"] # trial.suggest_float("penalty_P_weight", 100e-6, 2) + + penalty_I_decay_start = trial_config[ + "penalty_I_decay_start"] # trial.suggest_float("penalty_I_decay_start", 0.00001, 1) + penalty_P_decay_start = trial_config[ + "penalty_P_decay_start"] # trial.suggest_float("penalty_P_decay_start", 0.00001, 1) + + t_start_penalty_I = int(penalty_I_decay_start * number_learning_steps) + t_start_penalty_P = int(penalty_P_decay_start * number_learning_steps) + + integrator_weight = trial_config["integrator_weight"] # trial.suggest_float("integrator_weight", 1 / 200, 2) + # integrator_weight = trial.suggest_loguniform("integrator_weight", 1e-6, 1e-0) + # antiwindup_weight = trial.suggest_loguniform("antiwindup_weight", 50e-6, 50e-3) + antiwindup_weight = trial_config["antiwindup_weight"] # trial.suggest_float("antiwindup_weight", 0.00001, 1) + + learning_rate = trial_config["learning_rate"] # trial.suggest_loguniform("learning_rate", 1e-6, 1e-1) # 0.0002# + + lr_decay_start = trial_config[ + "lr_decay_start"] # trial.suggest_float("lr_decay_start", 0.00001, 1) # 3000 # 0.2 * number_learning_steps? 
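+    # Both decay parameters are fractions of number_learning_steps: lr_decay_start
+    # marks where the linear learning-rate decay begins and lr_decay_duration how
+    # long it lasts; they are converted to absolute step counts (t_start, t_end)
+    # further below. In this fixed-parameter objective the values are read from the
+    # best-trial JSON loaded above, and the commented-out suggest calls only
+    # document the ranges that were searched originally.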
+ lr_decay_duration = trial_config["lr_decay_duration"] # trial.suggest_float("lr_decay_duration", 0.00001, + # 1) # 3000 # 0.2 * number_learning_steps? + t_start = int(lr_decay_start * number_learning_steps) + t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps, + number_learning_steps)) + final_lr = trial_config["final_lr"] # trial.suggest_float("final_lr", 0.00001, 1) + + gamma = trial_config["gamma"] # trial.suggest_float("gamma", 0.5, 0.9999) + weight_scale = trial_config["weight_scale"] # trial.suggest_loguniform("weight_scale", 5e-5, 0.2) # 0.005 + + bias_scale = trial_config["bias_scale"] # trial.suggest_loguniform("bias_scale", 5e-4, 0.1) # 0.005 + alpha_relu_actor = trial_config[ + "alpha_relu_actor"] # trial.suggest_loguniform("alpha_relu_actor", 0.0001, 0.5) # 0.005 + alpha_relu_critic = trial_config[ + "alpha_relu_critic"] # trial.suggest_loguniform("alpha_relu_critic", 0.0001, 0.5) # 0.005 + + batch_size = trial_config["batch_size"] # trial.suggest_int("batch_size", 16, 1024) # 128 + buffer_size = trial_config[ + "buffer_size"] # trial.suggest_int("buffer_size", int(1e4), number_learning_steps) # 128 + + actor_hidden_size = trial_config[ + "actor_hidden_size"] # trial.suggest_int("actor_hidden_size", 10, 200) # 100 # Using LeakyReLU + actor_number_layers = trial_config["actor_number_layers"] # trial.suggest_int("actor_number_layers", 1, 4) + + critic_hidden_size = trial_config["critic_hidden_size"] # trial.suggest_int("critic_hidden_size", 10, 300) # 100 + critic_number_layers = trial_config["critic_number_layers"] # trial.suggest_int("critic_number_layers", 1, 4) + + n_trail = str(trial.number) + use_gamma_in_rew = 1 + noise_var = trial_config["noise_var"] # trial.suggest_loguniform("noise_var", 0.01, 1) # 2 + # min var, action noise is reduced to (depends on noise_var) + noise_var_min = 0.0013 # trial.suggest_loguniform("noise_var_min", 0.0000001, 2) + # min var, action noise is reduced to (depends on training_episode_length) + noise_steps_annealing = int( + 0.25 * number_learning_steps) # trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), + # number_learning_steps) + noise_theta = trial_config["noise_theta"] # trial.suggest_loguniform("noise_theta", 1, 50) # 25 # stiffness of OU + error_exponent = 0.5 # trial.suggest_loguniform("error_exponent", 0.001, 4) + + training_episode_length = trial_config[ + "training_episode_length"] # trial.suggest_int("training_episode_length", 500, 5000) # 128 + # learning_starts = 0.32 # trial.suggest_loguniform("learning_starts", 0.1, 2) # 128 + tau = trial_config["tau"] # trial.suggest_loguniform("tau", 0.0001, 0.3) # 2 + + train_freq_type = "step" # trial.suggest_categorical("train_freq_type", ["episode", "step"]) + train_freq = trial_config["train_freq"] # trial.suggest_int("train_freq", 1, 15000) + + optimizer = trial_config[ + "optimizer"] # trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"]) # , "LBFGS"]) + + number_past_vals = 5 # trial.suggest_int("number_past_vals", 0, 15) + + learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr, + t_start=t_start, + t_end=t_end, + total_timesteps=number_learning_steps) + + trail_config_mongo = {"Name": "Config", + "Node": node, + "Agent": "DDPG", + "Number_learning_Steps": number_learning_steps, + "Trial number": n_trail, + "Database name": cfg['STUDY_NAME'], + "Start time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Info": "AltesTestcase setting 
mit Integrator-Actor; 50 runs mit bestem HP-setting", + } + trail_config_mongo.update(trial.params) + # mongo_recorder.save_to_mongodb('Trial_number_' + n_trail, trail_config_mongo) + mongo_recorder.save_to_json('Trial_number_' + n_trail, trail_config_mongo) + + loss = experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + # loss = experiment_fit_DDPG_custom(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, + integrator_weight * antiwindup_weight, penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, + n_trail, number_past_vals) + + return loss + +def ddpg_objective(trial): + number_learning_steps = 500000 # trial.suggest_int("number_learning_steps", 100000, 1000000) + # rew_weigth = trial.suggest_float("rew_weigth", 0.1, 5) + # rew_penalty_distribution = trial.suggest_float("antiwindup_weight", 0.1, 5) + penalty_I_weight = 1 # trial.suggest_float("penalty_I_weight", 100e-6, 2) + penalty_P_weight = 1 # trial.suggest_float("penalty_P_weight", 100e-6, 2) + + penalty_I_decay_start = 0.5 # trial.suggest_float("penalty_I_decay_start", 0.00001, 1) + penalty_P_decay_start = 0.5 # trial.suggest_float("penalty_P_decay_start", 0.00001, 1) + + t_start_penalty_I = int(penalty_I_decay_start * number_learning_steps) + t_start_penalty_P = int(penalty_P_decay_start * number_learning_steps) + + integrator_weight = 0.1 # trial.suggest_float("integrator_weight", 1 / 200, 0.5) + # integrator_weight = trial.suggest_loguniform("integrator_weight", 1e-6, 1e-0) + # antiwindup_weight = trial.suggest_loguniform("antiwindup_weight", 50e-6, 50e-3) + antiwindup_weight = 0.1 # trial.suggest_float("antiwindup_weight", 0.00001, 1) + + learning_rate = trial.suggest_loguniform("learning_rate", 1e-7, 1e-2) # 0.0002# + + lr_decay_start = trial.suggest_float("lr_decay_start", 0.00001, 1) # 3000 # 0.2 * number_learning_steps? + lr_decay_duration = trial.suggest_float("lr_decay_duration", 0.00001, + 1) # 3000 # 0.2 * number_learning_steps? 
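+    # The two fractions above parameterize the linear_schedule constructed further
+    # below. Assumed behaviour of the scheduler (its implementation lives in
+    # experiments/hp_tune/util/scheduler.py and is not shown in this diff):
+    #   lr(t) = learning_rate                              for t <= t_start
+    #   lr(t) decays linearly to learning_rate * final_lr  for t_start < t < t_end
+    #   lr(t) = learning_rate * final_lr                   for t >= t_end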
+ t_start = int(lr_decay_start * number_learning_steps) + t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps, + number_learning_steps)) + final_lr = trial.suggest_float("final_lr", 0.00001, 1) + + gamma = trial.suggest_float("gamma", 0.8, 0.9999) + weight_scale = trial.suggest_loguniform("weight_scale", 5e-5, 0.2) # 0.005 + + bias_scale = trial.suggest_loguniform("bias_scale", 0.01, 0.1) # 0.005 + alpha_relu_actor = trial.suggest_loguniform("alpha_relu_actor", 0.001, 0.5) # 0.005 + alpha_relu_critic = trial.suggest_loguniform("alpha_relu_critic", 0.001, 0.5) # 0.005 + + batch_size = trial.suggest_int("batch_size", 16, 512) # 128 + buffer_size = trial.suggest_int("buffer_size", int(20e4), number_learning_steps) # 128 + + actor_hidden_size = trial.suggest_int("actor_hidden_size", 10, 75) # 100 # Using LeakyReLU + actor_number_layers = trial.suggest_int("actor_number_layers", 1, 3) + + critic_hidden_size = trial.suggest_int("critic_hidden_size", 10, 300) # 100 + critic_number_layers = trial.suggest_int("critic_number_layers", 1, 4) + + n_trail = str(trial.number) + use_gamma_in_rew = 1 + noise_var = trial.suggest_loguniform("noise_var", 0.01, 1) # 2 + # min var, action noise is reduced to (depends on noise_var) + noise_var_min = 0.0013 # trial.suggest_loguniform("noise_var_min", 0.0000001, 2) + # min var, action noise is reduced to (depends on training_episode_length) + noise_steps_annealing = int( + 0.25 * number_learning_steps) # trail.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps), + # number_learning_steps) + noise_theta = trial.suggest_loguniform("noise_theta", 1, 50) # 25 # stiffness of OU + error_exponent = 0.5 # 0.5 # trial.suggest_loguniform("error_exponent", 0.001, 4) + + training_episode_length = trial.suggest_int("training_episode_length", 1000, 4000) # 128 + # learning_starts = 0.32 # trial.suggest_loguniform("learning_starts", 0.1, 2) # 128 + tau = trial.suggest_loguniform("tau", 0.0001, 0.3) # 2 + + train_freq_type = "step" # trial.suggest_categorical("train_freq_type", ["episode", "step"]) + train_freq = trial.suggest_int("train_freq", 1, 5000) + + optimizer = trial.suggest_categorical("optimizer", ["Adam"]) # ["Adam", "SGD", "RMSprop"]) # , "LBFGS"]) + + learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr, + t_start=t_start, + t_end=t_end, + total_timesteps=number_learning_steps) + number_past_vals = trial.suggest_int("number_past_vals", 0, 15) + + trail_config_mongo = {"Name": "Config", + "Node": node, + "Agent": "DDPG", + "Number_learning_Steps": number_learning_steps, + "Trial number": n_trail, + "Database name": cfg['STUDY_NAME'], + "Start time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Optimierer/ Setting stuff": "DDPG HPO ohne Integrator, alle HPs fuer den I-Anteil " + "wurden daher fix gesetzt. Vgl. 
zu DDPG+I-Anteil" + } + trail_config_mongo.update(trial.params) + # mongo_recorder.save_to_mongodb('Trial_number_' + n_trail, trail_config_mongo) + mongo_recorder.save_to_json('Trial_number_' + n_trail, trail_config_mongo) + + loss = experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + # loss = experiment_fit_DDPG_custom(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, + alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, + integrator_weight * antiwindup_weight, penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, + n_trail, number_past_vals) + + return loss + + +def get_storage(url, storage_kws): + successfull = False + retry_counter = 0 + + while not successfull: + try: + storage = optuna.storages.RDBStorage( + url=url, **storage_kws) + successfull = True + except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.DatabaseError) as e: + wait_time = np.random.randint(60, 300) + retry_counter += 1 + if retry_counter > 10: + print('Stopped after 10 connection attempts!') + raise e + print(f'Could not connect, retry in {wait_time} s') + time.sleep(wait_time) + + return storage + + +def optuna_optimize_mysql_lea35(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + if node in ('LEA-WORK35', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2MYSQL + else: + port = SERVER_LOCAL_PORT2MYSQL + + storage = get_storage(f'mysql://{optuna_creds}@localhost:{port}/{DB_NAME}') + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2MYSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2MYSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + study = optuna.create_study( + storage=f"mysql+pymysql://{optuna_creds}@127.0.0.1:{tun.local_bind_port}/{DB_NAME}", + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_mysql(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f"mysql://{optuna_creds}@localhost/{DB_NAME}", + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_sqlite(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 100 + + print(n_trials) + print('Local optimization is run but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in cfg['lea_vpn_nodes']: + optuna_path = './optuna/' + else: + # assume we are on not of pc2 -> store to project folder + optuna_path = '/scratch/hpc-prf-reinfl/weber/OMG/optuna/' + + os.makedirs(optuna_path, exist_ok=True) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f'sqlite:///{optuna_path}optuna.sqlite', + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_psql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_psql' + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + # set trial to failed if it seems dead for 20 minutes + storage_kws = dict(engine_kwargs={"pool_timeout": 600}) + if node in ('lea-cyberdyne', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2PSQL + else: + port = SERVER_LOCAL_PORT2PSQL + + storage = get_storage(f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', storage_kws=storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2PSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2PSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + storage = get_storage(url=f'postgresql://{optuna_creds}' + f'@localhost:{tun.local_bind_port}/{DB_NAME}', storage_kws=storage_kws) + + # storage = optuna.storages.RDBStorage( + # url=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + # **storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +if __name__ == "__main__": + # learning_rate = list(itertools.chain(*[[1e-9] * 1])) + # search_space = {'learning_rate': learning_rate} # , 'number_learning_steps': number_learning_steps} + + TPE_sampler = TPESampler(n_startup_trials=400) # , constant_liar=True) + # TPE_sampler = TPESampler(n_startup_trials=2500) # , constant_liar=True) + + # optuna_optimize_mysql_lea35(ddpg_objective, study_name=STUDY_NAME, sampler=TPE_sampler) + + optuna_optimize_mysql_lea35(ddpg_objective_fix_params, study_name=STUDY_NAME, sampler=TPE_sampler) + # 
optuna_optimize_sqlite(ddpg_objective_fix_params, study_name=STUDY_NAME, sampler=TPE_sampler) + + # optuna_optimize(ddpg_objective, study_name=STUDY_NAME, + # sampler=TPE_sampler) #, sampler=optuna.samplers.GridSampler(search_space)) diff --git a/experiments/hp_tune/hp_tune_td3_objective.py b/experiments/hp_tune/hp_tune_td3_objective.py new file mode 100644 index 00000000..d7db77eb --- /dev/null +++ b/experiments/hp_tune/hp_tune_td3_objective.py @@ -0,0 +1,360 @@ +import os +import time + +import sqlalchemy +from optuna.samplers import TPESampler + +from experiments.hp_tune.experiment_vctrl_single_inv_TD3 import experiment_fit_TD3 + +os.environ['PGOPTIONS'] = '-c statement_timeout=1000' + +import optuna +import platform +import argparse +import sshtunnel +import numpy as np +# np.random.seed(0) +from experiments.hp_tune.util.configTD3 import cfg + +# from experiments.hp_tune.experiment_vctrl_single_inv import experiment_fit_DDPG, mongo_recorder +from experiments.hp_tune.experiment_vctrl_single_inv_TD3 import mongo_recorder +from experiments.hp_tune.util.scheduler import linear_schedule + +PC2_LOCAL_PORT2PSQL = 11999 +SERVER_LOCAL_PORT2PSQL = 6432 +DB_NAME = 'optuna' +PC2_LOCAL_PORT2MYSQL = 11998 +SERVER_LOCAL_PORT2MYSQL = 3306 +STUDY_NAME = cfg['STUDY_NAME'] # 'DDPG_MRE_sqlite_PC2' + +node = platform.uname().node + + +def td3_objective(trial): + number_learning_steps = 500000 # trial.suggest_int("number_learning_steps", 100000, 1000000) + # rew_weigth = trial.suggest_float("rew_weigth", 0.1, 5) + # rew_penalty_distribution = trial.suggest_float("antiwindup_weight", 0.1, 5) + penalty_I_weight = trial.suggest_float("penalty_I_weight", 100e-6, 2) + penalty_P_weight = trial.suggest_float("penalty_P_weight", 100e-6, 2) + + penalty_I_decay_start = trial.suggest_float("penalty_I_decay_start", 0.00001, 1) + penalty_P_decay_start = trial.suggest_float("penalty_P_decay_start", 0.00001, 1) + + t_start_penalty_I = int(penalty_I_decay_start * number_learning_steps) + t_start_penalty_P = int(penalty_P_decay_start * number_learning_steps) + + integrator_weight = trial.suggest_float("integrator_weight", 1 / 200, 2) + # integrator_weight = trial.suggest_loguniform("integrator_weight", 1e-6, 1e-0) + # antiwindup_weight = trial.suggest_loguniform("antiwindup_weight", 50e-6, 50e-3) + antiwindup_weight = trial.suggest_float("antiwindup_weight", 0.00001, 1) + + learning_rate = trial.suggest_loguniform("learning_rate", 1e-6, 1e-1) # 0.0002# + + lr_decay_start = trial.suggest_float("lr_decay_start", 0.00001, 1) # 3000 # 0.2 * number_learning_steps? + lr_decay_duration = trial.suggest_float("lr_decay_duration", 0.00001, + 1) # 3000 # 0.2 * number_learning_steps? 
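+    # Same learning-rate decay scheme as in the DDPG objective: the two fractions
+    # above are mapped to absolute step indices (with the end clamped to the
+    # training horizon) and passed to linear_schedule further below.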
+    t_start = int(lr_decay_start * number_learning_steps)
+    t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps,
+                           number_learning_steps))
+    final_lr = trial.suggest_float("final_lr", 0.00001, 1)
+
+    gamma = trial.suggest_float("gamma", 0.5, 0.9999)
+    weight_scale = trial.suggest_loguniform("weight_scale", 5e-5, 0.2)  # 0.005
+
+    # bias_scale = trial.suggest_loguniform("bias_scale", 5e-4, 0.1)  # 0.005
+    alpha_relu_actor = trial.suggest_loguniform("alpha_relu_actor", 0.0001, 0.5)  # 0.005
+    alpha_relu_critic = trial.suggest_loguniform("alpha_relu_critic", 0.0001, 0.5)  # 0.005
+
+    batch_size = trial.suggest_int("batch_size", 16, 1024)  # 128
+    buffer_size = trial.suggest_int("buffer_size", int(1e4), number_learning_steps)  # 128
+
+    actor_hidden_size = trial.suggest_int("actor_hidden_size", 10, 250)  # 100 # Using LeakyReLU
+    actor_number_layers = trial.suggest_int("actor_number_layers", 1, 5)
+
+    critic_hidden_size = trial.suggest_int("critic_hidden_size", 10, 300)  # 100
+    critic_number_layers = trial.suggest_int("critic_number_layers", 1, 4)
+
+    n_trail = str(trial.number)
+    use_gamma_in_rew = 1
+    noise_var = trial.suggest_loguniform("noise_var", 0.01, 1)  # 2
+    # min var, action noise is reduced to (depends on noise_var)
+    noise_var_min = 0.0013  # trial.suggest_loguniform("noise_var_min", 0.0000001, 2)
+    # min var, action noise is reduced to (depends on training_episode_length)
+    noise_steps_annealing = int(
+        0.25 * number_learning_steps)  # trial.suggest_int("noise_steps_annealing", int(0.1 * number_learning_steps),
+    # number_learning_steps)
+    noise_theta = trial.suggest_loguniform("noise_theta", 1, 50)  # 25 # stiffness of OU
+    error_exponent = 0.5  # trial.suggest_loguniform("error_exponent", 0.001, 4)
+
+    training_episode_length = trial.suggest_int("training_episode_length", 500, 5000)  # 128
+    # learning_starts = 0.32  # trial.suggest_loguniform("learning_starts", 0.1, 2)  # 128
+    tau = trial.suggest_loguniform("tau", 0.0001, 0.2)  # 2
+
+    train_freq_type = "step"  # trial.suggest_categorical("train_freq_type", ["episode", "step"])
+    train_freq = trial.suggest_int("train_freq", 1, 15000)
+
+    optimizer = trial.suggest_categorical("optimizer", ["Adam", "SGD", "RMSprop"])  # , "LBFGS"])
+
+    learning_rate = linear_schedule(initial_value=learning_rate, final_value=learning_rate * final_lr,
+                                    t_start=t_start,
+                                    t_end=t_end,
+                                    total_timesteps=number_learning_steps)
+
+    policy_delay = trial.suggest_int("policy_delay", 1, 50)
+    target_policy_noise = trial.suggest_float("target_policy_noise", 0.0001, 1)
+    target_noise_clip = trial.suggest_float("target_noise_clip", 0.0001, 2)
+
+    trail_config_mongo = {"Name": "Config",
+                          "Node": node,
+                          "Agent": "TD3",
+                          "Number_learning_Steps": number_learning_steps,
+                          "Trial number": n_trail,
+                          "Database name": cfg['STUDY_NAME'],
+                          "Start time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()),
+                          "Optimierer/ Setting stuff": "No constant-liar feature, wider bounds, integrator weight as HP; "
+                                                       "action space = 6, since the P and I parts are separate outputs and are added up in the wrapper; "
+                                                       "integrator state + used_P_Action (each delayed by one step) are passed along as features; "
+                                                       "penalties for the P and the I action part; "
+                                                       "more HPs: train_freq, batch/buffer_size, a_relu ",
+                          "additionalInfo": "Long Holiday run"
+                          }
+    trail_config_mongo.update(trial.params)
+    # mongo_recorder.save_to_mongodb('Trial_number_' + n_trail, trail_config_mongo)
+    mongo_recorder.save_to_json('Trial_number_' + n_trail, trail_config_mongo)
+
+    loss = 
experiment_fit_TD3(learning_rate, gamma, use_gamma_in_rew, weight_scale, alpha_relu_actor, + # loss = experiment_fit_Custom_TD3(learning_rate, gamma, use_gamma_in_rew, weight_scale, bias_scale, alpha_relu_actor, + batch_size, + actor_hidden_size, actor_number_layers, critic_hidden_size, critic_number_layers, + alpha_relu_critic, + noise_var, noise_theta, noise_var_min, noise_steps_annealing, error_exponent, + training_episode_length, buffer_size, # learning_starts, + tau, number_learning_steps, integrator_weight, + integrator_weight * antiwindup_weight, penalty_I_weight, penalty_P_weight, + train_freq_type, train_freq, t_start_penalty_I, t_start_penalty_P, optimizer, + n_trail, + policy_delay, target_policy_noise, target_noise_clip) + + return loss + + +def get_storage(url, storage_kws): + successfull = False + retry_counter = 0 + + while not successfull: + try: + storage = optuna.storages.RDBStorage( + url=url, **storage_kws) + successfull = True + except (sqlalchemy.exc.OperationalError, sqlalchemy.exc.DatabaseError) as e: + wait_time = np.random.randint(60, 300) + retry_counter += 1 + if retry_counter > 10: + print('Stopped after 10 connection attempts!') + raise e + print(f'Could not connect, retry in {wait_time} s') + time.sleep(wait_time) + + return storage + + +def optuna_optimize_mysql_lea35(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + if node in ('LEA-WORK35', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2MYSQL + else: + port = SERVER_LOCAL_PORT2MYSQL + + storage = get_storage(f'mysql://{optuna_creds}@localhost:{port}/{DB_NAME}') + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2MYSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2MYSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + study = optuna.create_study( + storage=f"mysql+pymysql://{optuna_creds}@127.0.0.1:{tun.local_bind_port}/{DB_NAME}", + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_mysql(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=1, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run - logs to MYSQL but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_mysql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f"mysql://{optuna_creds}@localhost/{DB_NAME}", + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize_sqlite(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + print('Local optimization is run but measurement data is logged to MongoDB on Cyberdyne!') + print('Take care, trail numbers can double if local opt. 
is run on 2 machines and are stored in ' + 'the same MongoDB Collection!!!') + print('Measurment data is stored to cfg[meas_data_folder] as json, from there it is grept via reporter to ' + 'safely store it to ssh port for cyberdyne connection to mongodb') + + if node in cfg['lea_vpn_nodes']: + optuna_path = './optuna/' + else: + # assume we are on not of pc2 -> store to project folder + optuna_path = '/scratch/hpc-prf-reinfl/weber/OMG/optuna/' + + os.makedirs(optuna_path, exist_ok=True) + + study = optuna.create_study(study_name=study_name, + direction='maximize', + storage=f'sqlite:///{optuna_path}optuna.sqlite', + load_if_exists=True, + sampler=sampler + ) + study.optimize(objective, n_trials=n_trials) + + +def optuna_optimize(objective, sampler=None, study_name='dummy'): + parser = argparse.ArgumentParser(description='Train DDPG Single Inverter V-ctrl') + parser.add_argument('-n', '--n_trials', default=50, required=False, + help='number of trials to execute', type=int) + args = parser.parse_args() + n_trials = args.n_trials or 10 + + print(n_trials) + + if node in ('lea-picard', 'lea-barclay'): + creds_path = 'C:\\Users\\webbah\\Documents\\creds\\optuna_psql.txt' + else: + # read db credentials + creds_path = f'{os.getenv("HOME")}/creds/optuna_psql' + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + # set trial to failed if it seems dead for 20 minutes + storage_kws = dict(engine_kwargs={"pool_timeout": 600}) + if node in ('lea-cyberdyne', 'fe1'): + if node == 'fe1': + port = PC2_LOCAL_PORT2PSQL + else: + port = SERVER_LOCAL_PORT2PSQL + + storage = get_storage(f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', storage_kws=storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + else: + if node in cfg['lea_vpn_nodes']: + # we are in LEA VPN + server_name = 'lea38' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + SERVER_LOCAL_PORT2PSQL)} + else: + # assume we are on a PC2 compute node + server_name = 'fe.pc2.uni-paderborn.de' + tun_cfg = {'remote_bind_address': ('127.0.0.1', + PC2_LOCAL_PORT2PSQL), + 'ssh_username': 'webbah'} + with sshtunnel.open_tunnel(server_name, **tun_cfg) as tun: + + storage = get_storage(url=f'postgresql://{optuna_creds}' + f'@localhost:{tun.local_bind_port}/{DB_NAME}', storage_kws=storage_kws) + + # storage = optuna.storages.RDBStorage( + # url=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + # **storage_kws) + + study = optuna.create_study( + storage=storage, + # storage=f'postgresql://{optuna_creds}' + # f'@localhost:{tun.local_bind_port}/{DB_NAME}', + sampler=sampler, study_name=study_name, + load_if_exists=True, + direction='maximize') + study.optimize(objective, n_trials=n_trials) + + +if __name__ == "__main__": + # learning_rate = list(itertools.chain(*[[1e-9] * 1])) + # search_space = {'learning_rate': learning_rate} # , 'number_learning_steps': number_learning_steps} + + TPE_sampler = TPESampler(n_startup_trials=2500) # , constant_liar=True) + + optuna_optimize_mysql_lea35(td3_objective, study_name=STUDY_NAME, sampler=TPE_sampler) + + # optuna_optimize_sqlite(td3_objective, study_name=STUDY_NAME, sampler=TPE_sampler) + # optuna_optimize(td3_objective, study_name=STUDY_NAME, + # sampler=TPE_sampler) #, 
sampler=optuna.samplers.GridSampler(search_space)) diff --git a/experiments/hp_tune/pc2_run_allowed_workers_once_ddpg.py b/experiments/hp_tune/pc2_run_allowed_workers_once_ddpg.py new file mode 100644 index 00000000..12a1ccdd --- /dev/null +++ b/experiments/hp_tune/pc2_run_allowed_workers_once_ddpg.py @@ -0,0 +1,81 @@ +"""Allocate jobs executing a certain cmd endlessly. Make sure not to surpass +the allowed cpu core limit""" + +import os +import pathlib +import time +import uuid + +from experiments.hp_tune.util import pc2 +# config +from experiments.hp_tune.util.config import cfg + +USER = os.getenv('USER') +ALLOWED_MAX_CPU_CORES = 512 + +# resources request +job_resource_plan = { + 'duration': 24, # in hours + 'ncpus': 2, + 'memory': 12, + 'vmemory': 16, +} + +MAX_WORKERS = ALLOWED_MAX_CPU_CORES // job_resource_plan['ncpus'] +STUDY_NAME = cfg['STUDY_NAME'] +NUMBER_INTERATIONS = 1 + +def main(): + print('Start slavedriving loop..') + print('Will start MAX_WORKERS and terminate.') + old_ccsinfo_counts = None + for _ in range(MAX_WORKERS): + job_files_path = pathlib.Path( + f"/scratch/hpc-prf-reinfl/weber/OMG/ccs_job_files/{STUDY_NAME}") # SCRATCH = $PC2PFS/hpc_....re/OMG_prjecet + job_files_path.mkdir(parents=False, exist_ok=True) + + # read ccsinfo + ccsinfo = pc2.get_ccsinfo(USER) + ccsinfo_state_counts = ccsinfo.state.value_counts() + ccs_running = ccsinfo_state_counts.get('ALLOCATED', 0) + ccs_planned = ccsinfo_state_counts.get('PLANNED', 0) + total_busy = ccs_running + ccs_planned + if not ccsinfo_state_counts.equals(old_ccsinfo_counts): + print("\n## ccs summary ##") + print(f"Running: {ccs_running}") + print(f"Planned : {ccs_planned}") + print(f"Total busy workers (ccs): {total_busy}") + + if total_busy < MAX_WORKERS: + # call workers to work + # n_workers = MAX_WORKERS - total_busy + # print(f'Start {n_workers} workers:') + # for w in range(n_workers): + jobid = str(uuid.uuid4()).split('-')[0] + cluster = "oculus" + job_name = job_files_path / f"pc2_job_{jobid}.sh" + res_plan = pc2.calculate_resources(**job_resource_plan) + + execution_line = "PYTHONPATH=$HOME/openmodelica-microgrid-gym/ " \ + "python $HOME/openmodelica-microgrid-gym/experiments/hp_tune/hp_tune_ddpg_objective.py -n 3" + + print(f'Start job {jobid} ..') + pc2.create_n_run_script( + job_name, + pc2.build_shell_script_lines(job_files_path, cluster, + job_name, res_plan, + execution_line), + dry=False) + + print('sleep 10s for better DB interaction', end='\r') + time.sleep(10) + + old_ccsinfo_counts = ccsinfo_state_counts + + # print('sleep..', end='\r') + # time.sleep(120) + print('Finished, need resatart to schedule again!..', end='\r') + + +if __name__ == '__main__': + main() diff --git a/experiments/hp_tune/pc2_schedule_ddpg.py b/experiments/hp_tune/pc2_schedule_ddpg.py new file mode 100644 index 00000000..d5d691ac --- /dev/null +++ b/experiments/hp_tune/pc2_schedule_ddpg.py @@ -0,0 +1,105 @@ +"""Allocate jobs executing a certain cmd endlessly. 
Make sure not to surpass +the allowed cpu core limit""" + +import os +import pathlib +import uuid +import time + +import optuna +from optuna.samplers import TPESampler + +from experiments.hp_tune.util import pc2 +from experiments.hp_tune.util.config import cfg + +# config +USER = os.getenv('USER') +ALLOWED_MAX_CPU_CORES = 500 # 512 +STUDY_NAME = cfg['STUDY_NAME'] +DB_NAME = 'optuna' +# resources request +job_resource_plan = { + 'duration': 24, # in hours + 'ncpus': 2, + 'memory': 12, + 'vmemory': 16, +} + +MAX_WORKERS = ALLOWED_MAX_CPU_CORES // job_resource_plan['ncpus'] + +PC2_LOCAL_PORT2MYSQL = 11998 +SERVER_LOCAL_PORT2MYSQL = 3306 + + +def main(): + started_workers = 0 + print('Start slavedriving loop..') + old_ccsinfo_counts = None + while True: + + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study( + storage=f'mysql+pymysql://{optuna_creds}@localhost:{PC2_LOCAL_PORT2MYSQL}/{DB_NAME}', + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=TPESampler(n_startup_trials=2500), study_name=STUDY_NAME, + load_if_exists=True, + direction='maximize') + + complete_trials = len([t for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]) + print(f'Completed trials in study: {complete_trials}') + if complete_trials > 12000: + print('Maximal completed trials reached - STOPPING') + break + + job_files_path = pathlib.Path( + f"/scratch/hpc-prf-reinfl/weber/OMG/ccs_job_files/{STUDY_NAME}") # SCRATCH = $PC2PFS/hpc_....re/OMG_prjecet + job_files_path.mkdir(parents=False, exist_ok=True) + + # read ccsinfo + ccsinfo = pc2.get_ccsinfo(USER) + ccsinfo_state_counts = ccsinfo.state.value_counts() + ccs_running = ccsinfo_state_counts.get('ALLOCATED', 0) + ccs_planned = ccsinfo_state_counts.get('PLANNED', 0) + total_busy = ccs_running + ccs_planned + if not ccsinfo_state_counts.equals(old_ccsinfo_counts): + print("\n## ccs summary ##") + print(f"Running: {ccs_running}") + print(f"Planned : {ccs_planned}") + print(f"Total busy workers (ccs): {total_busy}") + + if total_busy < MAX_WORKERS: + # call workers to work + n_workers = MAX_WORKERS - total_busy + print(f'Start {n_workers} workers:') + for w in range(n_workers): + started_workers += 1 + jobid = str(uuid.uuid4()).split('-')[0] + cluster = "oculus" + job_name = job_files_path / f"pc2_job_{jobid}.sh" + res_plan = pc2.calculate_resources(**job_resource_plan) + + execution_line = "PYTHONPATH=$HOME/openmodelica-microgrid-gym/ " \ + "python $HOME/openmodelica-microgrid-gym/experiments/hp_tune/hp_tune_ddpg_objective.py -n 1" + + print(f'Start job {jobid} ..') + pc2.create_n_run_script( + job_name, + pc2.build_shell_script_lines(job_files_path, cluster, + job_name, res_plan, + execution_line), + dry=False) + print('sleep 10s for better DB interaction', end='\r') + time.sleep(10) + + old_ccsinfo_counts = ccsinfo_state_counts + + print('sleep..', end='\r') + time.sleep(300) + + +if __name__ == '__main__': + main() diff --git a/experiments/hp_tune/pc2_schedule_td3.py b/experiments/hp_tune/pc2_schedule_td3.py new file mode 100644 index 00000000..0fb94108 --- /dev/null +++ b/experiments/hp_tune/pc2_schedule_td3.py @@ -0,0 +1,102 @@ +"""Allocate jobs executing a certain cmd endlessly. 
Make sure not to surpass +the allowed cpu core limit""" + +import os +import pathlib +import uuid +import time + +import optuna +from optuna.samplers import TPESampler + +from experiments.hp_tune.util import pc2 +from experiments.hp_tune.util.configTD3 import cfg + +# config +USER = os.getenv('USER') +ALLOWED_MAX_CPU_CORES = 512 +STUDY_NAME = cfg['STUDY_NAME'] +DB_NAME = 'optuna' +# resources request +job_resource_plan = { + 'duration': 24, # in hours + 'ncpus': 2, + 'memory': 12, + 'vmemory': 16, +} + +MAX_WORKERS = ALLOWED_MAX_CPU_CORES // job_resource_plan['ncpus'] +PC2_LOCAL_PORT2MYSQL = 11998 + +def main(): + started_workers = 0 + print('Start slavedriving loop..') + old_ccsinfo_counts = None + while True: + + creds_path = f'{os.getenv("HOME")}/creds/optuna_mysql' + + with open(creds_path, 'r') as f: + optuna_creds = ':'.join([s.strip(' \n') for s in f.readlines()]) + + study = optuna.create_study( + storage=f'mysql+pymysql://{optuna_creds}@localhost:{PC2_LOCAL_PORT2MYSQL}/{DB_NAME}', + # storage=f'postgresql://{optuna_creds}@localhost:{port}/{DB_NAME}', + sampler=TPESampler(n_startup_trials=2500), study_name=STUDY_NAME, + load_if_exists=True, + direction='maximize') + + complete_trials = len([t for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]) + print(f'Completed trials in study: {complete_trials}') + if complete_trials > 12000: + print('Maximal completed trials reached - STOPPING') + break + + job_files_path = pathlib.Path( + f"/scratch/hpc-prf-reinfl/weber/OMG/ccs_job_files/{STUDY_NAME}") # SCRATCH = $PC2PFS/hpc_....re/OMG_prjecet + job_files_path.mkdir(parents=False, exist_ok=True) + + # read ccsinfo + ccsinfo = pc2.get_ccsinfo(USER) + ccsinfo_state_counts = ccsinfo.state.value_counts() + ccs_running = ccsinfo_state_counts.get('ALLOCATED', 0) + ccs_planned = ccsinfo_state_counts.get('PLANNED', 0) + total_busy = ccs_running + ccs_planned + if not ccsinfo_state_counts.equals(old_ccsinfo_counts): + print("\n## ccs summary ##") + print(f"Running: {ccs_running}") + print(f"Planned : {ccs_planned}") + print(f"Total busy workers (ccs): {total_busy}") + + if total_busy < MAX_WORKERS: + # call workers to work + n_workers = MAX_WORKERS - total_busy + print(f'Start {n_workers} workers:') + for w in range(n_workers): + started_workers += 1 + jobid = str(uuid.uuid4()).split('-')[0] + cluster = "oculus" + job_name = job_files_path / f"pc2_job_{jobid}.sh" + res_plan = pc2.calculate_resources(**job_resource_plan) + + execution_line = "PYTHONPATH=$HOME/openmodelica-microgrid-gym/ " \ + "python $HOME/openmodelica-microgrid-gym/experiments/hp_tune/hp_tune_td3_objective.py -n 1" + + print(f'Start job {jobid} ..') + pc2.create_n_run_script( + job_name, + pc2.build_shell_script_lines(job_files_path, cluster, + job_name, res_plan, + execution_line), + dry=False) + print('sleep 10s for better DB interaction', end='\r') + time.sleep(10) + + old_ccsinfo_counts = ccsinfo_state_counts + + print('sleep..', end='\r') + time.sleep(300) + + +if __name__ == '__main__': + main() diff --git a/experiments/hp_tune/policies/split_actor.py b/experiments/hp_tune/policies/split_actor.py new file mode 100644 index 00000000..42a0ffb4 --- /dev/null +++ b/experiments/hp_tune/policies/split_actor.py @@ -0,0 +1,144 @@ +from typing import Optional, Tuple, List, Type + +import gym +import torch as th +from torch import nn + +from stable_baselines3.common.policies import register_policy, BaseModel +from stable_baselines3.common.preprocessing import get_action_dim +from stable_baselines3.common.torch_layers 
import BaseFeaturesExtractor +from stable_baselines3.td3.policies import TD3Policy, Actor + + +def mlp(sizes, activation, output_activation=None): + """ + Defines a multi layer perceptron using pytorch layers and activation funtions + """ + layers = [] + for j in range(len(sizes) - 1): + act = activation if j < len(sizes) - 2 else output_activation + if act is not None: + layers += [nn.Linear(sizes[j], sizes[j + 1]), act()] + else: + layers += [nn.Linear(sizes[j], sizes[j + 1])] + # layers.append(nn.Tanh()) + return layers + + +class CustomActor(Actor): + """ + Actor network (policy) for TD3. + """ + + def __init__(self, *args, **kwargs): + super(CustomActor, self).__init__(*args, **kwargs) + # Define custom network with Dropout + # WARNING: it must end with a tanh activation to squash the output + # self.mu = nn.Sequential(*mlp([20, 10, 5, 6], nn.LeakyReLU())) + + # self.mu = nn.Sequential(nn.Linear(kwargs['observation_space'].shape[0], 32), + # kwargs['activation_fn'](negative_slope=0.02), + # nn.Linear(32, 10), + # nn.LeakyReLU(negative_slope=0.02), + # nn.Linear(10, int(kwargs['action_space'].shape[0] / 2))) + # self.I = nn.Sequential(nn.Linear(kwargs['observation_space'].shape[0], 32), + # kwargs['activation_fn'](negative_slope=0.02), + # nn.Linear(32, 10), + # nn.LeakyReLU(negative_slope=0.02), + # nn.Linear(10, int(kwargs['action_space'].shape[0] / 2))) + + self.mu = nn.Sequential(*mlp([kwargs['observation_space'].shape[0], *kwargs['net_arch'], + int(kwargs['action_space'].shape[0] / 2)], + kwargs['activation_fn'], + nn.Tanh)) + + self.I = nn.Sequential(*mlp([kwargs['observation_space'].shape[0], *kwargs['net_arch'], + int(kwargs['action_space'].shape[0] / 2)], + kwargs['activation_fn'], + nn.Tanh)) + + def forward(self, obs: th.Tensor) -> th.Tensor: + # assert deterministic, 'The TD3 actor only outputs deterministic actions' + features = self.extract_features(obs) + return th.cat((self.mu(features), self.I(features)), 1) + + +class CustomContinuousCritic(BaseModel): + """ + Critic network(s) for DDPG/SAC/TD3. 
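+
+    In this variant each of the n_critics Q-networks is a plain multilayer
+    perceptron over the concatenation of the extracted features and the action,
+    built with the module-level mlp helper instead of stable-baselines3's
+    create_mlp. As an illustration (the numbers here are example values only):
+    net_arch=[32, 10] with features_dim=20 and a 6-dimensional action yields
+    Linear(26, 32) -> activation -> Linear(32, 10) -> activation -> Linear(10, 1)
+    per Q-network. q1_forward evaluates only the first of these networks, which
+    is the one used for the delayed policy update in TD3.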
+ """ + + def __init__( + self, + observation_space: gym.spaces.Space, + action_space: gym.spaces.Space, + net_arch: List[int], + features_extractor: nn.Module, + features_dim: int, + activation_fn: Type[nn.Module] = nn.ReLU, + normalize_images: bool = True, + n_critics: int = 2, + share_features_extractor: bool = True, + ): + super().__init__( + observation_space, + action_space, + features_extractor=features_extractor, + normalize_images=normalize_images, + ) + + action_dim = get_action_dim(self.action_space) + + self.share_features_extractor = share_features_extractor + self.n_critics = n_critics + self.q_networks = [] + for idx in range(n_critics): + # q_net = create_mlp(features_dim + action_dim, 1, net_arch, activation_fn) + # Define critic with Dropout here + # q_net = nn.Sequential( nn.Linear(features_dim + action_dim, 32), + # nn.ReLU(), + # nn.Linear(32, 10), + # nn.ReLU(), + # nn.Linear(10, 1) + # ) + + q_net = nn.Sequential(*mlp([features_dim + action_dim, *net_arch, 1], + activation_fn + )) + + self.add_module(f"qf{idx}", q_net) + self.q_networks.append(q_net) + + def forward(self, obs: th.Tensor, actions: th.Tensor) -> Tuple[th.Tensor, ...]: + # Learn the features extractor using the policy loss only + # when the features_extractor is shared with the actor + with th.set_grad_enabled(not self.share_features_extractor): + features = self.extract_features(obs) + qvalue_input = th.cat([features, actions], dim=1) + return tuple(q_net(qvalue_input) for q_net in self.q_networks) + + def q1_forward(self, obs: th.Tensor, actions: th.Tensor) -> th.Tensor: + """ + Only predict the Q-value using the first network. + This allows to reduce computation when all the estimates are not needed + (e.g. when updating the policy in TD3). + """ + with th.no_grad(): + features = self.extract_features(obs) + return self.q_networks[0](th.cat([features, actions], dim=1)) + + +class CustomTD3Policy(TD3Policy): + def __init__(self, *args, **kwargs): + super(CustomTD3Policy, self).__init__(*args, **kwargs) + + def make_actor(self, features_extractor: Optional[BaseFeaturesExtractor] = None) -> CustomActor: + actor_kwargs = self._update_features_extractor(self.actor_kwargs, features_extractor) + return CustomActor(**actor_kwargs).to(self.device) + + def make_critic(self, features_extractor: Optional[BaseFeaturesExtractor] = None) -> CustomContinuousCritic: + critic_kwargs = self._update_features_extractor(self.critic_kwargs, features_extractor) + return CustomContinuousCritic(**critic_kwargs).to(self.device) + + +register_policy("CustomTD3Policy", CustomTD3Policy) diff --git a/experiments/hp_tune/retrain.py b/experiments/hp_tune/retrain.py new file mode 100644 index 00000000..3c36eb02 --- /dev/null +++ b/experiments/hp_tune/retrain.py @@ -0,0 +1,190 @@ +import platform +import time + +import gym +import numpy as np +from stable_baselines3 import DDPG + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# imports net to define reward and executes script to register experiment + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to 
cyberdyne or locally as json to cfg[meas_data_folder] + + +def retrain_DDPG(learning_rate, gamma, use_gamma_in_rew, + error_exponent, + training_episode_length, + number_learning_steps, integrator_weight, antiwindup_weight, + penalty_I_weight, penalty_P_weight, + t_start_penalty_I, t_start_penalty_P, optimizer, n_trail, model_path): + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + + env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps) + + env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + used_optimzer = optimizer # th.optim.Adam, th.optim.RMSprop, th.optim.SGD + + # policy_kwargs = dict(optimizer_class=used_optimzer) + + model = DDPG.load(model_path + f'model.zip', env=env, tensorboard_log=model_path + n_trail) + # env = model.get_env() + env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Trial number": n_trail, + "Database name": folder_name, + "Sum_eps_reward": env.get_episode_rewards(), + "model_path": model_path + } + mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + + model.save(model_path + f'model_retrained.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? 
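+                        # Test setup (see the flags set a few lines above): rew.det_run = True makes
+                        # the reward function return -1 instead of aborting the episode when a limit
+                        # is exceeded, and limit_exceeded_penalty adds that -1 to the final score
+                        # only once, no matter how often a limit is violated during the test run.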
+ obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, penalty_P_weight=0) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + + while True: + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! 
(risk=0 manullay in env); only voltage taken into account in reward!"} + + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + +reward = retrain_DDPG(learning_rate=1e-4, gamma=0.8003175741091463, use_gamma_in_rew=1, + error_exponent=0.3663140388100587, + training_episode_length=3413, + number_learning_steps=200000, + integrator_weight=0.6618979905182214, + antiwindup_weight=0.9197062574269099, + penalty_I_weight=0.7813975187119389, + penalty_P_weight=1.5344102590339561, + t_start_penalty_I=0.9996190838462778, + t_start_penalty_P=0.14935820375506648, + optimizer='Adam', + n_trail='1', + model_path='experiments/hp_tune/trained_models/study_18_run_6462/' + ) + +print(reward) diff --git a/experiments/hp_tune/retrain_DDPG_singleLoadstep_per_episode.py b/experiments/hp_tune/retrain_DDPG_singleLoadstep_per_episode.py new file mode 100644 index 00000000..5b127f1b --- /dev/null +++ b/experiments/hp_tune/retrain_DDPG_singleLoadstep_per_episode.py @@ -0,0 +1,389 @@ +import json +import platform +import time +from functools import partial + +import gym +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from stochastic.processes import VasicekProcess + +from experiments.hp_tune.env.env_wrapper import FeatureWrapper +from experiments.hp_tune.env.random_load import RandomLoad +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net, CallbackList # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# np.random.seed(0) +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.util import RandProcess + +model_path = 'experiments/hp_tune/trained_models/study_22_run_11534/' + +folder_name = cfg['STUDY_NAME'] + '_retrain' +node = platform.uname().node +file_congfig = open(model_path + + 'PC2_DDPG_Vctrl_single_inv_22_newTestcase_Trial_number_11534_0.json', ) +trial_config = json.load(file_congfig) +print('Config-Params:') +print(*trial_config.items(), sep='\n') +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or locally as json to cfg[meas_data_folder] + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC + + +def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.show() + + +def 
xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + plt.close() + + +def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + # ts = time.gmtime() + # fig.savefig(f'{folder_name + experiment_name}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.show() + + +# plant +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F +# R = 40 # nomVoltPeak / 7.5 # / Ohm +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) +upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 +R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + +gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + +def experiment_fit_DDPG(number_learning_steps, + gamma=trial_config['gamma'], n_trail=trial_config['Trial number'], + training_episode_length=trial_config['training_episode_length'], + integrator_weight=trial_config['integrator_weight'], + antiwindup_weight=trial_config['antiwindup_weight'], + penalty_I_weight=trial_config['penalty_I_weight'], + penalty_P_weight=trial_config['penalty_P_weight'], + t_start_penalty_I=trial_config['penalty_I_decay_start'], + t_start_penalty_P=trial_config['penalty_P_decay_start'], + actor_number_layers=trial_config['actor_number_layers'], + alpha_relu_actor=trial_config['alpha_relu_actor'], + critic_number_layers=trial_config['critic_number_layers'], + alpha_relu_critic=trial_config['alpha_relu_critic'], + + ): + rand_load_train = RandomLoad(training_episode_length, net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + + cb = CallbackList() + # set initial = None to reset load random in range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + cb.append(rand_load_train.reset) + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=1, error_exponent=0.5, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + """ + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v1', + reward_fun=rew.rew_fun_dq0, + max_episode_steps=training_episode_length, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + """ + + env = gym.make('experiments.hp_tune.env:vctrl_single_inv_train-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'], + max_episode_steps=training_episode_length, + + viz_mode='episode', + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + 
color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + # max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + # 'r_load.resistor1.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor2.R': partial(rand_load_train.load_step, gain=R), + # 'r_load.resistor3.R': partial(rand_load_train.load_step, gain=R), + 'r_load.resistor1.R': rand_load_train.one_random_loadstep_per_episode, + 'r_load.resistor2.R': rand_load_train.clipped_step, + 'r_load.resistor3.R': rand_load_train.clipped_step, + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_nom, + high=v_nom) if t == -1 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_nom, + high=i_nom) if t == -1 else None, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=cb.fire, + is_normalized=True, + action_time_delay=1 + ) + + env = FeatureWrapper(env, number_of_features=11, training_episode_length=training_episode_length, + recorder=mongo_recorder, n_trail=n_trail, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=gamma, + penalty_I_weight=penalty_I_weight, penalty_P_weight=penalty_P_weight, + t_start_penalty_I=t_start_penalty_I, t_start_penalty_P=t_start_penalty_P, + number_learing_steps=number_learning_steps) + + # todo: Upwnscale actionspace - lessulgy possible? Interaction pytorch... + env.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + model = DDPG.load(model_path + f'model.zip', env=env, tensorboard_log=model_path) + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + # todo: Downscale actionspace - lessulgy possible? Interaction pytorch... 
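+    # The two loops above walk the loaded actor/critic Sequentials, in which Linear and
+    # activation modules alternate, so the activation of hidden layer kk sits at index
+    # count + 1 and count advances by 2 per layer. A possible index-free equivalent for
+    # the actor only (an illustration, not part of the original run; the original also
+    # patches actor_target as well as critic.qf0 and critic_target.qf0 the same way):
+    #
+    #     for module in model.actor.mu.modules():
+    #         if isinstance(module, th.nn.LeakyReLU):
+    #             module.negative_slope = alpha_relu_actor
+    #
+    # The Box(6) assignment above and the Box(3) assignment below bracket DDPG.load:
+    # according to the study config, the stored actor emits the P- and I-part as
+    # separate outputs (hence 6 action values) that are added in the wrapper, and the
+    # surrounding to-do comments flag this resizing as a workaround for the
+    # pytorch/SB3 action-space interaction.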
+ env.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + # start training + model.learn(total_timesteps=number_learning_steps) + + # Log Train-info data + train_data = {"Name": "After_Training", + "Mean_eps_reward": env.reward_episode_mean, + "Trial number": n_trail, + "Database name": folder_name, + "Sum_eps_reward": env.get_episode_rewards() + } + mongo_recorder.save_to_json('Trial_number_' + n_trail, train_data) + + model.save(model_path + f'model_retrained.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v1', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + env_test = FeatureWrapper(env_test, number_of_features=11, integrator_weight=integrator_weight, + recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + gamma=1, penalty_I_weight=0, penalty_P_weight=0) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + aP0 = [] + aP1 = [] + aP2 = [] + aI0 = [] + aI1 = [] + aI2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + va = [] + vb = [] + vc = [] + v_ref0 = [] + v_ref1 = [] + v_ref2 = [] + ia = [] + ib = [] + ic = [] + R_load = [] + for step in range(env_test.max_episode_steps): + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + aP0.append(np.float64(action[0])) + aP1.append(np.float64(action[1])) + aP2.append(np.float64(action[2])) + aI0.append(np.float64(action[3])) + aI1.append(np.float64(action[4])) + aI2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + + if step % 1000 == 0 and step != 0: + va.extend(env_test.history[env_test.viz_col_tmpls[0].vars[0]].copy().values.tolist()) + vb.extend(env_test.history[env_test.viz_col_tmpls[0].vars[1]].copy().values.tolist()) + vc.extend(env_test.history[env_test.viz_col_tmpls[0].vars[2]].copy().values.tolist()) + v_ref0.extend(env_test.history[env_test.viz_col_tmpls[0].vars[3]].copy().values.tolist()) + v_ref1.extend(env_test.history[env_test.viz_col_tmpls[0].vars[4]].copy().values.tolist()) + v_ref2.extend(env_test.history[env_test.viz_col_tmpls[0].vars[5]].copy().values.tolist()) + ia.extend(env_test.history[env_test.viz_col_tmpls[1].vars[0]].copy().values.tolist()) + ib.extend(env_test.history[env_test.viz_col_tmpls[1].vars[1]].copy().values.tolist()) + 
ic.extend(env_test.history[env_test.viz_col_tmpls[1].vars[2]].copy().values.tolist()) + R_load.extend(env_test.history[env_test.viz_col_tmpls[2].vars[1]].copy().values.tolist()) + + env_test.close() + obs = env_test.reset() + phase_list.append(env_test.env.net.components[0].phase) + + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "lc_capacitor1_v": va, + "lc_capacitor2_v": vb, + "lc_capacitor3_v": vc, + "inverter1_v_ref_0": v_ref0, + "inverter1_v_ref_1": v_ref1, + "inverter1_v_ref_2": v_ref2, + "lc_inductor1_i": ia, + "lc_inductor2_i": ib, + "lc_inductor3_i": ic, + "r_load_resistor1_R": R_load, + "ActionP0": aP0, + "ActionP1": aP1, + "ActionP2": aP2, + "ActionI0": aI0, + "ActionI1": aI1, + "ActionI2": aI2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)," + "integrator_zustand(delayed!), genutzte Aktion (P-anteil)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! (risk=0 manullay in env); only voltage taken into account in reward!"} + + """ + In new testenv not used, because then only the last episode is stored + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + """ + + mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + print(return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + +experiment_fit_DDPG(10000) diff --git a/experiments/hp_tune/run_testcase.py b/experiments/hp_tune/run_testcase.py new file mode 100644 index 00000000..38bca70e --- /dev/null +++ b/experiments/hp_tune/run_testcase.py @@ -0,0 +1,401 @@ +import platform +import time +from typing import Union + +import gym +import numpy as np +import torch as th +import matplotlib.pyplot as plt +from stable_baselines3 import DDPG +from stable_baselines3.common.monitor import Monitor +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise +from stable_baselines3.common.type_aliases import GymStepReturn + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.recorder import Recorder +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.util import abc_to_alpha_beta, dq0_to_abc, abc_to_dq0 + +# np.random.seed(0) + +folder_name = cfg['STUDY_NAME'] +node = platform.uname().node + +# mongo_recorder = Recorder(database_name=folder_name) +mongo_recorder = Recorder(node=node, + database_name=folder_name) # store to port 12001 for ssh data to cyberdyne or 
locally as json to cfg[meas_data_folder] + + +class FeatureWrapper(Monitor): + + def __init__(self, env, number_of_features: int = 0, training_episode_length: int = np.inf, + recorder=None, n_trail="", integrator_weight=net.ts, antiwindup_weight=net.ts): + """ + Env Wrapper to add features to the env-observations and adds information to env.step output which can be used in + case of an continuing (non-episodic) task to reset the environment without being terminated by done + + Hint: is_dq0: if the control is done in dq0; if True, the action is tranfered to abc-system using env-phase and + the observation is tranfered back to dq using the next phase + + :param env: Gym environment to wrap + :param number_of_features: Number of features added to the env observations in the wrapped step method + :param training_episode_length: (For non-episodic environments) number of training steps after the env is reset + by the agent for training purpose (Set to inf in test env!) + + """ + super().__init__(env) + self.observation_space = gym.spaces.Box( + low=np.full(env.observation_space.shape[0] + number_of_features, -np.inf), + high=np.full(env.observation_space.shape[0] + number_of_features, np.inf)) + self.training_episode_length = training_episode_length + self.recorder = recorder + self._n_training_steps = 0 + self._i_phasor = 0.0 + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self._v_pahsor = 0.0 + self.n_episode = 0 + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.reward_episode_mean = [] + self.n_trail = n_trail + self.phase = [] + self.integrator_sum = np.zeros(self.action_space.shape) + self.integrator_weight = integrator_weight + self.antiwindup_weight = antiwindup_weight + self.action_P0 = [] + self.action_P1 = [] + self.action_P2 = [] + self.action_I0 = [] + self.action_I1 = [] + self.action_I2 = [] + + def step(self, action: Union[np.ndarray, int]) -> GymStepReturn: + """ + Adds additional features and infos after the gym env.step() function is executed. + Triggers the env to reset without done=True every training_episode_length steps + """ + self.integrator_sum += action * self.integrator_weight + + self.action_P0.append(action[0]) + self.action_P1.append(action[1]) + self.action_P2.append(action[2]) + self.action_I0.append(self.integrator_sum[0]) + self.action_I1.append(self.integrator_sum[1]) + self.action_I2.append(self.integrator_sum[2]) + + action_PI = action # + self.integrator_sum + + if cfg['is_dq0']: + # Action: dq0 -> abc + action_abc = dq0_to_abc(action_PI, self.env.net.components[0].phase) + + # check if m_abc will be clipped + if np.any(abs(action_abc) > 1): + # if, reduce integrator by clipped delta + action_delta = abc_to_dq0(np.clip(action_abc, -1, 1) - action_abc, self.env.net.components[0].phase) + self.integrator_sum += action_delta * self.antiwindup_weight + + obs, reward, done, info = super().step(action_abc) + self._n_training_steps += 1 + + if self._n_training_steps % self.training_episode_length == 0: + # info["timelimit_reached"] = True + done = True + + # add wanted features here (add appropriate self.observation in init!!) 
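+        # Features appended further below in this wrapper: the dq0 control error
+        # v_setpoint - v_measured (3 values), sin and cos of the inverter phase
+        # (2 values) and the previously used action (self.used_action), which lets
+        # the agent learn the one-step actuation delay. With a 3-dimensional action
+        # this gives 3 + 2 + 3 = 8 features, matching number_of_features=8 used when
+        # wrapping env_test below. The phasor magnitudes computed next are only
+        # logged (loglevel == 'train'); the commented-out np.append lines show how
+        # they could be added to the observation as well.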
+ # calculate magnitude of current phasor abc + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if done: + self.reward_episode_mean.append(np.mean(self.rewards)) + self.n_episode += 1 + + if cfg['loglevel'] == 'train': + episode_data = {"Name": "On_Training", + "Episode_number": self.n_episode, + "Episode_length": self._n_training_steps, + "R_load_training": self.R_training, + "i_phasor_training": self.i_phasor_training, + "i_a_training": self.i_a, + "i_b_training": self.i_b, + "i_c_training": self.i_c, + "v_a_training": self.v_a, + "v_b_training": self.v_b, + "v_c_training": self.v_c, + "v_phasor_training": self.v_phasor_training, + "Rewards": self.rewards, + "Phase": self.phase, + "Node": platform.uname().node, + "Trial number": self.n_trail, + "Database name": folder_name, + "Reward function": 'rew.rew_fun_dq0', + } + + """ + add here "model_params_change": callback.params_change, from training_recorder? + """ + + # stores data locally to cfg['meas_data_folder'], needs to be grept / transfered via reporter to mongodc + # mongo_recorder.save_to_json('Trial_number_' + self.n_trail, episode_data) + + # clear lists + self.R_training = [] + self.i_phasor_training = [] + self.v_phasor_training = [] + self.i_a = [] + self.i_b = [] + self.i_c = [] + self.v_a = [] + self.v_b = [] + self.v_c = [] + self.phase = [] + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + + """ + Features + """ + error = obs[6:9] - obs[3:6] # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 
0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + # obs = np.append(obs, delta_i_lim_i_phasor) + + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_action) + + return obs, reward, done, info + + def reset(self, **kwargs): + """ + Reset the wrapped env and the flag for the number of training steps after the env is reset + by the agent for training purpose and internal counters + """ + obs = super().reset() + self._n_training_steps = 0 + self.integrator_sum = np.zeros(self.action_space.shape) + + self.i_phasor = self.cal_phasor_magnitude(obs[0:3]) + self.v_phasor = self.cal_phasor_magnitude(obs[3:6]) + + if cfg['loglevel'] == 'train': + self.R_training.append(self.env.history.df['r_load.resistor1.R'].iloc[-1]) + self.i_phasor_training.append((self.i_phasor) * self.env.net['inverter1'].i_lim) + self.v_phasor_training.append((self.v_phasor) * self.env.net['inverter1'].v_lim) + + self.i_a.append(self.env.history.df['lc.inductor1.i'].iloc[-1]) + self.i_b.append(self.env.history.df['lc.inductor2.i'].iloc[-1]) + self.i_c.append(self.env.history.df['lc.inductor3.i'].iloc[-1]) + + self.v_a.append(self.env.history.df['lc.capacitor1.v'].iloc[-1]) + self.v_b.append(self.env.history.df['lc.capacitor2.v'].iloc[-1]) + self.v_c.append(self.env.history.df['lc.capacitor3.v'].iloc[-1]) + self.phase.append(self.env.net.components[0].phase) + + if cfg['is_dq0']: + # if setpoint in dq: Transform measurement to dq0!!!! + obs[3:6] = abc_to_dq0(obs[3:6], self.env.net.components[0].phase) + obs[0:3] = abc_to_dq0(obs[0:3], self.env.net.components[0].phase) + """ + Features + """ + error = obs[6:9] - obs[3:6] # control error: v_setpoint - v_mess + # delta_i_lim_i_phasor = 1 - self.i_phasor # delta to current limit + + """ + Following maps the return to the range of [-0.5, 0.5] in + case of magnitude = [-lim, lim] using (phasor_mag) - 0.5. 0.5 can be exceeded in case of the magnitude + exceeds the limit (no extra env interruption here!, all phases should be validated separately) + """ + # obs = np.append(obs, self.i_phasor - 0.5) + obs = np.append(obs, error) + obs = np.append(obs, np.sin(self.env.net.components[0].phase)) + obs = np.append(obs, np.cos(self.env.net.components[0].phase)) + # obs = np.append(obs, delta_i_lim_i_phasor) + """ + Add used action to the NN input to learn delay + """ + obs = np.append(obs, self.used_action) + + return obs + + def cal_phasor_magnitude(self, abc: np.array) -> float: + """ + Calculated the magnitude of a phasor in a three phase system. 
M + + :param abc: Due to limit normed currents or voltages in abc frame + :return: magnitude of the current or voltage phasor + """ + # calculate magnitude of current phasor abc-> alpha,beta ->|sqrt(alpha² + beta²)| + i_alpha_beta = abc_to_alpha_beta(abc) + i_phasor_mag = np.sqrt(i_alpha_beta[0] ** 2 + i_alpha_beta[1] ** 2) + + return i_phasor_mag + + +def run_testcase_DDPG(gamma, integrator_weight, antiwindup_weight, model_path, error_exponent=0.5, use_gamma_in_rew=1, + n_trail=50000): + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + model = DDPG.load(model_path + f'model.zip') + + ####### Run Test ######### + return_sum = 0.0 + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'] + ) + env_test = FeatureWrapper(env_test, number_of_features=8, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight) + obs = env_test.reset() + phase_list = [] + phase_list.append(env_test.env.net.components[0].phase) + + rew_list = [] + a0 = [] + a1 = [] + a2 = [] + v_d = [] + v_q = [] + v_0 = [] + + while True: + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + phase_list.append(env_test.env.net.components[0].phase) + a0.append(np.float64(action[0])) + a1.append(np.float64(action[1])) + a2.append(np.float64(action[2])) + v_a = env_test.history.df['lc.capacitor1.v'].iloc[-1] + v_b = env_test.history.df['lc.capacitor2.v'].iloc[-1] + v_c = env_test.history.df['lc.capacitor3.v'].iloc[-1] + + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), env_test.env.net.components[0].phase) + + v_d.append(v_dq0[0]) + v_q.append(v_dq0[1]) + v_0.append(v_dq0[2]) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + # print(rewards) + if done: + env_test.close() + # print(limit_exceeded_in_test) + break + + ts = time.gmtime() + test_after_training = {"Name": "Test", + "time": ts, + "Reward": rew_list, + "Action0": a0, + "Action1": a1, + "Action2": a2, + "Phase": phase_list, + "Node": platform.uname().node, + "End time": time.strftime("%Y_%m_%d__%H_%M_%S", time.gmtime()), + "Reward function": 'rew.rew_fun_dq0', + "Trial number": n_trail, + "Database name": folder_name, + "Info": "Delay, obs=[v_mess,sp_dq0, i_mess_dq0, error_mess_sp, last_action, sin/cos(phase)]; " + "Reward = MRE, PI-Approch using AntiWindUp" + "without abort! 
(risk=0 manullay in env); only voltage taken into account in reward!"} + + # fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(10, 10)) + # ax1, ax2 = ax.flatten() + + plt.plot(env_test.action_P0) + plt.plot(env_test.action_P1) + plt.plot(env_test.action_P2) + plt.xlabel("") + plt.ylabel("action_P") + plt.title('Test') + plt.show() + + plt.plot(env_test.action_I0) + plt.plot(env_test.action_I1) + plt.plot(env_test.action_I2) + plt.xlabel("") + plt.ylabel("action_I") + plt.title('Test') + plt.show() + + plt.plot(v_d) + plt.plot(v_q) + plt.plot(v_0) + plt.xlabel("") + plt.ylabel("v_dq0") + plt.title('Test') + plt.show() + # Add v-&i-measurements + test_after_training.update({env_test.viz_col_tmpls[j].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + test_after_training.update({env_test.viz_col_tmpls[2].vars[i].replace(".", "_"): env_test.history[ + env_test.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + # mongo_recorder.save_to_json('Trial_number_' + n_trail, test_after_training) + + return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + +run_testcase_DDPG(gamma=0.94713819942184, integrator_weight=2.3448684965845657e-06, + antiwindup_weight=0.0035792826486838116, + model_path='experiments/hp_tune/trained_models/study_10_run_954/', + error_exponent=0.5, use_gamma_in_rew=1, n_trail=50000) diff --git a/experiments/hp_tune/trained_models/Future_10Rvals/model.zip b/experiments/hp_tune/trained_models/Future_10Rvals/model.zip new file mode 100644 index 00000000..51ee2f63 Binary files /dev/null and b/experiments/hp_tune/trained_models/Future_10Rvals/model.zip differ diff --git a/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Actor.zip b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Actor.zip new file mode 100644 index 00000000..fbbf9a36 Binary files /dev/null and b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Actor.zip differ diff --git a/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals.zip b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals.zip new file mode 100644 index 00000000..2f9f1a13 Binary files /dev/null and b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals.zip differ diff --git a/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals_corr.zip b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals_corr.zip new file mode 100644 index 00000000..4d0b0207 Binary files /dev/null and b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals_corr.zip differ diff --git a/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip new file mode 100644 index 00000000..ed3ecead Binary files /dev/null and b/experiments/hp_tune/trained_models/paper/model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip differ diff --git a/experiments/hp_tune/trained_models/study_18_run_6462/model.zip b/experiments/hp_tune/trained_models/study_18_run_6462/model.zip new file mode 100644 index 00000000..b16c43cd Binary files /dev/null and b/experiments/hp_tune/trained_models/study_18_run_6462/model.zip differ diff --git a/experiments/hp_tune/trained_models/study_18_run_6462_new/model.zip 
b/experiments/hp_tune/trained_models/study_18_run_6462_new/model.zip new file mode 100644 index 00000000..b16c43cd Binary files /dev/null and b/experiments/hp_tune/trained_models/study_18_run_6462_new/model.zip differ diff --git a/experiments/hp_tune/trained_models/study_22_run_11534/PC2_DDPG_Vctrl_single_inv_22_newTestcase_Trial_number_11534_0.json b/experiments/hp_tune/trained_models/study_22_run_11534/PC2_DDPG_Vctrl_single_inv_22_newTestcase_Trial_number_11534_0.json new file mode 100644 index 00000000..eadd42a2 --- /dev/null +++ b/experiments/hp_tune/trained_models/study_22_run_11534/PC2_DDPG_Vctrl_single_inv_22_newTestcase_Trial_number_11534_0.json @@ -0,0 +1,39 @@ +{ + "Name": "Config", + "Node": "node04-048", + "Agent": "DDPG", + "Number_learning_Steps": 500000, + "Trial number": "11534", + "Database name": "PC2_DDPG_Vctrl_single_inv_22_newTestcase", + "Start time": "2021_08_03__07_32_51", + "Optimierer/ Setting stuff": "Kein Const_liar_feature, hoehere Grenzen, INtergrator Gewicht als HP,Actionspace = 6, da P und I-Anteil seperate ausg\u00e4nge und im wrapper addiert werdenIntegratorzustand+used_P_Action (je um einen verzoegert) wird mit als feature uebergebenPenalties fuer action_P und action_PMehr HPs: trainfreq, batch/buffer_size, a_relu ", + "Weitere Info": "NEUES TEST ENV - 100k steps alle 1000 resettet das vom training - zuf\u00e4lligeLast", + "additionalInfo": "Long Holiday run", + "penalty_I_weight": 1.132480628572647, + "penalty_P_weight": 1.4834257541454123, + "penalty_I_decay_start": 0.5489063567901366, + "penalty_P_decay_start": 0.23007974811664603, + "integrator_weight": 0.31113470671968957, + "antiwindup_weight": 0.660818130720168, + "learning_rate": 0.00037457864914508586, + "lr_decay_start": 0.2750816923408933, + "lr_decay_duration": 0.3240504611772025, + "final_lr": 0.8356876361923928, + "gamma": 0.9462178519540726, + "weight_scale": 0.000852050757214834, + "bias_scale": 0.020070268741104066, + "alpha_relu_actor": 0.20809806015130924, + "alpha_relu_critic": 0.006784965521936233, + "batch_size": 261, + "buffer_size": 386945, + "actor_hidden_size": 25, + "actor_number_layers": 2, + "critic_hidden_size": 295, + "critic_number_layers": 4, + "noise_var": 0.023580253339050283, + "noise_theta": 31.575020911887215, + "training_episode_length": 2811, + "tau": 0.002609222715831891, + "train_freq": 2, + "optimizer": "Adam" +} \ No newline at end of file diff --git a/experiments/hp_tune/trained_models/study_22_run_11534/model.zip b/experiments/hp_tune/trained_models/study_22_run_11534/model.zip new file mode 100644 index 00000000..50daca14 Binary files /dev/null and b/experiments/hp_tune/trained_models/study_22_run_11534/model.zip differ diff --git a/experiments/hp_tune/trained_models/study_22_run_11534/model_retrained.zip b/experiments/hp_tune/trained_models/study_22_run_11534/model_retrained.zip new file mode 100644 index 00000000..2d40930f Binary files /dev/null and b/experiments/hp_tune/trained_models/study_22_run_11534/model_retrained.zip differ diff --git a/experiments/hp_tune/util/__init__.py b/experiments/hp_tune/util/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/experiments/hp_tune/util/action_noise_wrapper.py b/experiments/hp_tune/util/action_noise_wrapper.py new file mode 100644 index 00000000..3a2bf7ea --- /dev/null +++ b/experiments/hp_tune/util/action_noise_wrapper.py @@ -0,0 +1,48 @@ +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise +import numpy as np + + +class 
myOrnsteinUhlenbeckActionNoise(OrnsteinUhlenbeckActionNoise): + """ + Wraps the OU-noise from sb3 to give the possibility to reduce the action noise over training time + Implementation similar to kerasRL2 (https://github.com/wau/keras-rl2/blob/master/rl/random.py) + """ + + def __init__(self, n_steps_annealing=1000, sigma_min=None, *args, **kwargs): + super(myOrnsteinUhlenbeckActionNoise, self).__init__(*args, **kwargs) + self.n_steps_annealing = n_steps_annealing + self.sigma_min = sigma_min + self.n_steps = 0 + + if sigma_min is not None: + # self.m = -float(self._sigma - sigma_min) / float(n_steps_annealing) + self.m = -(self._sigma - sigma_min) / (n_steps_annealing) + self.c = self._sigma + self.sigma_min = sigma_min + else: + self.m = 0. + self.c = self._sigma + self.sigma_min = self._sigma + + @property + def current_sigma(self): + sigma = np.maximum(self.sigma_min, self.m * float(self.n_steps) + self.c) + return sigma + + def __call__(self) -> np.ndarray: + noise = ( + self.noise_prev + + self._theta * (self._mu - self.noise_prev) * self._dt + + self.current_sigma * np.sqrt(self._dt) * np.random.normal(size=self._mu.shape) + ) + self.noise_prev = noise + self.n_steps += 1 + return noise + + def reset(self) -> None: + super().reset() + + # should not be reset because action_noise is reset after episode, but noise reduction over learning-length + # does not reset the noise reduction! Reduction not per episode but per learing, since action noise + # is redifiend then, no reset of annealing needed + # self.n_steps = 0 diff --git a/experiments/hp_tune/util/callbacklist.py b/experiments/hp_tune/util/callbacklist.py new file mode 100644 index 00000000..2a94f306 --- /dev/null +++ b/experiments/hp_tune/util/callbacklist.py @@ -0,0 +1,6 @@ +class CallbackList(list): + # List of callback functions + def fire(self, *args, **kwargs): + # executes all callbacks in list + for listener in self: + listener(*args, **kwargs) diff --git a/experiments/hp_tune/util/config.py b/experiments/hp_tune/util/config.py new file mode 100644 index 00000000..12d40e0d --- /dev/null +++ b/experiments/hp_tune/util/config.py @@ -0,0 +1,17 @@ +cfg = dict(lea_vpn_nodes=['lea-skynet', 'lea-picard', 'lea-barclay', + 'lea-cyberdyne', 'webbah-ThinkPad-L380', 'LEA_WORK35'], + # STUDY_NAME='PC2_TD3_Vctrl_single_inv_2', + # STUDY_NAME='PC2_DDPG_Vctrl_single_inv_23_added_Past_vals', + # STUDY_NAME='PC2_DDPG_Vctrl_single_inv_HPO_noI_term_study_25', + # STUDY_NAME='OMG_DDPG_Integrator_no_pastVals', + # meas_data_folder='Json_to_MonogDB_OMG_DDPG_Integrator_no_pastVals_New/', + STUDY_NAME='OMG_DDPG_Actor', + meas_data_folder='Json_to_MonogDB_OMG_DDPG_Integrator_no_pastVals/', + MONGODB_PORT=12001, + loglevel='test', + is_dq0=True, + train_episode_length=2881, # defines when in training the env is reset e.g. 
for exploring starts, + # nothing -> Standard FeatureWrapper; past -> FeatureWrapper_pastVals; future -> FeatureWrapper_futureVals + # I-controller -> DDPG as P-term + standard I-controller; no-I-term -> Pure DDPG without integrator + env_wrapper='past' + ) diff --git a/experiments/hp_tune/util/configTD3.py b/experiments/hp_tune/util/configTD3.py new file mode 100644 index 00000000..f9806adf --- /dev/null +++ b/experiments/hp_tune/util/configTD3.py @@ -0,0 +1,10 @@ +cfg = dict(lea_vpn_nodes=['lea-skynet', 'lea-picard', 'lea-barclay', + 'lea-cyberdyne', 'webbah-ThinkPad-L380', 'LEA_WORK35'], + + STUDY_NAME='PC2_TD3_Vctrl_single_inv_5_newTestcase', + # STUDY_NAME='DEGUB_1', + meas_data_folder='Json_to_MonogDB/', + MONGODB_PORT=12001, + loglevel='test', + is_dq0=True + ) diff --git a/experiments/hp_tune/util/pc2.py b/experiments/hp_tune/util/pc2.py new file mode 100644 index 00000000..4da43241 --- /dev/null +++ b/experiments/hp_tune/util/pc2.py @@ -0,0 +1,74 @@ +"""Helper functions for the communication with the high-performance computing +cluster 'Paderborn Center for Parallel Computing' (PC²)""" + +import subprocess as sub +import pathlib +import time +import pandas as pd + + +def build_shell_script_lines(path, cluster, job_name, res_plan, execution_lines): + cfg_id_d = {'oculus': '#CCS', 'noctua': '#SBATCH'} + assert cluster in cfg_id_d, f'cluster "{cluster}" not supported' + assert isinstance(path, pathlib.Path) + log_path = path / 'logs' + log_path.mkdir(parents=True, exist_ok=True) + cfg_id = cfg_id_d[cluster] + lines = ['#! /usr/bin/env zsh', '#! /bin/zsh', '', + f'{cfg_id} -t {res_plan["duration"]}', + f'{cfg_id} -o {log_path / "%reqid.log"}', + f'{cfg_id} -N {job_name}', + f'{cfg_id} --res=rset={res_plan["rset"]}' + f':ncpus={res_plan["ncpus"]}' + f':mem={res_plan["mem"]}' + f':vmem={res_plan["vmem"]}', + f'{cfg_id} -j', ''] + + lines.extend(execution_lines if isinstance(execution_lines, list) + else [execution_lines]) + return [line + '\n' for line in lines] + + +def calculate_resources(duration=1, ncpus=6, memory=4, vmemory=8): + # todo: Think of a more intelligent (adaptive) resource plan + plan = {'duration': str(duration) + 'h', + 'rset': '1', + 'ncpus': str(ncpus), + 'mem': str(memory) + 'g', + 'vmem': str(vmemory) + 'g' + } + return plan + + +def create_n_run_script(name, content, dry=False): + with open(name, 'w+') as f: + f.writelines(content) + sub.run(["chmod", "+x", name]) # Make script executable + + if not dry: + # allocate and run, zB name = pc2_job_412643.sh + sub.run(['ccsalloc', name]) + time.sleep(1) + + +def get_ccsinfo(user): + """Returns the current ccs schedule as DataFrame""" + ccsinfo = sub.run(['ccsinfo', '-s', f'--user={user}', '--raw'], + stdout=sub.PIPE).stdout.decode().strip('\n').split('\n') + + # def run(*popenargs, + # input=None, capture_output=False, timeout=None, check=False, **kwargs): + + info_lines = [l.strip().split() for l in ccsinfo] + base_columns = ['jobid', 'jobname', 'user', + 'state', 'time', 'allocated_time_days', + 'allocated_time_hm', ] + if any(len(l) > 9 for l in info_lines): + columns = base_columns + ['efficiency_1', 'efficiency_2', 'resources'] + else: + columns = base_columns + ['efficiency', 'resources'] + ccsinfo = pd.DataFrame(info_lines + if len(info_lines) > 0 and len(info_lines[0]) > 0 + else None, columns=columns) + + return ccsinfo diff --git a/experiments/hp_tune/util/record_env.py b/experiments/hp_tune/util/record_env.py new file mode 100644 index 00000000..469e82ea --- /dev/null +++ 
b/experiments/hp_tune/util/record_env.py @@ -0,0 +1,71 @@ +from stable_baselines3.common.callbacks import BaseCallback +import matplotlib.pyplot as plt +import time +from experiments.hp_tune.env.vctrl_single_inv import folder_name + + +class RecordEnvCallback(BaseCallback): + + def __init__(self, env, agent, max_episode_steps, recorder=None, n_trail=0): + """ + Class to interact with stable_baseline learner callback, + Runs e.g. every 1000 steps to evaluate the learning process in the env: + + plot_callback = EveryNTimesteps(n_steps=1000, callback=RecordEnvCallback(env, model)) + agent.learn(total_timesteps=2000, callback=[plot_callback]) + + :param env: environment to run on + :param agent: agent to run on env to evaluate + """ + self.env = env + self.agent = agent + self.max_episode_steps = max_episode_steps + self.recorder = recorder + self.n_trail = n_trail + super().__init__() + + def _on_step(self) -> bool: + rewards = [] + obs = self.env.reset() + for _ in range(self.max_episode_steps): + self.env.render() + action, _states = self.agent.predict(obs, deterministic=True) + obs, reward, done, info = self.env.step(action) + rewards.append(reward) + if done or info.get("timelimit_reached", False): + break + # plot rewards? + + ts = time.gmtime() + + plt.plot(rewards) + plt.xlabel(r'$t\,/\,\mathrm{s}$') + plt.ylabel('$Reward$') + plt.grid(which='both') + + plt.savefig(f'{folder_name}/{self.n_trail}/Reward{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + validation_during_training = {"Name": "Validation during training", + "num_timesteps_learned_so_far": self.num_timesteps, + "time": ts, + "Reward": rewards} + + # Add v-measurements + validation_during_training.update( + {self.env.env.viz_col_tmpls[j].vars[i].replace(".", "_"): self.env.env.history[ + self.env.env.viz_col_tmpls[j].vars[i]].copy().tolist() for j in range(2) for i in range(6) + }) + + validation_during_training.update( + {self.env.env.viz_col_tmpls[2].vars[i].replace(".", "_"): self.env.env.history[ + self.env.env.viz_col_tmpls[2].vars[i]].copy().tolist() for i in range(3) + }) + + # va = self.env.env.history[self.env.env.viz_col_tmpls[0].vars[0]].copy() + + self.recorder.save_to_mongodb('Trail_number_' + self.n_trail, validation_during_training) + + self.env.close() + self.env.reset() + return True diff --git a/experiments/hp_tune/util/recorder.py b/experiments/hp_tune/util/recorder.py new file mode 100644 index 00000000..48f66f95 --- /dev/null +++ b/experiments/hp_tune/util/recorder.py @@ -0,0 +1,69 @@ +import json +from os import makedirs + +import sshtunnel +from pymongo import MongoClient + +from experiments.hp_tune.util.config import cfg + +MONGODB_PORT = cfg['MONGODB_PORT'] # 12001 + + +class Recorder: + + def __init__(self, node, database_name): + """ + Class to record measured data to mongo database using pymongo + Depending on the node we are operating at it connects via ssh to + - in lea_vpn: to cyberdyne port 12001 + - else: assume pc2 node -> connect to frontend + and stores data to mongoDB at port MONGODB_PORT ( =12001). 
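+        A minimal usage sketch (assumption: a MongoDB instance is reachable behind the tunnel; 'my_study' is a hypothetical database name):
+            rec = Recorder(platform.uname().node, 'my_study')
+            rec.save_to_mongodb(col='Trial_number_0', data={'Name': 'Test'})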
+ HINT: From pc2 frontend permanent tunnel from cyberdyne port 12001 to frontend 12001 + is needed (assuming Mongod-Process running on cyberdyne + :params node: platform.uname().node + :params database_name: string for the database name to store data in + """ + self.node = node + self.save_count = 0 + + if self.node in cfg['lea_vpn_nodes']: + self.server_name = 'lea38' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT)} + self.save_folder = cfg['meas_data_folder'] + else: + # assume we are on a node of pc2 -> connect to frontend and put data on prt 12001 + # from there they can be grep via permanent tunnel from cyberdyne + self.server_name = 'fe.pc2.uni-paderborn.de' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT), + 'ssh_username': 'webbah'} + + self.save_folder = '/scratch/hpc-prf-reinfl/weber/OMG/' + cfg['meas_data_folder'] + + self.database_name = database_name + makedirs(self.save_folder, exist_ok=True) + #pathlib.Path(self.save_folder.mkdir(exist_ok=True)) + + def save_to_mongodb(self, col: str = ' trails', data=None): + """ + Stores data to database in document col + """ + with sshtunnel.open_tunnel(self.server_name, **self.tun_cfg) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[self.database_name] + trial_coll = db[col] # get collection named col + trial_coll.insert_one(data) + + def save_to_json(self, col: str = ' trails', data=None): + """ + Stores data to json file in specified directory. From there the data can be grept by another process + and can be stored to a DB via ssh + To distinguish the files of one trail a save_count is incremented and added to the filename + """ + + with open(self.save_folder + self.database_name + '_' + col + '_' + str(self.save_count) + '.json', + 'w') as outfile: + json.dump(data, outfile) + + self.save_count += 1 diff --git a/experiments/hp_tune/util/reporter.py b/experiments/hp_tune/util/reporter.py new file mode 100644 index 00000000..6a8983f3 --- /dev/null +++ b/experiments/hp_tune/util/reporter.py @@ -0,0 +1,147 @@ +import json +import os +import platform +import time + +import numpy as np + +import sshtunnel +from pymongo import MongoClient +# from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.config import cfg + + +class Reporter: + + def __init__(self): + """ + Greps json data which is stored in the cfg[meas_data_folder] and sends it to mongoDB + on cyberdyne (lea38) via sshtunnel on port MONGODB_PORT + """ + + MONGODB_PORT = cfg['MONGODB_PORT'] + + node = platform.uname().node + + if node in cfg['lea_vpn_nodes']: + self.server_name = 'lea38' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT)} + self.save_folder = './' + cfg['meas_data_folder'] + else: + # assume we are on a node of pc2 -> connect to frontend and put data on prt 12001 + # from there they can be grep via permanent tunnel from cyberdyne + self.server_name = 'fe.pc2.uni-paderborn.de' + self.tun_cfg = {'remote_bind_address': ('127.0.0.1', + MONGODB_PORT), + 'ssh_username': 'webbah'} + + self.save_folder = '/scratch/hpc-prf-reinfl/weber/OMG/' + cfg['meas_data_folder'] + + def save_to_mongodb(self, database_name: str, col: str = ' trails', data=None): + """ + Stores data to database in document col + """ + with sshtunnel.open_tunnel(self.server_name, **self.tun_cfg) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[database_name] + trial_coll = db[col] # get collection named col + 
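# insert_one writes the dict as one new document into that collection (pymongo also adds an '_id' field to the passed dict) +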
trial_coll.insert_one(data) + + def oldest_file_in_tree(self, extension=".json"): + """ + Returns the oldest file-path string + """ + print(os.getcwd()) + return min( + (os.path.join(dirname, filename) + for dirname, dirnames, filenames in os.walk(self.save_folder) + for filename in filenames + if filename.endswith(extension)), + key=lambda fn: os.stat(fn).st_mtime) + + def oldest_file_with_name_in_tree(self, count_number_to_find, extension=".json"): + """ + Returns the oldest file-path string + + :param count_number_to_find: List of count_numbers to find and store instead of storing all + """ + print(os.getcwd()) + return min( + (os.path.join(dirname, filename) + for dirname, dirnames, filenames in os.walk(self.save_folder) + for filename in filenames + if filename.endswith(str(count_number_to_find) + extension)), + key=lambda fn: os.stat(fn).st_mtime) + + def json_to_mongo_via_sshtunnel(self, file_name_to_store=None): + + if not len(os.listdir(self.save_folder)) == 0: + + if file_name_to_store is None: + try: + oldest_file_path = self.oldest_file_in_tree() + except(ValueError) as e: + print('Folder seems empty or no matching data found!') + print(f'ValueError{e}') + print('Empty directory! Go to sleep for 5 minutes!') + time.sleep(5 * 60) + return + else: + oldest_file_path = file_name_to_store + + with open(oldest_file_path) as json_file: + data = json.load(json_file) + + successfull = False + retry_counter = 0 + + while not successfull: + try: + now = time.time() + if os.stat(oldest_file_path).st_mtime < now - 60: + self.save_to_mongodb(database_name=data['Database name'], + col='Trial_number_' + data['Trial number'], data=data) + print('Reporter: Data stored successfully to MongoDB and will be removed locally!') + os.remove(oldest_file_path) + successfull = True + except (sshtunnel.BaseSSHTunnelForwarderError) as e: + wait_time = np.random.randint(1, 60) + retry_counter += 1 + if retry_counter > 10: + print('Stopped after 10 connection attempts!') + raise e + print(f'Reporter: Could not connect via ssh to frontend, retry in {wait_time} s') + time.sleep(wait_time) + + else: + print('Empty directory! Go to sleep for 5 minutes!') + time.sleep(5 * 60) + + +if __name__ == "__main__": + + file_ending_number = [0, 1, 2] + + reporter = Reporter() + print("Starting Reporter for logging from local savefolder to mongoDB") + + print(f"Searching for files in directory with number ending on {file_ending_number}") + + # print(reporter.oldest_file_in_tree()) + while True: + # reporter.json_to_mongo_via_sshtunnel() + + # to send only files ending with number file_ending_number + for number in file_ending_number: + try: + oldest_named_file_path = reporter.oldest_file_with_name_in_tree(number) + print(oldest_named_file_path) + + reporter.json_to_mongo_via_sshtunnel(oldest_named_file_path) + + except(ValueError) as e: + print(f'No file with number {number} ending') + print(f'ValueError{e}') + print('Go to sleep for 5 seconds and go on with next number!') + time.sleep(5) diff --git a/experiments/hp_tune/util/scheduler.py b/experiments/hp_tune/util/scheduler.py new file mode 100644 index 00000000..e524a633 --- /dev/null +++ b/experiments/hp_tune/util/scheduler.py @@ -0,0 +1,59 @@ +from typing import Union, Callable + +import numpy as np + + +def linear_schedule(initial_value, final_value, t_start, t_end, total_timesteps: int = 1000) -> Callable[ + [float], float]: + """ + Linear learning rate schedule from t_start to t_end in between initial -> final value. 
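+    For example (a worked sketch): with initial_value=1e-3, final_value=1e-4, t_start=0, t_end=1000 and total_timesteps=1000, the returned func yields 1e-3 at progress_remaining=1.0 and decays linearly to 1e-4 at progress_remaining=0.0.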
+ :param initial_value: (float or str) start value + :param final_value: final value + :param t_start: timestep (int!) at which the linear decay starts + :param t_end: timestep (int!) at which the linear decay ends + :param total_timesteps: number of learning steps + :return: (function) + """ + + def func(progress_remaining: float) -> float: + """ + Progress will decrease from 1 (beginning) to 0 + :param progress_remaining: (float) progress_remaining = 1.0 - (num_timesteps / total_timesteps) + :return: (float) + """ + # Original: return initial_value * progress_remaining + + return np.maximum( + np.minimum(initial_value, initial_value + (t_start * (initial_value - final_value)) / (t_end - t_start) \ + - (initial_value - final_value) / (t_end - t_start) * ((1.0 - progress_remaining) \ + * total_timesteps)), final_value) + + # return np.maximum(final_value, np.minimum(initial_value,b+ m *(1.0 - progress_remaining) * total_timesteps)) + + return func + + +def exopnential_schedule(initial_value: Union[float, str], final_value: float = 0) -> Callable[[float], float]: + """ + Exponential learning rate schedule (not implemented yet; the returned func raises NotImplementedError). + :param initial_value: (float or str) start value + :param final_value: final value as percentage of initial value (e.g. 0.1 -> final value is 10 % of initial value) + :return: (function) + """ + if isinstance(initial_value, str): + initial_value = float(initial_value) + + def func(progress_remaining: float) -> float: + """ + Progress will decrease from 1 (beginning) to 0 + :param progress_remaining: (float) 1.0 - (num_timesteps / total_timesteps) + Y - X * M + :return: (float) + https://www.jeremyjordan.me/nn-learning-rate/ + """ + # return (progress_remaining * initial_value)*(1-(1-progress_remaining))# + final_value * initial_value + # return ( initial_value)**(1/progress_remaining)# + final_value * initial_value + raise NotImplementedError + return (initial_value) * (progress_remaining - 1) + + return func diff --git a/experiments/hp_tune/util/training_recorder.py b/experiments/hp_tune/util/training_recorder.py new file mode 100644 index 00000000..6922d447 --- /dev/null +++ b/experiments/hp_tune/util/training_recorder.py @@ -0,0 +1,66 @@ +from stable_baselines3.common.callbacks import BaseCallback +import numpy as np + + +class TrainRecorder(BaseCallback): + + def __init__(self, verbose=1): + super(TrainRecorder, self).__init__(verbose) + self.last_model_params = None # self.model.policy.state_dict() + self.params_change = [] + + def _on_training_end(self) -> None: + """ + This event is triggered before exiting the `learn()` method. 
+ """ + # asd = 1 + # ads = 2 + pass + + def _on_step(self) -> bool: + asd = 1 + # R_training[self.n_calls, number_trails] = self.training_env.envs[0].env.history.df['r_load.resistor1.R'].iloc[-1] + # R_training[self.n_calls-1, 0] = self.training_env.envs[0].env.history.df['r_load.resistor1.R'].iloc[-1] + """ + R_training.append(self.training_env.envs[0].env.history.df['r_load.resistor1.R'].iloc[-1]) + i_phasor_training.append((self.training_env.envs[0].i_phasor+0.5)*net['inverter1'].i_lim) + v_phasor_training.append((self.training_env.envs[0].v_phasor+0.5)*net['inverter1'].v_lim) + + if (self.training_env.envs[0].i_phasor)*net['inverter1'].i_lim > 15: + asd = 1 + + i_a.append(self.training_env.envs[0].env.history.df['lc.inductor1.i'].iloc[-1]) + i_b.append(self.training_env.envs[0].env.history.df['lc.inductor2.i'].iloc[-1]) + i_c.append(self.training_env.envs[0].env.history.df['lc.inductor3.i'].iloc[-1]) + + v_a.append(self.training_env.envs[0].env.history.df['lc.capacitor1.v'].iloc[-1]) + v_b.append(self.training_env.envs[0].env.history.df['lc.capacitor2.v'].iloc[-1]) + v_c.append(self.training_env.envs[0].env.history.df['lc.capacitor3.v'].iloc[-1]) + # nach env.step() + """ + return True + + def _on_rollout_end(self) -> None: + # asd = 1 + + model_params = self.model.policy.parameters_to_vector() + + if self.last_model_params is None: + self.last_model_params = model_params + else: + self.params_change.append(np.float64(np.mean(self.last_model_params - model_params))) + + """model_params = self.model.policy.state_dict() + if self.last_model_params is None: + for key, value in model_params.items(): + self.params_change[key.replace(".", "_")] = [] + else: + for key, value in model_params.items(): + #print(key) + self.params_change[key.replace(".", "_")].append(th.mean((model_params[key]-self.last_model_params[key])).tolist()) + """ + + self.last_model_params = model_params + + # self.model.actor.mu._modules # alle :) + pass diff --git a/experiments/hp_tune/visualize_tests/Collect_from_mongoDB.py b/experiments/hp_tune/visualize_tests/Collect_from_mongoDB.py new file mode 100644 index 00000000..8529d523 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/Collect_from_mongoDB.py @@ -0,0 +1,76 @@ +import matplotlib.pyplot as plt +import numpy as np +import optuna +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from bson import ObjectId +from plotly import tools +from pymongo import MongoClient + +from openmodelica_microgrid_gym.util import dq0_to_abc, abc_to_dq0 + +plt_train = True +plotly = False + +# db_name = 'OMG_DDPG_Actor' # 17 +# db_names = 'OMG_DDPG_Integrator_no_pastVals' +# db_names = ['OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr'] # 15 +# db_name = 'OMG_DDPG_Integrator_no_pastVals_corr' + +db_names = [ + 'OMG_DDPG_Actor'] # , 'OMG_DDPG_Integrator_no_pastVals', 'OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr', +# 'OMG_DDPG_Integrator_no_pastVals_corr'] + +for db_name in db_names: + ret_list_test = [] + ret_mean_list_test = [] + ret_std_list_test = [] + reward_list = [] + + with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + # store best trail + + # [for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]: + trial = db['Trial_number_44'] + # if trial.state == optuna.structs.TrialState.COMPLETE: + trial_test = trial.find_one({"Name": "Test_Reward"}) + + if trial_test is not None: + # If, 
trail is not comleted + # ret_list_test.append(trial_test['Return']) + ret_std_list_test.append(np.std(trial_test['Reward'])) + ret_mean_list_test.append(np.mean(trial_test['Reward'])) + reward_list.append(trial_test['Reward']) + + for coll_name in db.list_collection_names(): + + # [for t in study.trials if t.state == optuna.structs.TrialState.COMPLETE]: + trial = db[coll_name] + # if trial.state == optuna.structs.TrialState.COMPLETE: + trial_test = trial.find_one({"Name": "Test_Reward"}) + + if trial_test is not None: + # If, trail is not comleted + # ret_list_test.append(trial_test['Return']) + ret_std_list_test.append(np.std(trial_test['Reward'])) + ret_mean_list_test.append(np.mean(trial_test['Reward'])) + # reward_list.append(trial_test['Reward']) + + # if len(ret_list_test) > 550: + # break + + print(ret_mean_list_test) + print(ret_std_list_test) + asd = 1 + results = { + #'return': ret_list_test, + 'return_Mean': ret_mean_list_test, + 'return_Std': ret_std_list_test, + 'study_name': db_name} + + df = pd.DataFrame(results) + df.to_pickle(db_name + "return_8XX_agents.pkl") diff --git a/experiments/hp_tune/visualize_tests/Collect_trainData_from_Mongo.py b/experiments/hp_tune/visualize_tests/Collect_trainData_from_Mongo.py new file mode 100644 index 00000000..075e5709 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/Collect_trainData_from_Mongo.py @@ -0,0 +1,79 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from bson import ObjectId +from plotly import tools +from pymongo import MongoClient + +from openmodelica_microgrid_gym.util import dq0_to_abc, abc_to_dq0 + +plt_train = True +plotly = False + +db_name = 'OMG_DDPG_Actor' # 15 +db_name = 'OMG_DDPG_Integrator_no_pastVals' # 15 + +trial = '0' +show_episode_number = 19 + +ret_mean_list_test = [] +ret_std_list_test = [] +i_q_delta_mean_list_test = [] +i_q_delta_std_list_test = [] +i_d_delta_mean_list_test = [] +i_d_delta_std_list_test = [] +reward_list = [] + +reward_df = pd.DataFrame() + +with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + idx = 0 + for coll_name in db.list_collection_names(): + trial = db[coll_name] + # trial = db.Trial_number_23 + + train_data = trial.find_one({"Name": "After_Training"}) + # trial_test = trial.find_one({"Name": "Test"}) + + if train_data is not None: # if trial not finished (was in actor_Ddpg > 550) + + if idx == 0: + reward_df = pd.DataFrame({str(idx): train_data['Mean_eps_reward']}) + else: + + df_tmp = pd.DataFrame({str(idx): train_data['Mean_eps_reward']}) + reward_df = reward_df.join(df_tmp) + idx += 1 + + # reward_list.append(train_data['Mean_eps_reward']) + + # reward_df = reward_df.join(df_tmp) + + # if len(reward_list) >= 550: + # break + +reward_df.to_pickle(db_name + "_8XX_agents_train_data.pkl") +# print(ret_mean_list_test) +# print(ret_std_list_test) +# asd = 1 +# results = { +# 'reward': reward_list, +# 'study_name': db_name} + +# df = pd.DataFrame(results) +# df.to_pickle(db_name+"_1250_agents_train_data.pkl") + + +plt.plot(reward_list) +# plt.fill_between( m - s, m + s, facecolor='r') +plt.ylabel('Average return +- sdt') +plt.xlabel('Max_episode steps') +# plt.ylim([0, 200]) +plt.grid() +plt.title(db_name) +plt.show() diff --git a/experiments/hp_tune/visualize_tests/Com_models_pc2.py b/experiments/hp_tune/visualize_tests/Com_models_pc2.py new file mode 100644 
index 00000000..67eecf5d --- /dev/null +++ b/experiments/hp_tune/visualize_tests/Com_models_pc2.py @@ -0,0 +1,809 @@ +print('Start script') + +import logging +import os +import platform +import time +from functools import partial + +import GPy +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from stochastic.processes import VasicekProcess +from tqdm import tqdm +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper, FeatureWrapper_pastVals, FeatureWrapper_futureVals, \ + BaseWrapper, FeatureWrapper_I_controller +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# imports for PIPI +from experiments.hp_tune.env.random_load import RandomLoad +from openmodelica_microgrid_gym.agents import SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + +import pandas as pd + +from openmodelica_microgrid_gym.util import abc_to_dq0 + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +import gym + +# np.random.seed(0) + +show_plots = True +save_results = False + +# folder_name = 'saves/OMG_DDPGActor_wo_integrator_butPastVals_3_Deterministic' # cfg['STUDY_NAME'] +folder_name = 'saves/paper_desscaR_load' # cfg['STUDY_NAME'] +# folder_name = 'saves/OMG_i_load_feature_0_Deterministic' # cfg['STUDY_NAME'] +node = platform.uname().node + +# model_name = 'model_retrain_pastVals12.zip' +# number_past_vals = [5, 5, 0, 0] # [0, 5, 10, 16, 25] # [30, 0] +number_past_vals = [5] # [0, 5, 10, 16, 25] # [30, 0] +# use_past_vals = [True] # [False, True, True, True, True] # [True, False] +# wrapper = ['past', 'no-I-term', 'past', 'i_load'] # ['past', 'future', 'no-I-term', 'I-controller'] +wrapper = ['no-I-term'] # ['past', 'future', 'no-I-term', 'I-controller'] + +# model_name = ['model.zip'] +# model_path = 'OMG_Integrator_Actor_i_load_feature_2/1/' +# model_path = 'OMG_DDPG_Actor/3/' +model_path = 'experiments/hp_tune/trained_models/paper/' +# model_path = 'OMG_Integrator_Actor/32/' + +# model_name = ['model_OMG_DDPG_Integrator_no_pastVals.zip', 'model_OMG_DDPG_Actor.zip', +# 'model_OMG_DDPG_Integrator_no_pastVals_corr.zip', +# 'model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip'] + +model_name = ['model_OMG_DDPG_Integrator_no_pastVals.zip', 'model_OMG_DDPG_Actor.zip'] +model_name = ['model_OMG_DDPG_Actor.zip'] +# model_name = ['model.zip'] +################DDPG Config Stuff######################################################################### +gamma = 0.946218 +integrator_weight = 0.311135 +antiwindup_weight = 0.660818 +error_exponent = 0.5 +use_gamma_in_rew = 1 +n_trail = 50001 +actor_number_layers = 2 +critic_number_layers = 4 +alpha_relu_actor = 0.208098 +alpha_relu_critic = 0.00678497 +""" +################DDPG Config 
Stuff######################################################################### +gamma = 0.984421 # 0.946218 +integrator_weight = 0 # 0.311135 +antiwindup_weight = 0 # 0.660818 +error_exponent = 0.5 +use_gamma_in_rew = 1 +n_trail = 50001 +actor_number_layers = 2 +critic_number_layers = 3 # 4 +alpha_relu_actor = 0.0034719 # 0.208098 +alpha_relu_critic = 0.00613757 # 0.00678497 + +print('HPs für DDPG ohne I-Anteil!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') +""" +mongo_recorder = Recorder(node=node, database_name=folder_name) + +num_average = 1 +max_episode_steps_list = [10000] # [1000, 5000, 10000, 20000, 50000, 100000] + +# data_str = 'experiments/hp_tune/data/R_load_deterministic_test_case2_1_seconds.pkl' +data_str = 'experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl' +data_str = 'experiments/hp_tune/data/R_load_dessca.pkl' + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +#################PI Config stuff############################################################################## + +current_directory = os.getcwd() +# folder_name = 'Pipi_safeopt_best_run4d' +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_vctrl_single_inv.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +# max_episode_steps = 1002 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F + +""" +print("P10 stuff!") +L_filter = 70e-6 # / H +R_filter = 1.1e-3 # / Ohm +C_filter = 250e-6 # / F +""" + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) +upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 +##################################### +# Definitions for the GP +prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set +noise_var = 0.001 # ** 2 # measurement noise sigma_omega +prior_var = 2 # prior variance of the GP + +bounds = [(0.000, 0.045), (4, 450)] # bounds on the input variable current-Ki&Kp and voltage-Ki&Kp +lengthscale = [.003, 50.] 
# length scale for the parameter variation [current-Ki&Kp and voltage-Ki&Kp] for the GP + +safe_threshold = 0 +j_min = -5 # cal min allowed performance + +explore_threshold = 0 + +# Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of +# limit exceeded +abort_reward = 100 * j_min + +# Definition of the kernel +kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + +##################################### +# Definition of the controllers +# kp_v = 0.002 +# ki_v = 143 + +# Old optimized parameters: +kp_v = 0 # 0.0095 # 0.0 +ki_v = 182 # 173.22 # 200 +kp_c = 0.0308 # 0.0404 # 0.04 +ki_c = 13.3584 # 4.065 # 11.8 + +""" +#P10: +print('using p10 setting') +kp_v = 0.2972 +ki_v = 142.7 +kp_c = 0.00068 +ki_c = 0.731 +""" +# Choose Kp and Ki for the current and voltage controller as mutable parameters +mutable_params = dict(voltageP=MutableFloat(kp_v), voltageI=MutableFloat(ki_v)) # 300Hz +# mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz +voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-i_lim * 10, i_lim * 10)) + +current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) # Current controller values +droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) +qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + +ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + +agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + +"""""" +for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + for ave_run in tqdm(range(num_average), desc='steps', unit='step', leave=False): + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, + i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + ####################################PI Stuff################################################ + R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + rand_load_train = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_tenLoadstepPerEpisode2881Len_test_case_10_seconds.pkl')) + # 'experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl')) + # 'experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl')) + # 'experiments/hp_tune/data/R_load_deterministic_test_case_25_ohm_1_seconds.pkl')) + data_str)) + + cb = CallbackList() + # set initial = None to reset load random in range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + 
cb.append(rand_load_train.reset) + + plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + + + # rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + # load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + # fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + history=FullHistory(), + # on_episode_reset_callback=cb.fire, + action_time_delay=1 * undersample + ) + + rew.gamma = 0 + return_sum_PI = 0.0 + rew_list_PI = [] + 
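# containers for logging the PI baseline's dq0 voltages, load profile and limit-violation bookkeeping +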
v_d_PI = [] + v_q_PI = [] + v_0_PI = [] + R_load_PI = [] + limit_exceeded_in_test_PI = False + limit_exceeded_penalty_PI = 0 + + agent.reset() + agent.obs_varnames = env.history.cols + env.history.cols = env.history.structured_cols(None) + agent.measurement_cols + env.measure = agent.measure + agent_fig = None + obs_PI = env.reset() + + for step in tqdm(range(env.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + agent.observe(None, False) + act_PI = agent.act(obs_PI) + obs_PI, r_PI, done_PI, info_PI = env.step(act_PI) + rew_list_PI.append(r_PI) + env.render() + return_sum_PI += r_PI + if r_PI == -1 and not limit_exceeded_in_test_PI: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test_PI = True + limit_exceeded_penalty_PI = -1 + + # _, env_fig = env.close() + agent.observe(r_PI, done_PI) + + v_a_PI = env.history.df['lc.capacitor1.v'] + v_b_PI = env.history.df['lc.capacitor2.v'] + v_c_PI = env.history.df['lc.capacitor3.v'] + i_a_PI = env.history.df['lc.inductor1.i'] + i_b_PI = env.history.df['lc.inductor2.i'] + i_c_PI = env.history.df['lc.inductor3.i'] + R_load_PI = (env.history.df['r_load.resistor1.R'].tolist()) + phase_PI = env.history.df['inverter1.phase.0'] # env.net.components[0].phase + + i_dq0_PI = abc_to_dq0(np.array([i_a_PI, i_b_PI, i_c_PI]), phase_PI) + v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) + + i_d_PI = i_dq0_PI[0].tolist() + i_q_PI = i_dq0_PI[1].tolist() + i_0_PI = i_dq0_PI[2].tolist() + v_d_PI = (v_dq0_PI[0].tolist()) + v_q_PI = (v_dq0_PI[1].tolist()) + v_0_PI = (v_dq0_PI[2].tolist()) + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "time": ts, + "PI_Kp_c": kp_c, + "PI_Ki_c": ki_c, + "PI_Kp_v": kp_v, + "PI_Ki_v": ki_v, + "DDPG_model_path": model_path, + "Return PI": (return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI), + "Reward PI": rew_list_PI, + "env_hist_PI": env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "PI result for comparison with RL agent", + "optimization node": 'Thinkpad', + "optimization folder name": 'Pipi_new_testcase_opt_4d_undsafe_2' + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/PI_{max_episode_steps_list[max_eps_steps]}steps') + + ####################################DDPG Stuff############################################## + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + + net = Network.load('net/net_vctrl_single_inv_dq0.yaml') # is used from vctrl_single_env, not needed here + + for used_model, wrapper_mode, used_number_past_vales in zip(model_name, wrapper, number_past_vals): + + if wrapper_mode == 'i_load': + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? 
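+ # observation channels: abc inductor currents, abc capacitor voltages and the three voltage reference components (plus the abc load currents in the 'i_load' case)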
+ obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2' # ], + , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'], + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + ) + else: + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'], + # , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'], + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + ) + + if wrapper_mode in ['past', 'i_load']: + env_test = FeatureWrapper_pastVals(env_test, number_of_features=9 + used_number_past_vales * 3, + # training_episode_length=training_episode_length, (da aus pickle!) 
+ recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, + number_past_vals=used_number_past_vales) + + elif wrapper_mode == 'future': + env_test = FeatureWrapper_futureVals(env_test, number_of_features=9, + recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, number_future_vals=10, + future_data=data_str) + + elif wrapper_mode == 'I-controller': + env_test = FeatureWrapper_I_controller(env_test, number_of_features=14 + used_number_past_vales * 3, + recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, + Ki=12, + number_past_vals=number_past_vals) + + elif wrapper_mode == 'no-I-term': + env_test = BaseWrapper(env_test, number_of_features=6 + used_number_past_vales * 3, + recorder=mongo_recorder, n_trail=n_trail, gamma=gamma, + number_past_vals=used_number_past_vales) + + else: + env_test = FeatureWrapper(env_test, number_of_features=11, + recorder=mongo_recorder, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, + penalty_P_weight=0) # , use_past_vals=True, number_past_vals=30) + + if wrapper_mode not in ['no-I-term', 'I-controller']: + env_test.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + # model2 = DDPG.load(model_path + f'model.zip') # , env=env_test) + print('Before load') + + model = DDPG.load(model_path + f'{used_model}') # , env=env_test) + + print('After load') + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + if wrapper_mode not in ['no-I-term', 'I-controller']: + env_test.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + return_sum = 0.0 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + rew_list = [] + v_d = [] + v_q = [] + v_0 = [] + action_P0 = [] + action_P1 = [] + action_P2 = [] + action_I0 = [] + action_I1 = [] + action_I2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + R_load = [] + + ####### Run Test ######### + # agent ~ PI Controllerv using env + # model ~ RL Controller using env_test + # Both run in the same loop + + obs = env_test.reset() + + for step in tqdm(range(env_test.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + action, _states = model.predict(obs, deterministic=True) + if step == 988: + asd = 1 + obs, rewards, done, info = env_test.step(action) + action_P0.append(np.float64(action[0])) + action_P1.append(np.float64(action[1])) + action_P2.append(np.float64(action[2])) + if wrapper_mode not in ['no-I-term', 'I-controller']: + action_I0.append(np.float64(action[3])) + action_I1.append(np.float64(action[4])) + 
action_I2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set additional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + + # print(rewards) + if done: + env_test.close() + + # print(limit_exceeded_in_test) + break + + env_test.close() + + v_a = env_test.history.df['lc.capacitor1.v'] + v_b = env_test.history.df['lc.capacitor2.v'] + v_c = env_test.history.df['lc.capacitor3.v'] + i_a = env_test.history.df['lc.inductor1.i'] + i_b = env_test.history.df['lc.inductor2.i'] + i_c = env_test.history.df['lc.inductor3.i'] + R_load = (env_test.history.df['r_load.resistor1.R'].tolist()) + phase = env_test.history.df['inverter1.phase.0'] # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + + i_d = i_dq0[0].tolist() + i_q = i_dq0[1].tolist() + i_0 = i_dq0[2].tolist() + v_d = (v_dq0[0].tolist()) + v_q = (v_dq0[1].tolist()) + v_0 = (v_dq0[2].tolist()) + """ + plt.plot(v_d_PI, 'b') + plt.plot(v_q_PI, 'r') + plt.plot(v_0_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title('PI') + plt.show() + + plt.plot(rew_list_PI) + plt.xlabel("") + plt.grid() + plt.ylabel("Reward") + plt.title('PI') + plt.show() + + plt.plot(R_load_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('Test') + plt.show() + """ + + plt.plot(v_d, 'b') + plt.plot(v_q, 'r') + plt.plot(v_0, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title(f'DDPG - {used_model}') + plt.show() + + plt.plot(rew_list) + plt.xlabel("") + plt.grid() + plt.ylabel("Reward") + plt.title(f'DDPG - {used_model}') + plt.show() + + plt.plot(R_load, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('DDPG') + plt.show() + + plt.plot(integrator_sum0) + plt.plot(integrator_sum1) + plt.plot(integrator_sum2) + plt.ylabel('integrator state') + plt.show() + + plt.plot(action_I0) + plt.plot(action_I1) + plt.plot(action_I2) + plt.ylabel('action I') + plt.show() + + plt.plot(action_P0) + plt.plot(action_P1) + plt.plot(action_P2) + plt.ylabel('action P') + plt.show() + + # return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + print(f'RL: {(return_sum / env_test.max_episode_steps + limit_exceeded_penalty)}') + # print(f'PI: {(return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI)}') + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "model name": model_name, + "Wrapper": wrapper, + "used_number_past_vales": used_number_past_vales, + "time": ts, + "ActionP0": action_P0, + "ActionP1": action_P1, + "ActionP2": action_P2, + "ActionI0": action_I0, + "ActionI1": action_I1, + "ActionI2": action_I2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "DDPG_model_path": model_path, + "Return DDPG": (return_sum / env_test.max_episode_steps + limit_exceeded_penalty), + "Reward DDPG": rew_list, + "env_hist_DDPG": env_test.env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": 
"execution of RL agent on 10 s test case-loading values", + "optimization node": 'Thinkpad', + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/' + used_model + f'_{max_episode_steps_list[max_eps_steps]}steps') + + ret_list.append((return_sum / env_test.max_episode_steps + limit_exceeded_penalty)) + ret_array[ave_run] = (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + # ret_dict[str(ave_run)] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # zipped = zip(max_episode_steps_list[max_eps_steps], ret_list) + # temp_dict = dict(zipped) + temp_dict = {str(max_episode_steps_list[max_eps_steps]): ret_list} + result_list.append(temp_dict) + # ret_dict.append(zipped) + # df = df.append(ret_dict) + + mean_list.append(np.mean(ret_array)) + std_list.append(np.std(ret_array)) + +# df = df.append(temp_list, True) +print(mean_list) +print(std_list) +print(result_list) + +results = { + 'Mean': mean_list, + 'Std': std_list, + 'All results': result_list, + 'max_episode_steps_list': max_episode_steps_list +} + +df = pd.DataFrame(results) +# df.to_pickle("DDPG_study18_best_test_varianz.pkl") +asd = 1 diff --git a/experiments/hp_tune/visualize_tests/CompareModelsPlotting_noNoise.py b/experiments/hp_tune/visualize_tests/CompareModelsPlotting_noNoise.py new file mode 100644 index 00000000..40fa282e --- /dev/null +++ b/experiments/hp_tune/visualize_tests/CompareModelsPlotting_noNoise.py @@ -0,0 +1,267 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +make_pyplot = False +show_load = True +interval_plt = True + +# interval_list_x = [[0, 0.01], [0.01, 1.0], [0.78, 0.9]] +# interval_list_y = [[-25, 210], [-40, 210], [165, 175]] + + +# Fuer den Detzerministc case +interval_list_x = [[0, 0.01], [0.105, 0.2], [0.695, 0.71], [0.85, 0.88]] +interval_list_y = [[-25, 210], [165, 175], [-25, 335], [165, 175]] + +# Fuer den 10s Fall +interval_list_x = [[0, 0.006], [2.0925, 2.1], [3.11, 3.12], [7.1, 7.14], [8.145, 8.16]] +# interval_list_x = [[0, 0.01], [2.09, 2.1], [2.11, 2.12], [7.08, 7.16], [7.145, 7.16]] +interval_list_y = [[-25, 210], [-25, 340], [160, 190], [-25, 340], [125, 340]] +# folder_name = 'saves/Comparison_study_future10Rvals_deterministicTestcase' +# folder_name = 'saves/Comparison_study_22_best_pastVal_HPO_deterministic_noMeasNoise' +folder_names = [ + 'saves/OMG_integratorActor_3_Deterministic'] # , 'saves/OMG_i_load_feature_0_Deterministic'] # _deterministic' +folder_names = ['saves/OMG_i_load_feature_0_Deterministic'] # _deterministic' +folder_names = ['saves/paper_deterministic'] # _deterministic' +folder_names = ['saves/paper'] # _deterministic' +folder_names = ['saves/paper_new', 'saves/paper_new', 'saves/paper_new', 'saves/paper_new'] # _deterministic' + +number_of_steps = '_100000steps' + +# df = pd.read_pickle(folder_names[0] + '/PI' + number_of_steps) +df = pd.read_pickle('saves/paper_desscaR_load' + '/PI_10000steps') +# df = pd.read_pickle(folder_name + '/PI_9989steps') + +env_hist_PI = df['env_hist_PI'] +v_a_PI = env_hist_PI[0]['lc.capacitor1.v'].tolist() +v_b_PI = env_hist_PI[0]['lc.capacitor2.v'].tolist() +v_c_PI = env_hist_PI[0]['lc.capacitor3.v'].tolist() +R_load_PI = (env_hist_PI[0]['r_load.resistor1.R'].tolist()) +phase_PI = env_hist_PI[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) +v_d_PI = 
(v_dq0_PI[0].tolist()) +v_q_PI = (v_dq0_PI[1].tolist()) +v_0_PI = (v_dq0_PI[2].tolist()) + +reward_PI = df['Reward PI'][0] +return_PI = df['Return PI'][0] +kp_c = df['PI_Kp_c'][0] +ki_c = df['PI_Ki_c'][0] +kp_v = df['PI_Kp_v'][0] +ki_v = df['PI_Ki_v'][0] + +model_names = ['model_OMG_DDPG_Actor.zip', 'model_OMG_DDPG_Integrator_no_pastVals_corr.zip', + 'model_OMG_DDPG_Integrator_no_pastVals.zip', + 'model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip'] +ylabels = ['DDPG', 'DDPG-I', 'DDPG-I+pastVals', 'DDPG-I+i_load'] +# ylabels = ['DDPG-I+pastVals'] +# ylabels = ['DDPG-I+pastVals'] +# model_names = ['model_OMG_DDPG_Actor.zip'] # ['model_0_pastVals.zip','model_2_pastVals.zip', 'model_5_pastVals.zip', 'model_10_pastVals.zip', 'model_16_pastVals.zip', 'model_25_pastVals.zip', ] # , 'model_noPastVals.zip'] +# model_names = ['model_OMG_DDPG_Integrator_no_pastVals.zip'] +# model_names = ['model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip'] +# model_names = ['model_OMG_DDPG_Integrator_no_pastVals_corr.zip'] +pastVals = ['5', '0', '5', '0'] # ['0', '2', '5', '10', '16', '25'] +# pastVals = ['5'] # ['0', '2', '5', '10', '16', '25'] +return_list_DDPG = [] +reward_list_DDPG = [] + +ts = 1e-4 # if ts stored: take from db + +# t_test_R = np.arange(ts, (len(testcase_100k['v_d_PI'])) * ts, ts).tolist() + +t_test = np.arange(0, round((len(v_0_PI)) * ts, 4), ts).tolist() +t_reward = np.arange(0, round((len(reward_PI)) * ts, 4), ts).tolist() + +# fig, axs = plt.subplots(len(model_names)+2, len(interval_list_y), figsize=(16, 12)) # , sharex=True) # a new figure window +fig, axs = plt.subplots(len(model_names) + 3, len(interval_list_y), + figsize=(12, 10)) # , sharex=True) # a new figure window + +for i in range(len(interval_list_y)): + plt_count = 3 + ############## Subplots + # fig = plt.figure(figsize=(10,12)) # a new figure window + + for model_name, pV, folder_name, ylabel_use in zip(model_names, pastVals, folder_names, ylabels): + + df_DDPG = pd.read_pickle(folder_name + '/' + model_name + number_of_steps) + #df_DDPG = pd.read_pickle(folder_name + '/' 'model_5_pastVals.zip_100000steps_NoPhaseFeature_1427') + + if i == 0: + return_list_DDPG.append(round(df_DDPG['Return DDPG'][0], 7)) + # reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + + env_hist_DDPG = df_DDPG['env_hist_DDPG'] + + v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() + v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() + v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() + phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + v_d_DDPG = (v_dq0[0].tolist()) + v_q_DDPG = (v_dq0[1].tolist()) + v_0_DDPG = (v_dq0[2].tolist()) + + axs[0, i].plot(t_test, R_load_PI) + axs[0, i].grid() + axs[0, i].set_xlim(interval_list_x[i]) + # axs[0, i].set_ylim([15, 75]) + if i == 0: + axs[0, i].set_ylabel("$R_{\mathrm{load}}\,/\,\mathrm{\Omega}$") + # ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + + DDPG_reward = df_DDPG['Reward DDPG'][0] + if plt_count == 3: + axs[1, i].plot(t_reward, reward_PI, 'b', label=f' PI: ' + f'{round(sum(reward_PI[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + axs[1, i].plot(DDPG_reward, 'r', label=f'DDPG: ' + f'{round(sum(DDPG_reward[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + axs[1, i].grid() + axs[1, i].set_xlim(interval_list_x[i]) + # axs[1, 
i].set_ylim(interval_list_y[i]) + axs[1, i].legend() + if i == 0: + axs[1, i].set_ylabel("Reward") + + axs[2, i].plot(t_test, v_d_PI, 'b', label='v_d') + axs[2, i].plot(t_test, v_q_PI, 'r', label='v_q') + axs[2, i].plot(t_test, v_0_PI, 'g', label='v_0') + axs[2, i].grid() + axs[2, i].set_xlim(interval_list_x[i]) + axs[2, i].set_ylim(interval_list_y[i]) + if i == 0: + axs[2, i].set_ylabel("$v_{\mathrm{dq0, PI}}\,/\,\mathrm{V}$") + # else: + # axs[1, i].set_ylabel("$v_{\mathrm{q0, PI}}\,/\,\mathrm{V}$") + + axs[plt_count, i].plot(v_d_DDPG, 'b') + axs[plt_count, i].plot(v_q_DDPG, 'r') + axs[plt_count, i].plot(v_0_DDPG, 'g') + axs[plt_count, i].grid() + axs[plt_count, i].set_xlim(interval_list_x[i]) + axs[plt_count, i].set_ylim(interval_list_y[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + # axs[plt_count, i].set_ylabel(pV) + axs[plt_count, i].set_ylabel(ylabel_use) + # axs[plt_count, i].set_ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") + # else: + # axs[plt_count, i].set_ylabel("$v_{\mathrm{q0, DDPG}}\,/\,\mathrm{V}$") + plt_count += 1 + plt.show() + +fig.suptitle(f'Model using pastVals:' + str(pastVals) + ' \n ' + f'Model-return(MRE)' + str(return_list_DDPG) + ' \n' + f' PI-return(MRE): {round(return_PI, 7)} \n ' + f'PI: Kp_i = {kp_c}, Ki_i = {ki_c}, Kp_v = {kp_v}, Ki_v = {ki_v}', + fontsize=14) + +fig.subplots_adjust(wspace=0.2, hspace=0.2) +plt.show() + +#fig.savefig(f'{folder_name}/Ausschnitt_2pV_q0.pdf') + +if make_pyplot: + # pyplot Load + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=R_load_PI)) # , title='R_load') + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + # pyplot PI + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_reward, y=DDPG_reward)) + plot.add_trace( + px.Scatter(x=t_reward, y=reward_PI)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + for model_name in model_names: + df_DDPG = pd.read_pickle(folder_name + '/' + model_name + number_of_steps) + + env_hist_DDPG = df_DDPG['env_hist_DDPG'] + + v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() + v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() + v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() + phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + v_d_DDPG = (v_dq0[0].tolist()) + v_q_DDPG = (v_dq0[1].tolist()) + v_0_DDPG = (v_dq0[2].tolist()) + # pyplot ddpg + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=v_d_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_DDPG)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + plot.add_trace( + px.Scatter(x=t_test, y=v_d_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_PI)) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + +plt.plot(t_test, v_d_DDPG, 'b') +# plt.plot(t_test, v_d_PI, 'r') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +# plt.xlim([0, 0.025]) +plt.ylim([160, 190]) +plt.xlabel("time") +plt.ylabel("v_dq0_DDPG") +plt.title(f'DDPG') 
+plt.show() + +plt.plot(t_test, v_d_PI, 'r') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +# plt.xlim([0, 0.025]) +plt.ylim([160, 190]) +plt.xlabel("time") +plt.ylabel("v_dq0_PI") +plt.title(f'PI') +plt.show() + +plt.plot(t_test, v_d_DDPG, 'b') +# plt.plot(t_test, v_d_PI, 'r') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +plt.xlim([0.1, 0.11]) +plt.ylim([290, 360]) +plt.xlabel("time") +plt.ylabel("v_dq0_DDPG") +plt.title(f'DDPG') +plt.show() + +plt.plot(t_test, v_d_PI, 'r') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +plt.xlim([0.1, 0.2]) +plt.ylim([290, 360]) +plt.xlabel("time") +plt.ylabel("v_dq0_PI") +plt.title(f'PI') +plt.show() diff --git a/experiments/hp_tune/visualize_tests/CompareModels_noNoise.py b/experiments/hp_tune/visualize_tests/CompareModels_noNoise.py new file mode 100644 index 00000000..72f473b2 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/CompareModels_noNoise.py @@ -0,0 +1,803 @@ +print('Start script') + +import logging +import os +import platform +import time +from functools import partial + +import GPy +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from stochastic.processes import VasicekProcess +from tqdm import tqdm +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper, FeatureWrapper_pastVals, FeatureWrapper_futureVals, \ + BaseWrapper, FeatureWrapper_I_controller +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# imports for PIPI +from experiments.hp_tune.env.random_load import RandomLoad +from openmodelica_microgrid_gym.agents import SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + +import pandas as pd + +from openmodelica_microgrid_gym.util import abc_to_dq0 + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +import gym + +# np.random.seed(0) + +show_plots = True +save_results = False + +# folder_name = 'saves/OMG_DDPGActor_wo_integrator_butPastVals_3_Deterministic' # cfg['STUDY_NAME'] +folder_name = 'saves/paper' # cfg['STUDY_NAME'] +# folder_name = 'saves/OMG_i_load_feature_0_Deterministic' # cfg['STUDY_NAME'] +node = platform.uname().node + +# model_name = 'model_retrain_pastVals12.zip' +number_past_vals = [5] # , 5, 0, 0] # [0, 5, 10, 16, 25] # [30, 0] +# use_past_vals = [True] # [False, True, True, True, True] # [True, False] +wrapper = ['past'] #, 'no-I-term', 'past', 'i_load'] # ['past', 'future', 'no-I-term', 'I-controller'] + +# model_name = ['model.zip'] +# model_path = 'OMG_Integrator_Actor_i_load_feature_2/1/' +# model_path = 'OMG_DDPG_Actor/3/' +model_path = 'experiments/hp_tune/trained_models/paper/' +model_path = 'experiments/hp_tune/trained_models/NoPhaseFeature_1427/' +# model_path = 'OMG_Integrator_Actor/32/' + +model_name = 
['model_OMG_DDPG_Integrator_no_pastVals.zip', 'model_OMG_DDPG_Actor.zip', + 'model_OMG_DDPG_Integrator_no_pastVals_corr.zip', + 'model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip'] + +model_name = ['model_5_pastVals.zip'] +# model_name = ['model.zip'] +################DDPG Config Stuff######################################################################### +gamma = 0.946218 +integrator_weight = 0.311135 +antiwindup_weight = 0.660818 +error_exponent = 0.5 +use_gamma_in_rew = 1 +n_trail = 50001 +actor_number_layers = 2 +critic_number_layers = 4 +alpha_relu_actor = 0.208098 +alpha_relu_critic = 0.00678497 +""" +################DDPG Config Stuff######################################################################### +gamma = 0.984421 # 0.946218 +integrator_weight = 0 # 0.311135 +antiwindup_weight = 0 # 0.660818 +error_exponent = 0.5 +use_gamma_in_rew = 1 +n_trail = 50001 +actor_number_layers = 2 +critic_number_layers = 3 # 4 +alpha_relu_actor = 0.0034719 # 0.208098 +alpha_relu_critic = 0.00613757 # 0.00678497 + +print('HPs für DDPG ohne I-Anteil!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!') +""" +mongo_recorder = Recorder(node=node, database_name=folder_name) + +num_average = 1 +max_episode_steps_list = [100000] # [1000, 5000, 10000, 20000, 50000, 100000] + +data_str = 'experiments/hp_tune/data/R_load_deterministic_test_case2_1_seconds.pkl' +data_str = 'experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl' +# data_str = 'experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl' + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +#################PI Config stuff############################################################################## + +current_directory = os.getcwd() +# folder_name = 'Pipi_safeopt_best_run4d' +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_vctrl_single_inv.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +# max_episode_steps = 1002 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F + +""" +print("P10 stuff!") +L_filter = 70e-6 # / H +R_filter = 1.1e-3 # / Ohm +C_filter = 250e-6 # / F +""" + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) 
+upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 +##################################### +# Definitions for the GP +prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set +noise_var = 0.001 # ** 2 # measurement noise sigma_omega +prior_var = 2 # prior variance of the GP + +bounds = [(0.000, 0.045), (4, 450)] # bounds on the input variable current-Ki&Kp and voltage-Ki&Kp +lengthscale = [.003, 50.] # length scale for the parameter variation [current-Ki&Kp and voltage-Ki&Kp] for the GP + +safe_threshold = 0 +j_min = -5 # cal min allowed performance + +explore_threshold = 0 + +# Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of +# limit exceeded +abort_reward = 100 * j_min + +# Definition of the kernel +kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + +##################################### +# Definition of the controllers +# kp_v = 0.002 +# ki_v = 143 + +# Old optimized parameters: +kp_v = 0 # 0.0095 # 0.0 +ki_v = 182 # 173.22 # 200 +kp_c = 0.0308 # 0.0404 # 0.04 +ki_c = 13.3584 # 4.065 # 11.8 + +""" +#P10: +print('using p10 setting') +kp_v = 0.2972 +ki_v = 142.7 +kp_c = 0.00068 +ki_c = 0.731 +""" +# Choose Kp and Ki for the current and voltage controller as mutable parameters +mutable_params = dict(voltageP=MutableFloat(kp_v), voltageI=MutableFloat(ki_v)) # 300Hz +# mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz +voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-i_lim * 10, i_lim * 10)) + +current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) # Current controller values +droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) +qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + +ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + +agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + + +for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + for ave_run in tqdm(range(num_average), desc='steps', unit='step', leave=False): + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, + i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + ####################################PI Stuff################################################ + R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + rand_load_train = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + load_curve=pd.read_pickle( + # 
'experiments/hp_tune/data/R_load_tenLoadstepPerEpisode2881Len_test_case_10_seconds.pkl')) + # 'experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl')) + # 'experiments/hp_tune/data/R_load_hard_test_case_60_seconds_noReset.pkl')) + # 'experiments/hp_tune/data/R_load_deterministic_test_case_25_ohm_1_seconds.pkl')) + data_str)) + + cb = CallbackList() + # set initial = None to reset load random in range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + cb.append(rand_load_train.reset) + + plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + + + # rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + # load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + # fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': 
partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + history=FullHistory(), + # on_episode_reset_callback=cb.fire, + action_time_delay=1 * undersample + ) + + rew.gamma = 0 + return_sum_PI = 0.0 + rew_list_PI = [] + v_d_PI = [] + v_q_PI = [] + v_0_PI = [] + R_load_PI = [] + limit_exceeded_in_test_PI = False + limit_exceeded_penalty_PI = 0 + """ + agent.reset() + agent.obs_varnames = env.history.cols + env.history.cols = env.history.structured_cols(None) + agent.measurement_cols + env.measure = agent.measure + agent_fig = None + obs_PI = env.reset() + + for step in tqdm(range(env.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + agent.observe(None, False) + act_PI = agent.act(obs_PI) + obs_PI, r_PI, done_PI, info_PI = env.step(act_PI) + rew_list_PI.append(r_PI) + env.render() + return_sum_PI += r_PI + if r_PI == -1 and not limit_exceeded_in_test_PI: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test_PI = True + limit_exceeded_penalty_PI = -1 + + _, env_fig = env.close() + agent.observe(r_PI, done_PI) + + v_a_PI = env.history.df['lc.capacitor1.v'] + v_b_PI = env.history.df['lc.capacitor2.v'] + v_c_PI = env.history.df['lc.capacitor3.v'] + i_a_PI = env.history.df['lc.inductor1.i'] + i_b_PI = env.history.df['lc.inductor2.i'] + i_c_PI = env.history.df['lc.inductor3.i'] + R_load_PI = (env.history.df['r_load.resistor1.R'].tolist()) + phase_PI = env.history.df['inverter1.phase.0'] # env.net.components[0].phase + + i_dq0_PI = abc_to_dq0(np.array([i_a_PI, i_b_PI, i_c_PI]), phase_PI) + v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) + + i_d_PI = i_dq0_PI[0].tolist() + i_q_PI = i_dq0_PI[1].tolist() + i_0_PI = i_dq0_PI[2].tolist() + v_d_PI = (v_dq0_PI[0].tolist()) + v_q_PI = (v_dq0_PI[1].tolist()) + v_0_PI = (v_dq0_PI[2].tolist()) + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "time": ts, + "PI_Kp_c": kp_c, + "PI_Ki_c": ki_c, + "PI_Kp_v": kp_v, + "PI_Ki_v": ki_v, + "DDPG_model_path": model_path, + "Return PI": (return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI), + "Reward PI": rew_list_PI, + "env_hist_PI": env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "PI result for comparison with RL agent", + "optimization node": 'Thinkpad', + "optimization folder name": 'Pipi_new_testcase_opt_4d_undsafe_2' + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/PI_{max_episode_steps_list[max_eps_steps]}steps') + """ + ####################################DDPG Stuff############################################## + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + + net = Network.load('net/net_vctrl_single_inv_dq0.yaml') # is used from vctrl_single_env, not needed here + + for used_model, wrapper_mode, used_number_past_vales in zip(model_name, wrapper, number_past_vals): + + if wrapper_mode == 'i_load': + env_test = 
gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2' # ], + , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'], + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + ) + else: + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? + obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'], + # , 'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'], + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + ) + + if wrapper_mode in ['past', 'i_load']: + env_test = FeatureWrapper_pastVals(env_test, number_of_features=9 + used_number_past_vales * 3, + # training_episode_length=training_episode_length, (da aus pickle!) 
+ recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, + number_past_vals=used_number_past_vales) + + elif wrapper_mode == 'future': + env_test = FeatureWrapper_futureVals(env_test, number_of_features=9, + recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, number_future_vals=10, + future_data=data_str) + + elif wrapper_mode == 'I-controller': + env_test = FeatureWrapper_I_controller(env_test, number_of_features=14 + used_number_past_vales * 3, + recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, + Ki=12, + number_past_vals=number_past_vals) + + elif wrapper_mode == 'no-I-term': + env_test = BaseWrapper(env_test, number_of_features=6 + used_number_past_vales * 3, + recorder=mongo_recorder, n_trail=n_trail, gamma=gamma, + number_past_vals=used_number_past_vales) + + else: + env_test = FeatureWrapper(env_test, number_of_features=11, + recorder=mongo_recorder, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, + penalty_P_weight=0) # , use_past_vals=True, number_past_vals=30) + + if wrapper_mode not in ['no-I-term', 'I-controller']: + env_test.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + # model2 = DDPG.load(model_path + f'model.zip') # , env=env_test) + model = DDPG.load(model_path + f'{used_model}') #, env=env_test) + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + if wrapper_mode not in ['no-I-term', 'I-controller']: + env_test.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + return_sum = 0.0 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + rew_list = [] + v_d = [] + v_q = [] + v_0 = [] + action_P0 = [] + action_P1 = [] + action_P2 = [] + action_I0 = [] + action_I1 = [] + action_I2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + R_load = [] + + ####### Run Test ######### + # agent ~ PI Controllerv using env + # model ~ RL Controller using env_test + # Both run in the same loop + + obs = env_test.reset() + + for step in tqdm(range(env_test.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + action, _states = model.predict(obs, deterministic=True) + if step == 988: + asd = 1 + obs, rewards, done, info = env_test.step(action) + action_P0.append(np.float64(action[0])) + action_P1.append(np.float64(action[1])) + action_P2.append(np.float64(action[2])) + if wrapper_mode not in ['no-I-term', 'I-controller']: + action_I0.append(np.float64(action[3])) + action_I1.append(np.float64(action[4])) + action_I2.append(np.float64(action[5])) + 
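+                    # Bookkeeping for the plots further below: action[0:3] is the proportional
+                    # part and action[3:6] the integral part of the RL action; the wrapper's
+                    # internal integrator state (env_test.integrator_sum) is logged next to it.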
integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + + # print(rewards) + if done: + env_test.close() + + # print(limit_exceeded_in_test) + break + + env_test.close() + + v_a = env_test.history.df['lc.capacitor1.v'] + v_b = env_test.history.df['lc.capacitor2.v'] + v_c = env_test.history.df['lc.capacitor3.v'] + i_a = env_test.history.df['lc.inductor1.i'] + i_b = env_test.history.df['lc.inductor2.i'] + i_c = env_test.history.df['lc.inductor3.i'] + R_load = (env_test.history.df['r_load.resistor1.R'].tolist()) + phase = env_test.history.df['inverter1.phase.0'] # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + + i_d = i_dq0[0].tolist() + i_q = i_dq0[1].tolist() + i_0 = i_dq0[2].tolist() + v_d = (v_dq0[0].tolist()) + v_q = (v_dq0[1].tolist()) + v_0 = (v_dq0[2].tolist()) + + plt.plot(v_d_PI, 'b') + plt.plot(v_q_PI, 'r') + plt.plot(v_0_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title('PI') + plt.show() + + plt.plot(rew_list_PI) + plt.xlabel("") + plt.grid() + plt.ylabel("Reward") + plt.title('PI') + plt.show() + + plt.plot(R_load_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('Test') + plt.show() + + plt.plot(v_d, 'b') + plt.plot(v_q, 'r') + plt.plot(v_0, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title(f'DDPG - {used_model}') + plt.show() + + plt.plot(rew_list) + plt.xlabel("") + plt.grid() + plt.ylabel("Reward") + plt.title(f'DDPG - {used_model}') + plt.show() + + plt.plot(R_load, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('DDPG') + plt.show() + + plt.plot(integrator_sum0) + plt.plot(integrator_sum0) + plt.plot(integrator_sum0) + plt.ylabel('intergratorzustand') + plt.show() + + plt.plot(action_I0) + plt.plot(action_I1) + plt.plot(action_I2) + plt.ylabel('action I') + plt.show() + + plt.plot(action_P0) + plt.plot(action_P1) + plt.plot(action_P2) + plt.ylabel('action P') + plt.show() + + # return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + print(f'RL: {(return_sum / env_test.max_episode_steps + limit_exceeded_penalty)}') + print(f'PI: {(return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI)}') + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "model name": model_name, + "Wrapper": wrapper, + "used_number_past_vales": used_number_past_vales, + "time": ts, + "ActionP0": action_P0, + "ActionP1": action_P1, + "ActionP2": action_P2, + "ActionI0": action_I0, + "ActionI1": action_I1, + "ActionI2": action_I2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "DDPG_model_path": model_path, + "Return DDPG": (return_sum / env_test.max_episode_steps + limit_exceeded_penalty), + "Reward DDPG": rew_list, + "env_hist_DDPG": env_test.env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "execution of RL agent on 10 s test case-loading values", + 
"optimization node": 'Thinkpad', + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle( + f'{folder_name}/' + used_model + f'_{max_episode_steps_list[max_eps_steps]}steps_NoPhaseFeature_1427') + + ret_list.append((return_sum / env_test.max_episode_steps + limit_exceeded_penalty)) + ret_array[ave_run] = (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + # ret_dict[str(ave_run)] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # zipped = zip(max_episode_steps_list[max_eps_steps], ret_list) + # temp_dict = dict(zipped) + temp_dict = {str(max_episode_steps_list[max_eps_steps]): ret_list} + result_list.append(temp_dict) + # ret_dict.append(zipped) + # df = df.append(ret_dict) + + mean_list.append(np.mean(ret_array)) + std_list.append(np.std(ret_array)) + +# df = df.append(temp_list, True) +print(mean_list) +print(std_list) +print(result_list) + +results = { + 'Mean': mean_list, + 'Std': std_list, + 'All results': result_list, + 'max_episode_steps_list': max_episode_steps_list +} + +df = pd.DataFrame(results) +# df.to_pickle("DDPG_study18_best_test_varianz.pkl") +asd = 1 diff --git a/experiments/hp_tune/visualize_tests/Paper_plts.py b/experiments/hp_tune/visualize_tests/Paper_plts.py new file mode 100644 index 00000000..04f86523 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/Paper_plts.py @@ -0,0 +1,223 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px +import matplotlib + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +save_results = True + +# Fuer den 10s Fall +interval_list_x = [[0, 0.005], [7.145, 7.155]] +interval_list_y = [[-5, 202], [-20, 345]] + +folder_name = 'saves/paper_new2' # _deterministic' + +number_of_steps = '_100000steps' + +df = pd.read_pickle(folder_name + '/PI' + number_of_steps) + +env_hist_PI = df['env_hist_PI'] +v_a_PI = env_hist_PI[0]['lc.capacitor1.v'].tolist() +v_b_PI = env_hist_PI[0]['lc.capacitor2.v'].tolist() +v_c_PI = env_hist_PI[0]['lc.capacitor3.v'].tolist() +R_load_PI = (env_hist_PI[0]['r_load.resistor1.R'].tolist()) +phase_PI = env_hist_PI[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) +v_d_PI = (v_dq0_PI[0].tolist()) +v_q_PI = (v_dq0_PI[1].tolist()) +v_0_PI = (v_dq0_PI[2].tolist()) + +i_a_PI = env_hist_PI[0]['lc.inductor1.i'].tolist() +i_b_PI = env_hist_PI[0]['lc.inductor2.i'].tolist() +i_c_PI = env_hist_PI[0]['lc.inductor3.i'].tolist() +i_dq0_PI = abc_to_dq0(np.array([i_a_PI, i_b_PI, i_c_PI]), phase_PI) +i_d_PI = (i_dq0_PI[0].tolist()) +i_q_PI = (i_dq0_PI[1].tolist()) +i_0_PI = (i_dq0_PI[2].tolist()) + +reward_PI = df['Reward PI'][0] +return_PI = df['Return PI'][0] +kp_c = df['PI_Kp_c'][0] +ki_c = df['PI_Ki_c'][0] +kp_v = df['PI_Kp_v'][0] +ki_v = df['PI_Ki_v'][0] + +model_names = ['model_OMG_DDPG_Actor.zip', + 'model_OMG_DDPG_Integrator_no_pastVals.zip'] +ylabels = ['DDPG', 'DDPG-I'] + +return_list_DDPG = [] +reward_list_DDPG = [] + +ts = 1e-4 # if ts stored: take from db + +v_d_ref = [169.7] * len(v_0_PI) + +t_test = np.arange(0, round((len(v_0_PI)) * ts, 4), ts).tolist() +t_reward = np.arange(0, round((len(reward_PI)) * ts, 4), ts).tolist() + +# fig, axs = plt.subplots(len(model_names) + 4, len(interval_list_y), +fig, axs = plt.subplots(3, len(interval_list_y), + figsize=(9, 7)) # , sharex=True) # a new figure window + +for i in range(len(interval_list_y)): + plt_count = 4 + ############## Subplots + # fig = 
plt.figure(figsize=(10,12)) # a new figure window + + df_DDPG = pd.read_pickle(folder_name + '/' + model_names[0] + number_of_steps) + + if i == 0: + return_list_DDPG.append(round(df_DDPG['Return DDPG'][0], 7)) + # reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + + env_hist_DDPG = df_DDPG['env_hist_DDPG'] + + v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() + v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() + v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() + i_a = env_hist_DDPG[0]['lc.inductor1.i'].tolist() + i_b = env_hist_DDPG[0]['lc.inductor2.i'].tolist() + i_c = env_hist_DDPG[0]['lc.inductor3.i'].tolist() + phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + v_d_DDPG = (v_dq0[0].tolist()) + v_q_DDPG = (v_dq0[1].tolist()) + v_0_DDPG = (v_dq0[2].tolist()) + i_d_DDPG = (i_dq0[0].tolist()) + i_q_DDPG = (i_dq0[1].tolist()) + i_0_DDPG = (i_dq0[2].tolist()) + + DDPG_reward = df_DDPG['Reward DDPG'][0] + + df_DDPG_I = pd.read_pickle(folder_name + '/' + model_names[1] + number_of_steps) + + if i == 0: + return_list_DDPG.append(round(df_DDPG_I['Return DDPG'][0], 7)) + # reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + + env_hist_DDPG_I = df_DDPG_I['env_hist_DDPG'] + + v_a_I = env_hist_DDPG_I[0]['lc.capacitor1.v'].tolist() + v_b_I = env_hist_DDPG_I[0]['lc.capacitor2.v'].tolist() + v_c_I = env_hist_DDPG_I[0]['lc.capacitor3.v'].tolist() + i_a_I = env_hist_DDPG_I[0]['lc.inductor1.i'].tolist() + i_b_I = env_hist_DDPG_I[0]['lc.inductor2.i'].tolist() + i_c_I = env_hist_DDPG_I[0]['lc.inductor3.i'].tolist() + phase_I = env_hist_DDPG_I[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0_I = abc_to_dq0(np.array([v_a_I, v_b_I, v_c_I]), phase_I) + i_dq0_I = abc_to_dq0(np.array([i_a_I, i_b_I, i_c_I]), phase_I) + v_d_DDPG_I = (v_dq0_I[0].tolist()) + v_q_DDPG_I = (v_dq0_I[1].tolist()) + v_0_DDPG_I = (v_dq0_I[2].tolist()) + i_d_DDPG_I = (i_dq0_I[0].tolist()) + i_q_DDPG_I = (i_dq0_I[1].tolist()) + i_0_DDPG_I = (i_dq0_I[2].tolist()) + + DDPG_reward_I = df_DDPG_I['Reward DDPG'][0] + + axs[2, i].plot(t_reward, reward_PI, 'b', label=f' PI: ' + f'{round(sum(reward_PI[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + axs[2, i].plot(t_reward, DDPG_reward, 'r', label=f' DDPG: ' + f'{round(sum(DDPG_reward[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + axs[2, i].plot(t_reward, DDPG_reward_I, 'g', label=f'DDPG+I: ' + f'{round(sum(DDPG_reward_I[int(interval_list_x[i][0] / ts):int(interval_list_x[i][1] / ts)]) / ((interval_list_x[i][1] - interval_list_x[i][0]) / ts), 4)}') + + axs[2, i].grid() + axs[2, i].set_xlim(interval_list_x[i]) + # axs[1, i].set_ylim(interval_list_y[i]) + axs[2, i].legend() + axs[2, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + axs[2, i].set_ylabel("Reward") + + axs[0, i].plot(t_test, v_d_ref, '--', color='gray') + axs[0, i].plot(t_test, v_d_PI, 'b', label='PI') + axs[0, i].plot(t_test, v_d_DDPG, 'r', label='DDPG') + axs[0, i].plot(t_test, v_d_DDPG_I, 'g', label='DDPG+I') + # axs[2, i].plot(t_test, v_q_PI, 'r', label='v_q') + # axs[2, i].plot(t_test, v_0_PI, 'g', label='v_0') + axs[0, i].grid() + axs[0, i].legend() + axs[0, i].set_xlim(interval_list_x[i]) + axs[0, i].set_ylim(interval_list_y[i]) + if i == 0: + axs[0, 
i].set_ylabel("$v_{\mathrm{d}}\,/\,\mathrm{V}$") + # else: + # axs[1, i].set_ylabel("$v_{\mathrm{q0, PI}}\,/\,\mathrm{V}$") + + axs[1, i].plot(t_test, i_d_PI, 'b', label='PI') + axs[1, i].plot(t_test, i_d_DDPG, 'r', label='DDPG') + axs[1, i].plot(t_test, i_d_DDPG_I, 'g', label='DDPG+I') + # axs[1, i].plot(t_test, i_q_PI, 'r', label='v_q') + # axs[1, i].plot(t_test, i_0_PI, 'g', label='v_0') + axs[1, i].grid() + # axs[1, i].legend() + axs[1, i].set_xlim(interval_list_x[i]) + # axs[3, i].set_ylim(interval_list_y[i]) + if i == 0: + axs[1, i].set_ylim([0, 15]) + axs[1, i].set_ylabel("$i_{\mathrm{d}}\,/\,\mathrm{A}$") + + """ + axs[plt_count, i].plot(t_test, v_d_DDPG, 'b') + axs[plt_count, i].plot(t_test, v_q_DDPG, 'r') + axs[plt_count, i].plot(t_test, v_0_DDPG, 'g') + axs[plt_count, i].grid() + axs[plt_count, i].set_xlim(interval_list_x[i]) + axs[plt_count, i].set_ylim(interval_list_y[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + + axs[plt_count, i].set_ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") + # else: + # axs[plt_count, i].set_ylabel("$v_{\mathrm{q0, DDPG}}\,/\,\mathrm{V}$") + plt_count += 1 + + axs[plt_count, i].plot(t_test, i_d_DDPG, 'b') + axs[plt_count, i].plot(t_test, i_q_DDPG, 'r') + axs[plt_count, i].plot(t_test, i_0_DDPG, 'g') + axs[plt_count, i].grid() + axs[plt_count, i].set_xlim(interval_list_x[i]) + #axs[plt_count, i].set_ylim(interval_list_y[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + axs[plt_count, i].set_ylabel("$i_{\mathrm{dq0, DDPG}}\,/\,\mathrm{A}$") + """ + +# fig.suptitle(f'Model using pastVals:' + str(14) + ' \n ' +# f'Model-return(MRE)' + str(return_list_DDPG) + ' \n' +# f' PI-return(MRE): {round(return_PI, 7)} \n ' +# f'PI: Kp_i = {kp_c}, Ki_i = {ki_c}, Kp_v = {kp_v}, Ki_v = {ki_v}', +# fontsize=14) + +fig.subplots_adjust(wspace=0.2, hspace=0.2) +plt.show() + +fig.savefig(f'{folder_name}/Ausschnitt_2pV_q0.pdf') +if save_results: + # Plot setting + params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 8, # fontsize for x and y labels (was 10) + 'axes.titlesize': 8, + 'font.size': 8, # was 10 + 'legend.fontsize': 8, # was 10 + 'xtick.labelsize': 8, + 'ytick.labelsize': 8, + 'text.usetex': True, + 'figure.figsize': [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + matplotlib.rcParams.update(params) + + fig.savefig(f'{folder_name}/_OMG_U_I.png') + fig.savefig(f'{folder_name}/_OMG_U_I.pdf') + fig.savefig(f'{folder_name}/_OMG_U_I.pgf') diff --git a/experiments/hp_tune/visualize_tests/mongoDB_plt.py b/experiments/hp_tune/visualize_tests/mongoDB_plt.py new file mode 100644 index 00000000..351d9523 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/mongoDB_plt.py @@ -0,0 +1,206 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from bson import ObjectId +from plotly import tools +from pymongo import MongoClient + +from openmodelica_microgrid_gym.util import dq0_to_abc, abc_to_dq0 + +plt_train = True + +# db_name = 'PC2_DDGP_Vctrl_single_inv_18_penalties' +# db_name = 'DDPG_SplitActor_Best_study18_6462' +db_name = 'P10_setting_best_study22_clipped_abort_newReward_design' +trial = '0' +show_episode_number = 19 + +with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with 
MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + trial = db.Trial_number_43 + + train_data = trial.find_one({"Name": "After_Training"}) + train_episode_data = trial.find_one({"Episode_number": show_episode_number}) + trial_config = trial.find_one({"Name": "Config"}) + + ts = 1e-4 # if ts stored: take from db + # t_test = np.arange(0, len(trial_test['lc_capacitor1_v']) * ts, ts).tolist() + + if train_episode_data is not None: + # only available if loglevel == 'train' + ############################################################## + # Plot example Training Episode + R_load = train_episode_data['R_load_training'] + i_a = train_episode_data['i_a_training'] + i_b = train_episode_data['i_b_training'] + i_c = train_episode_data['i_c_training'] + v_a = train_episode_data['v_a_training'] + v_b = train_episode_data['v_b_training'] + v_c = train_episode_data['v_c_training'] + reward = train_episode_data['Rewards'] + phase = train_episode_data['Phase'] + + plt.plot(R_load) + plt.grid() + plt.xlabel("steps") + plt.ylabel("R_load") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(i_a) + plt.plot(i_b) + plt.plot(i_c) + plt.grid() + plt.xlabel("steps") + plt.ylabel("i_abc") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(v_a) + plt.plot(v_b) + plt.plot(v_c) + plt.grid() + plt.xlabel("steps") + plt.ylabel("v_abc") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(reward) + plt.grid() + plt.xlabel("steps") + plt.ylabel("Reward") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(train_episode_data['Integrator0']) + plt.plot(train_episode_data['Integrator1']) + plt.plot(train_episode_data['Integrator2']) + plt.grid() + plt.xlabel("steps") + plt.ylabel("Int Zustand") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(train_episode_data['actionP0']) + plt.plot(train_episode_data['actionP1']) + plt.plot(train_episode_data['actionP2']) + plt.grid() + plt.xlabel("steps") + plt.ylabel("actionP") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(train_episode_data['actionI0']) + plt.plot(train_episode_data['actionI1']) + plt.plot(train_episode_data['actionI2']) + plt.grid() + plt.xlabel("steps") + plt.ylabel("actionI") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + df = pd.DataFrame() + df['R_load'] = R_load + + hist = df['R_load'].hist(bins=50) + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + """ + plot = px.Figure() + plot.add_trace( + px.Scatter(y=R_load) + """ + # df2['v_0_SP'] = pd.DataFrame(test_data['inverter1_v_ref_0']) + # df2['v_1_SP'] = pd.DataFrame(test_data['inverter1_v_ref_1']) + # df2['v_2_SP'] = pd.DataFrame(test_data['inverter1_v_ref_2']) + + # df2['phase'] = pd.DataFrame(test_data['Phase']) + + # v_sp_abc = dq0_to_abc(np.array([df2['v_0_SP'], df2['v_1_SP'], df2['v_2_SP']]), np.array(df2['phase'])) + + v_mess_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), np.array(phase)) + + # x = df2['t'] + v_d = v_mess_dq0[0][:] # df2['v_a'] + v_q = v_mess_dq0[1][:] # df2['v_b'] + v_0 = v_mess_dq0[2][:] # df2['v_c'] + + plt.plot(v_d) + plt.plot(v_q) + plt.plot(v_0) + plt.grid() + plt.xlabel("steps") + plt.ylabel("v_dq0") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + # v_a_SP = df2['v_0_SP']#v_sp_abc[0,:] + # v_b_SP = df2['v_1_SP']#v_sp_abc[1,:] + # v_c_SP = df2['v_2_SP']#v_sp_abc[2,:] + + plot = px.Figure() + 
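+            # Note: px is plotly.graph_objects here (see import above), not plotly.express.
+            # Interactive figure of the recorded phase voltages v_a, v_b, v_c of the selected
+            # training episode; the range slider added below allows zooming into load steps.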
plot.add_trace( + px.Scatter(y=v_a)) + + plot.add_trace( + px.Scatter(y=v_b)) + + plot.add_trace( + px.Scatter(y=v_c)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() + + ############################################################## + # After Training + + train_reward_per_episode = train_data['Mean_eps_reward'] + + ax = plt.plot(train_reward_per_episode) + plt.grid() + plt.xlabel("Episodes") + # plt.yscale('log') + plt.ylabel("Mean episode Reward") + # plt.ylim([-0.06, -0.025]) + # plt.title("1.000.000") + plt.show() + + if True: + plot = px.Figure() + plot.add_trace( + px.Scatter(y=train_reward_per_episode)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() diff --git a/experiments/hp_tune/visualize_tests/old/CompareModels.py b/experiments/hp_tune/visualize_tests/old/CompareModels.py new file mode 100644 index 00000000..a26038c2 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/old/CompareModels.py @@ -0,0 +1,633 @@ +import logging +import os +import platform +import time +from functools import partial + +import GPy +import matplotlib.pyplot as plt +import numpy as np +import torch as th +from stable_baselines3 import DDPG +from stochastic.processes import VasicekProcess +from tqdm import tqdm +# imports net to define reward and executes script to register experiment +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise + +# from agents.my_ddpg import myDDPG +from experiments.hp_tune.env.env_wrapper import FeatureWrapper, FeatureWrapper_pastVals +from experiments.hp_tune.env.rewards import Reward +from experiments.hp_tune.env.vctrl_single_inv import net # , folder_name +from experiments.hp_tune.util.config import cfg +from experiments.hp_tune.util.recorder import Recorder + +# imports for PIPI +from experiments.hp_tune.env.random_load import RandomLoad +from openmodelica_microgrid_gym.agents import SafeOptAgent +from openmodelica_microgrid_gym.agents.util import MutableFloat +from openmodelica_microgrid_gym.aux_ctl import PI_params, DroopParams, \ + MultiPhaseDQ0PIPIController +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.env.plotmanager import PlotManager +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import FullHistory, RandProcess + +import pandas as pd + +from openmodelica_microgrid_gym.util import abc_to_dq0 + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +import gym + +# np.random.seed(0) + +show_plots = True +save_results = False + +folder_name = 'saves/Comparison_study_22_best_pastVal_HPO_oldtestEnv_PI_SP_corr' # cfg['STUDY_NAME'] +node = platform.uname().node + +# model_name = 'model_retrain_pastVals12.zip' +number_past_vals = [2] # [0, 5, 10, 16, 25] # [30, 0] +use_past_vals = [True] # [False, True, True, True, True] # [True, False] +# model_name = ['model.zip'] +model_path = 'experiments/hp_tune/trained_models/study_22_best_pastVal_HPO_oldtestEnv/' +# model_path = 'experiments/hp_tune/trained_models/study_22_best_iLoad_Feature/' + +model_name = [ + 'model_2_pastVals.zip'] # ['model_0_pastVals.zip', 'model_5_pastVals.zip', 'model_10_pastVals.zip', 'model_16_pastVals.zip', 
'model_25_pastVals.zip', ] # , 'model_noPastVals.zip'] + +error_exponent = 0.5 + +mongo_recorder = Recorder(node=node, database_name=folder_name) + +num_average = 1 +max_episode_steps_list = [20000] # [1000, 5000, 10000, 20000, 50000, 100000] + +result_list = [] +ret_list = [] +mean_list = [] +std_list = [] +ret_array = np.zeros(num_average) + +df = pd.DataFrame() +ret_dict = dict() + +#################PI Config stuff############################################################################## + +current_directory = os.getcwd() +# folder_name = 'Pipi_safeopt_best_run4d' +save_folder = os.path.join(current_directory, folder_name) +os.makedirs(save_folder, exist_ok=True) + +# Simulation definitions +# net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +net = Network.load('net/net_vctrl_single_inv.yaml') +delta_t = 1e-4 # simulation time step size / s +undersample = 1 +# max_episode_steps = 1002 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from +DroopGain = 0.0 # virtual droop gain for active power / W/Hz +QDroopGain = 0.0 # virtual droop gain for reactive power / VAR/V + +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F + +lower_bound_load = -10 # to allow maximal load that draws i_limit +upper_bound_load = 200 # to apply symmetrical load bounds +lower_bound_load_clip = 14 # to allow maximal load that draws i_limit (let exceed?) +upper_bound_load_clip = 200 # to apply symmetrical load bounds +lower_bound_load_clip_std = 2 +upper_bound_load_clip_std = 0 +##################################### +# Definitions for the GP +prior_mean = 0 # 2 # mean factor of the GP prior mean which is multiplied with the first performance of the initial set +noise_var = 0.001 # ** 2 # measurement noise sigma_omega +prior_var = 2 # prior variance of the GP + +bounds = [(0.000, 0.045), (4, 450)] # bounds on the input variable current-Ki&Kp and voltage-Ki&Kp +lengthscale = [.003, 50.] 
# length scale for the parameter variation [current-Ki&Kp and voltage-Ki&Kp] for the GP + +safe_threshold = 0 +j_min = -5 # cal min allowed performance + +explore_threshold = 0 + +# Factor to multiply with the initial reward to give back an abort_reward-times higher negative reward in case of +# limit exceeded +abort_reward = 100 * j_min + +# Definition of the kernel +kernel = GPy.kern.Matern32(input_dim=len(bounds), variance=prior_var, lengthscale=lengthscale, ARD=True) + +##################################### +# Definition of the controllers +# kp_v = 0.002 +# ki_v = 143 +kp_v = 0 # 0.0095 # 0.0 +ki_v = 182 # 173.22 # 200 +# Choose Kp and Ki for the current and voltage controller as mutable parameters +mutable_params = dict(voltageP=MutableFloat(kp_v), voltageI=MutableFloat(ki_v)) # 300Hz +# mutable_params = dict(voltageP=MutableFloat(0.016), voltageI=MutableFloat(105)) # 300Hz +voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], + limits=(-i_lim, i_lim)) + +kp_c = 0.0308 # 0.0404 # 0.04 +ki_c = 13.3584 # 4.065 # 11.8 +current_dqp_iparams = PI_params(kP=kp_c, kI=ki_c, limits=(-1, 1)) # Current controller values +droop_param = DroopParams(DroopGain, 0.005, net.freq_nom) +qdroop_param = DroopParams(QDroopGain, 0.002, net.v_nom) + +ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, + ts_sim=delta_t, + ts_ctrl=undersample * delta_t, + name='master') + +agent = SafeOptAgent(mutable_params, + abort_reward, + j_min, + kernel, + dict(bounds=bounds, noise_var=noise_var, prior_mean=prior_mean, + safe_threshold=safe_threshold, explore_threshold=explore_threshold), + [ctrl], + dict(master=[[f'lc.inductor{k}.i' for k in '123'], + [f'lc.capacitor{k}.v' for k in '123'] + ]), + history=FullHistory(), + ) + +################DDPG Config Stuff######################################################################### +gamma = 0.946218 +integrator_weight = 0.311135 +antiwindup_weight = 0.660818 +error_exponent = error_exponent +use_gamma_in_rew = 1 +n_trail = 50001 +actor_number_layers = 2 +critic_number_layers = 4 +alpha_relu_actor = 0.208098 +alpha_relu_critic = 0.00678497 + +for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + for ave_run in tqdm(range(num_average), desc='steps', unit='step', leave=False): + + rew = Reward(net.v_nom, net['inverter1'].v_lim, net['inverter1'].v_DC, gamma, + use_gamma_normalization=use_gamma_in_rew, error_exponent=error_exponent, + i_lim=net['inverter1'].i_lim, + i_nom=net['inverter1'].i_nom) + + ####################################PI Stuff################################################ + R = np.random.uniform(low=lower_bound_load, high=upper_bound_load) + + gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=800, vol=40, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + rand_load_train = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + bounds=(lower_bound_load_clip, upper_bound_load_clip), + bounds_std=(lower_bound_load_clip_std, upper_bound_load_clip_std)) + rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_tenLoadstepPerEpisode2881Len_test_case_10_seconds.pkl')) + # 'experiments/hp_tune/data/R_load_oneLoadstepPerEpisode2881Len_test_case_10_seconds.pkl')) + 'experiments/hp_tune/data/R_load_hard_test_case_10_seconds.pkl')) + + cb = CallbackList() + # set initial = None to reset load random in 
range of bounds + cb.append(partial(gen.reset)) # , initial=np.random.uniform(low=lower_bound_load, high=upper_bound_load))) + cb.append(rand_load_train.reset) + + plotter = PlotManager(agent, save_results=save_results, save_folder=save_folder, + show_plots=show_plots) + + + # rand_load_test = RandomLoad(max_episode_steps_list[max_eps_steps], net.ts, gen, + # load_curve=pd.read_pickle( + # 'experiments/hp_tune/data/R_load_test_case_2_seconds.pkl')) + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + # ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + ts = time.gmtime() + # fig.savefig(f'{save_folder}/Load{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + # fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + # ax.set_xlim([0, 0.005]) + ts = time.gmtime() + # fig.savefig( + # f'{folder_name + experiment_name}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + if show_plots: + plt.show() + else: + plt.close() + + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun_PIPI_MRE, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'master.SPV{i}' for i in 'abc']], + callback=plotter.xylables_v_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'master.CVV{i}' for i in 'dq0'], [f'master.SPV{i}' for i in 'dq0']], + callback=plotter.xylables_v_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'master.SPI{i}' for i in 'abc']], + callback=plotter.xylables_i_abc, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ), + PlotTmpl([[f'master.CVi{i}' for i in 'dq0'], [f'master.SPI{i}' for i in 'dq0']], + callback=plotter.xylables_i_dq0, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps_list[max_eps_steps], + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor1.R'), + 'r_load.resistor2.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor2.R'), + 'r_load.resistor3.R': partial(rand_load_test.give_dataframe_value, + col='r_load.resistor3.R'), + # 'lc.capacitor1.v': 0, + # 'lc.capacitor2.v': 0, + # 'lc.capacitor3.v': 0, + # 'lc.inductor1.i': 0, + # 'lc.inductor2.i': 0, + # 'lc.inductor3.i': 0, + }, + net=net, + model_path='omg_grid/grid.paper_loadstep.fmu', + history=FullHistory(), + # 
on_episode_reset_callback=cb.fire, + action_time_delay=1 * undersample + ) + + rew.gamma = 0 + return_sum_PI = 0.0 + rew_list_PI = [] + v_d_PI = [] + v_q_PI = [] + v_0_PI = [] + R_load_PI = [] + limit_exceeded_in_test_PI = False + limit_exceeded_penalty_PI = 0 + + agent.reset() + agent.obs_varnames = env.history.cols + env.history.cols = env.history.structured_cols(None) + agent.measurement_cols + env.measure = agent.measure + agent_fig = None + obs_PI = env.reset() + + for step in tqdm(range(env.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + agent.observe(None, False) + act_PI = agent.act(obs_PI) + obs_PI, r_PI, done_PI, info_PI = env.step(act_PI) + rew_list_PI.append(r_PI) + env.render() + return_sum_PI += r_PI + if r_PI == -1 and not limit_exceeded_in_test_PI: + # Set addidional penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test_PI = True + limit_exceeded_penalty_PI = -1 + + _, env_fig = env.close() + agent.observe(r_PI, done_PI) + + v_a_PI = env.history.df['lc.capacitor1.v'] + v_b_PI = env.history.df['lc.capacitor2.v'] + v_c_PI = env.history.df['lc.capacitor3.v'] + i_a_PI = env.history.df['lc.inductor1.i'] + i_b_PI = env.history.df['lc.inductor2.i'] + i_c_PI = env.history.df['lc.inductor3.i'] + R_load_PI = (env.history.df['r_load.resistor1.R'].tolist()) + phase_PI = env.history.df['inverter1.phase.0'] # env.net.components[0].phase + + i_dq0_PI = abc_to_dq0(np.array([i_a_PI, i_b_PI, i_c_PI]), phase_PI) + v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) + + i_d_PI = i_dq0_PI[0].tolist() + i_q_PI = i_dq0_PI[1].tolist() + i_0_PI = i_dq0_PI[2].tolist() + v_d_PI = (v_dq0_PI[0].tolist()) + v_q_PI = (v_dq0_PI[1].tolist()) + v_0_PI = (v_dq0_PI[2].tolist()) + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "time": ts, + "PI_Kp_c": kp_c, + "PI_Ki_c": ki_c, + "PI_Kp_v": kp_v, + "PI_Ki_v": ki_v, + "DDPG_model_path": model_path, + "Return PI": (return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI), + "env_hist_PI": env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "PI result for comparison with RL agent", + "optimization node": 'Thinkpad', + "optimization folder name": 'Pipi_new_testcase_opt_4d_undsafe_2' + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/PI_{max_episode_steps_list[max_eps_steps]}steps') + + ####################################DDPG Stuff############################################## + + rew.gamma = 0 + # episodes will not abort, if limit is exceeded reward = -1 + rew.det_run = True + rew.exponent = 0.5 # 1 + + net = Network.load('net/net_vctrl_single_inv_dq0.yaml') # is used from vctrl_single_env, not needed here + + for used_model, used_past_vals, used_number_past_vales in zip(model_name, use_past_vals, number_past_vals): + + env_test = gym.make('experiments.hp_tune.env:vctrl_single_inv_test-v0', + reward_fun=rew.rew_fun_dq0, + abort_reward=-1, # no needed if in rew no None is given back + # on_episode_reset_callback=cb.fire # needed? 
+ obs_output=['lc.inductor1.i', 'lc.inductor2.i', 'lc.inductor3.i', + 'lc.capacitor1.v', 'lc.capacitor2.v', 'lc.capacitor3.v', + 'inverter1.v_ref.0', 'inverter1.v_ref.1', 'inverter1.v_ref.2'], + # ,'r_load.resistor1.i', 'r_load.resistor2.i', 'r_load.resistor3.i'], + max_episode_steps=max_episode_steps_list[max_eps_steps] + ) + + if used_past_vals: + env_test = FeatureWrapper_pastVals(env_test, number_of_features=11 + used_number_past_vales, + # training_episode_length=training_episode_length, (da aus pickle!) + recorder=mongo_recorder, n_trail=n_trail, + integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, penalty_P_weight=0, + number_past_vals=used_number_past_vales) + else: + env_test = FeatureWrapper(env_test, number_of_features=11, + recorder=mongo_recorder, integrator_weight=integrator_weight, + antiwindup_weight=antiwindup_weight, gamma=1, + penalty_I_weight=0, + penalty_P_weight=0) # , use_past_vals=True, number_past_vals=30) + + # env_test = FeatureWrapper(env_test, number_of_features=11+used_number_past_vales, integrator_weight=integrator_weight, + # recorder=mongo_recorder, antiwindup_weight=antiwindup_weight, + # gamma=1, penalty_I_weight=0, penalty_P_weight=0)#, use_past_vals=used_past_vals) + # using gamma=1 and rew_weigth=3 we get the original reward from the env without penalties + + env_test.action_space = gym.spaces.Box(low=np.full(6, -1), high=np.full(6, 1)) + + # model2 = DDPG.load(model_path + f'model.zip') # , env=env_test) + model = DDPG.load(model_path + f'{used_model}') # , env=env_test) + + count = 0 + for kk in range(actor_number_layers + 1): + + if kk < actor_number_layers: + model.actor.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + model.actor_target.mu._modules[str(count + 1)].negative_slope = alpha_relu_actor + + count = count + 2 + + count = 0 + + for kk in range(critic_number_layers + 1): + + if kk < critic_number_layers: + model.critic.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + model.critic_target.qf0._modules[str(count + 1)].negative_slope = alpha_relu_critic + + count = count + 2 + + env_test.action_space = gym.spaces.Box(low=np.full(3, -1), high=np.full(3, 1)) + + return_sum = 0.0 + limit_exceeded_in_test = False + limit_exceeded_penalty = 0 + + rew_list = [] + v_d = [] + v_q = [] + v_0 = [] + action_P0 = [] + action_P1 = [] + action_P2 = [] + action_I0 = [] + action_I1 = [] + action_I2 = [] + integrator_sum0 = [] + integrator_sum1 = [] + integrator_sum2 = [] + R_load = [] + + ####### Run Test ######### + # agent ~ PI Controllerv using env + # model ~ RL Controller using env_test + # Both run in the same loop + + obs = env_test.reset() + + for step in tqdm(range(env_test.max_episode_steps), desc='steps', unit='step', leave=False): + # for max_eps_steps in tqdm(range(len(max_episode_steps_list)), desc='steps', unit='step', leave=False): + + action, _states = model.predict(obs, deterministic=True) + obs, rewards, done, info = env_test.step(action) + action_P0.append(np.float64(action[0])) + action_P1.append(np.float64(action[1])) + action_P2.append(np.float64(action[2])) + action_I0.append(np.float64(action[3])) + action_I1.append(np.float64(action[4])) + action_I2.append(np.float64(action[5])) + integrator_sum0.append(np.float64(env_test.integrator_sum[0])) + integrator_sum1.append(np.float64(env_test.integrator_sum[1])) + integrator_sum2.append(np.float64(env_test.integrator_sum[2])) + + if rewards == -1 and not limit_exceeded_in_test: + # Set addidional 
penalty of -1 if limit is exceeded once in the test case + limit_exceeded_in_test = True + limit_exceeded_penalty = -1 + env_test.render() + return_sum += rewards + rew_list.append(rewards) + + # print(rewards) + if done: + env_test.close() + + # print(limit_exceeded_in_test) + break + + env_test.close() + + v_a = env_test.history.df['lc.capacitor1.v'] + v_b = env_test.history.df['lc.capacitor2.v'] + v_c = env_test.history.df['lc.capacitor3.v'] + i_a = env_test.history.df['lc.inductor1.i'] + i_b = env_test.history.df['lc.inductor2.i'] + i_c = env_test.history.df['lc.inductor3.i'] + R_load = (env_test.history.df['r_load.resistor1.R'].tolist()) + phase = env_test.history.df['inverter1.phase.0'] # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + + i_d = i_dq0[0].tolist() + i_q = i_dq0[1].tolist() + i_0 = i_dq0[2].tolist() + v_d = (v_dq0[0].tolist()) + v_q = (v_dq0[1].tolist()) + v_0 = (v_dq0[2].tolist()) + + plt.plot(v_d_PI, 'b') + plt.plot(v_q_PI, 'r') + plt.plot(v_0_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title('PI') + plt.show() + + plt.plot(R_load_PI, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('Test') + plt.show() + + plt.plot(v_d, 'b') + plt.plot(v_q, 'r') + plt.plot(v_0, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel("v_dq0") + plt.title(f'DDPG - {used_model}') + plt.show() + + plt.plot(R_load, 'g') + plt.xlabel("") + plt.grid() + plt.ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + plt.title('PI') + plt.show() + + # return (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + print(f'RL: {(return_sum / env_test.max_episode_steps + limit_exceeded_penalty)}') + print(f'PI: {(return_sum_PI / env.max_episode_steps + limit_exceeded_penalty_PI)}') + + ts = time.gmtime() + compare_result = {"Name": "comparison_PI_DDPG", + "model name": model_name, + "used past values as Features": used_past_vals, + "used_number_past_vales": used_number_past_vales, + "time": ts, + "ActionP0": action_P0, + "ActionP1": action_P1, + "ActionP2": action_P2, + "ActionI0": action_I0, + "ActionI1": action_I1, + "ActionI2": action_I2, + "integrator_sum0": integrator_sum0, + "integrator_sum1": integrator_sum1, + "integrator_sum2": integrator_sum2, + "DDPG_model_path": model_path, + "Return DDPG": (return_sum / env_test.max_episode_steps + limit_exceeded_penalty), + "env_hist_DDPG": env_test.env.history.df, + "max_episode_steps": str(max_episode_steps_list[max_eps_steps]), + "number of averages per run": num_average, + "info": "execution of RL agent on 10 s test case-loading values", + "optimization node": 'Thinkpad', + } + store_df = pd.DataFrame([compare_result]) + store_df.to_pickle(f'{folder_name}/' + used_model + f'_{max_episode_steps_list[max_eps_steps]}steps') + + ret_list.append((return_sum / env_test.max_episode_steps + limit_exceeded_penalty)) + ret_array[ave_run] = (return_sum / env_test.max_episode_steps + limit_exceeded_penalty) + + # ret_dict[str(ave_run)] = (return_sum / env.max_episode_steps + limit_exceeded_penalty) + + # zipped = zip(max_episode_steps_list[max_eps_steps], ret_list) + # temp_dict = dict(zipped) + temp_dict = {str(max_episode_steps_list[max_eps_steps]): ret_list} + result_list.append(temp_dict) + # ret_dict.append(zipped) + # df = df.append(ret_dict) + + mean_list.append(np.mean(ret_array)) + std_list.append(np.std(ret_array)) + +# df = df.append(temp_list, True) +print(mean_list) 
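Both the PI benchmark above and the DDPG evaluation score an episode the same way: the collected rewards are summed, divided by the nominal episode length, and a one-time penalty of -1 is added if the hard limit was hit at least once (signalled by a step reward of exactly -1). A minimal sketch of that scoring rule, with hypothetical names and shown only for illustration:

```python
def episode_score(rewards, max_episode_steps):
    """Average reward over the nominal episode length plus a one-time -1
    penalty if the limit was exceeded (step reward of exactly -1) at least once.
    Note: the sum is divided by max_episode_steps, not len(rewards), so an
    episode that terminates early also scores lower."""
    limit_penalty = -1 if any(r == -1 for r in rewards) else 0
    return sum(rewards) / max_episode_steps + limit_penalty


# episode_score([0.9, 0.8, -1, 0.7], max_episode_steps=4) -> 1.4 / 4 - 1 = -0.65
```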
+print(std_list) +print(result_list) + +results = { + 'Mean': mean_list, + 'Std': std_list, + 'All results': result_list, + 'max_episode_steps_list': max_episode_steps_list +} + +df = pd.DataFrame(results) +# df.to_pickle("DDPG_study18_best_test_varianz.pkl") +asd = 1 diff --git a/experiments/hp_tune/visualize_tests/old/CompareModelsPlotting.py b/experiments/hp_tune/visualize_tests/old/CompareModelsPlotting.py new file mode 100644 index 00000000..8b529486 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/old/CompareModelsPlotting.py @@ -0,0 +1,477 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +make_pyplot = False +show_load = True +interval_plt = True +# interval_list_x = [[0, 0.015], [6.32, 6.42], [6.4, 6.42]]#, [6.4, 6.42]]#[0.993, 0.997], [0.993, 0.997]] +# interval_list_y = [[-20, 310], [-20, 330], [50, 330]]#, [50, 325]] +interval_list_x = [[6.405, 6.41], [0, 0.015], [0.173, 0.177]] # [0.993, 0.997], [0.993, 0.997]] +interval_list_y = [[110, 330], [-20, 310], [-5, 210]] # , [50, 325]] +folder_name = 'saves/Comparison_PI_DDPG_iLoad_Feature' +# folder_name = 'saves/Comparison_PI_DDPGs_oneLoadstepPerEpisode' +# name = 'DDPG_PI_local_PastVals_10000steps' +df = pd.read_pickle(folder_name + '/PI_100000steps') +# df_DDPG_past_vals = pd.read_pickle(folder_name + '/model_pastVals.zip_10000steps') +df_DDPG_past_vals = pd.read_pickle( + 'saves/Comparison_PI_DDPGs_oneLoadstepPerEpisode' + '/model_pastVals.zip_100000steps') +# df_DDPG = pd.read_pickle(folder_name + '/model_noPastVals_10000steps') +df_DDPG = pd.read_pickle(folder_name + '/model.zip_100000steps') +# df_PI = pd.read_pickle(folder_name+'/PI_10000steps') +# df = pd.read_pickle('DDPG_PI_local_10000steps') + +env_hist_DDPG = df_DDPG['env_hist_DDPG'] + +v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() +v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() +v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() +i_a = env_hist_DDPG[0]['lc.inductor1.i'].tolist() +i_b = env_hist_DDPG[0]['lc.inductor2.i'].tolist() +i_c = env_hist_DDPG[0]['lc.inductor3.i'].tolist() +R_load = (env_hist_DDPG[0]['r_load.resistor1.R'].tolist()) +phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) +i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) + +i_d_DDPG = i_dq0[0].tolist() +i_q_DDPG = i_dq0[1].tolist() +i_0_DDPG = i_dq0[2].tolist() +v_d_DDPG = (v_dq0[0].tolist()) +v_q_DDPG = (v_dq0[1].tolist()) +v_0_DDPG = (v_dq0[2].tolist()) + +env_hist_DDPG_pastVals = df_DDPG_past_vals['env_hist_DDPG'] + +v_a_pastVals = env_hist_DDPG_pastVals[0]['lc.capacitor1.v'].tolist() +v_b_pastVals = env_hist_DDPG_pastVals[0]['lc.capacitor2.v'].tolist() +v_c_pastVals = env_hist_DDPG_pastVals[0]['lc.capacitor3.v'].tolist() +i_a_pastVals = env_hist_DDPG_pastVals[0]['lc.inductor1.i'].tolist() +i_b_pastVals = env_hist_DDPG_pastVals[0]['lc.inductor2.i'].tolist() +i_c_pastVals = env_hist_DDPG_pastVals[0]['lc.inductor3.i'].tolist() +R_load_DDPG_pastVals = (env_hist_DDPG_pastVals[0]['r_load.resistor1.R'].tolist()) +phase_pastVals = env_hist_DDPG_pastVals[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_pastVals = abc_to_dq0(np.array([v_a_pastVals, v_b_pastVals, v_c_pastVals]), phase_pastVals) +i_dq0_pastVals = abc_to_dq0(np.array([i_a_pastVals, i_b_pastVals, i_c_pastVals]), phase_pastVals) + +i_d_DDPG_pastVals = i_dq0_pastVals[0].tolist() 
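The `abc_to_dq0` calls in these scripts rotate the measured three-phase quantities into the dq0 frame using the inverter phase angle recorded in the history. For orientation, a textbook amplitude-invariant Park transform is sketched below; the exact sign and scaling convention of the `openmodelica_microgrid_gym.util` implementation may differ, so treat this only as an illustration of what the call computes conceptually.

```python
import numpy as np


def abc_to_dq0_sketch(x_abc, theta):
    """x_abc: array of shape (3, N) with phase quantities a, b, c;
    theta: array of shape (N,) with the rotation angle in rad."""
    x_abc = np.asarray(x_abc)
    theta = np.asarray(theta)
    d = 2 / 3 * (np.cos(theta) * x_abc[0]
                 + np.cos(theta - 2 * np.pi / 3) * x_abc[1]
                 + np.cos(theta + 2 * np.pi / 3) * x_abc[2])
    q = -2 / 3 * (np.sin(theta) * x_abc[0]
                  + np.sin(theta - 2 * np.pi / 3) * x_abc[1]
                  + np.sin(theta + 2 * np.pi / 3) * x_abc[2])
    zero = (x_abc[0] + x_abc[1] + x_abc[2]) / 3
    return np.stack([d, q, zero])
```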
+i_q_DDPG_pastVals = i_dq0_pastVals[1].tolist() +i_0_DDPG_pastVals = i_dq0_pastVals[2].tolist() +v_d_DDPG_pastVals = (v_dq0_pastVals[0].tolist()) +v_q_DDPG_pastVals = (v_dq0_pastVals[1].tolist()) +v_0_DDPG_pastVals = (v_dq0_pastVals[2].tolist()) + +env_hist_PI = df['env_hist_PI'] +v_a_PI = env_hist_PI[0]['lc.capacitor1.v'].tolist() +v_b_PI = env_hist_PI[0]['lc.capacitor2.v'].tolist() +v_c_PI = env_hist_PI[0]['lc.capacitor3.v'].tolist() +i_a_PI = env_hist_PI[0]['lc.inductor1.i'].tolist() +i_b_PI = env_hist_PI[0]['lc.inductor2.i'].tolist() +i_c_PI = env_hist_PI[0]['lc.inductor3.i'].tolist() +R_load_PI = (env_hist_PI[0]['r_load.resistor1.R'].tolist()) +phase_PI = env_hist_PI[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) +i_dq0_PI = abc_to_dq0(np.array([i_a_PI, i_b_PI, i_c_PI]), phase_PI) +i_d_PI = i_dq0_PI[0].tolist() +i_q_PI = i_dq0_PI[1].tolist() +i_0_PI = i_dq0_PI[2].tolist() +v_d_PI = (v_dq0_PI[0].tolist()) +v_q_PI = (v_dq0_PI[1].tolist()) +v_0_PI = (v_dq0_PI[2].tolist()) + +m_d_PI = env_hist_PI[0]['master.md'].tolist() +m_q_PI = env_hist_PI[0]['master.mq'].tolist() +m_0_PI = env_hist_PI[0]['master.m0'].tolist() + +m_a_PI = env_hist_PI[0]['master.ma'].tolist() +m_b_PI = env_hist_PI[0]['master.mb'].tolist() +m_c_PI = env_hist_PI[0]['master.mc'].tolist() + +return_PR = df['Return PI'][0] +return_DDPG = df_DDPG['Return DDPG'][0] +return_DDPG_pastVals = df_DDPG_past_vals['Return DDPG'][0] + +kp_c = df['PI_Kp_c'][0] +ki_c = df['PI_Ki_c'][0] +kp_v = df['PI_Kp_v'][0] +ki_v = df['PI_Ki_v'][0] + +ts = 1e-4 # if ts stored: take from db + +# t_test_R = np.arange(ts, (len(testcase_100k['v_d_PI'])) * ts, ts).tolist() + +t_test = np.arange(0, round((len(v_0_PI)) * ts, 4), ts).tolist() + +# plot first interval once +plt.plot(t_test, R_load) +plt.grid() +# plt.xlim([0, 0.025]) +plt.xlabel("time") +plt.ylabel("R_load") +plt.title('') +plt.show() + +plt.plot(t_test, R_load_DDPG_pastVals) +plt.grid() +# plt.xlim([0, 0.025]) +plt.xlabel("time") +plt.ylabel("R_load_DDPG_pastVals") +plt.title('') +plt.show() + +plt.plot(t_test, R_load_PI) +plt.grid() +# plt.xlim([0, 0.025]) +plt.xlabel("time") +plt.ylabel("R_load_PI") +plt.title('') +plt.show() + +plt.plot(t_test, v_d_PI, 'b', label='v_d') +plt.plot(t_test, v_q_PI, 'r', label='v_q') +plt.plot(t_test, v_0_PI, 'g', label='v_0') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +plt.xlim([0, 0.025]) +plt.ylim([-50, 250]) +plt.xlabel("time") +plt.ylabel("v_dq0_PI") +plt.title(f' PI-return(MRE): {return_PR}') +plt.show() + +plt.plot(t_test, v_d_DDPG, 'b') +plt.plot(t_test, v_q_DDPG, 'r') +plt.plot(t_test, v_0_DDPG, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +plt.xlim([0, 0.025]) +plt.ylim([-50, 250]) +plt.xlabel("time") +plt.ylabel("v_dq0_DDPG") +plt.title(f'DDPG-return(MRE): {return_DDPG}') +plt.show() + +plt.plot(t_test, v_d_DDPG_pastVals, 'b') +plt.plot(t_test, v_q_DDPG_pastVals, 'r') +plt.plot(t_test, v_0_DDPG_pastVals, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +plt.xlim([0, 0.025]) +plt.ylim([-50, 250]) +plt.xlabel("time") +plt.ylabel("v_dq0_DDPG_pV") +plt.title(f'DDPG-return(MRE) using past observations: {return_DDPG_pastVals}') +plt.show() + +plt.plot(t_test, v_d_PI, 'b', label='v_d') +plt.plot(t_test, v_q_PI, 'r', label='v_q') +plt.plot(t_test, v_0_PI, 'g', label='v_0') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +# plt.xlim([0, 0.025]) +plt.ylim([-50, 300]) +plt.xlabel("time") +plt.ylabel("v_dq0_PI") +plt.title(f' PI-return(MSE): 
{return_PR}') +plt.show() + +plt.plot(t_test, v_d_DDPG, 'b') +plt.plot(t_test, v_q_DDPG, 'r') +plt.plot(t_test, v_0_DDPG, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +# plt.xlim([0, 0.025]) +plt.ylim([-50, 300]) +plt.xlabel("time") +plt.ylabel("v_dq0_DDPG") +plt.title(f'DDPG-return(MSE): {return_DDPG}') +plt.show() + +plt.plot(t_test, v_d_DDPG_pastVals, 'b') +plt.plot(t_test, v_q_DDPG_pastVals, 'r') +plt.plot(t_test, v_0_DDPG_pastVals, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +plt.grid() +# plt.xlim([0, 0.025]) +plt.ylim([-50, 300]) +plt.xlabel("time") +plt.ylabel("v_dq0_DDPG_pV") +plt.title(f'DDPG-return(MRE) using past observations: {return_DDPG_pastVals}') +plt.show() +############################# +############## Subplots +# fig = plt.figure(figsize=(10,12)) # a new figure window +fig, axs = plt.subplots(4, 3, figsize=(16, 12)) # , sharex=True) # a new figure window +fig.suptitle(f'DDPG-return using i_load-feature(MRE): {return_DDPG} \n ' + f'DDPG-return(MRE) using past observations: {return_DDPG_pastVals} \n' + f'PI-return(MRE): {return_PR} \n ' + f'PI: Kp_i = {kp_c}, Ki_i = {ki_c}, Kp_v = {kp_v}, Ki_v = {ki_v}', fontsize=14) + +plt_count = 1 + +i = 0 + +# ax = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes +axs[0, 0].plot(t_test, R_load) +axs[0, 0].grid() +axs[0, 0].set_xlim(interval_list_x[i]) +# axs[0, i].set_xlabel("time") +axs[0, 0].set_ylabel("R_load") +# axs[0, i].set_title(f'#{plt_count}') +# plt.show() +plt_count += 1 + +# ax2 = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes +axs[1, 0].plot(t_test, v_d_PI, 'b', label='v_d') +axs[1, 0].plot(t_test, v_q_PI, 'r', label='v_q') +axs[1, 0].plot(t_test, v_0_PI, 'g', label='v_0') +# plt.plot(t_test, v_sp_abc[0, :]) +axs[1, 0].grid() +axs[1, 0].set_xlim(interval_list_x[i]) +axs[1, 0].set_ylim(interval_list_y[i]) + +axs[1, 0].set_ylabel("v_dq0_PI") + +# ax3 = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes +axs[2, 0].plot(t_test, v_d_DDPG, 'b') +axs[2, 0].plot(t_test, v_q_DDPG, 'r') +axs[2, 0].plot(t_test, v_0_DDPG, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +axs[2, 0].grid() +axs[2, 0].set_xlim(interval_list_x[i]) +axs[2, 0].set_ylim(interval_list_y[i]) +axs[2, 0].set_xlabel("time") +axs[2, 0].set_ylabel("v_dq0_DDPG") + +####### +# ax = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes +axs[0, 1].plot(t_test, m_d_PI, 'b') +axs[0, 1].plot(t_test, m_q_PI, 'r') +axs[0, 1].plot(t_test, m_0_PI, 'g') +axs[0, 1].grid() +axs[0, 1].set_xlim(interval_list_x[i]) +axs[0, 1].set_ylim([-0.2, 0.8]) +axs[0, 1].set_ylabel("m_dq0_PI") + +# ax2 = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes +axs[1, 1].plot(t_test, i_d_PI, 'b', label='i_d') +axs[1, 1].plot(t_test, i_q_PI, 'r', label='i_q') +axs[1, 1].plot(t_test, i_0_PI, 'g', label='i_0') +# plt.plot(t_test, v_sp_abc[0, :]) +axs[1, 1].grid() +axs[1, 1].set_xlim(interval_list_x[i]) +axs[1, 1].set_ylim([-10, 14]) +# axs[1, i].set_xlabel("time") + +axs[1, 1].set_ylabel("i_dq0_PI") + +axs[2, 1].plot(t_test, i_d_DDPG, 'b') +axs[2, 1].plot(t_test, i_q_DDPG, 'r') +axs[2, 1].plot(t_test, i_0_DDPG, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +axs[2, 1].grid() +axs[2, 1].set_xlim(interval_list_x[i]) +axs[2, 1].set_ylim([-10, 14]) +axs[2, 1].set_xlabel("time") +axs[2, 1].set_ylabel("i_dq0_DDPG") +# axs[2, i].set_title(f'#{plt_count}') +# plt.show() + + +####### +actionP0 = df_DDPG['ActionP0'][0] +t_action = np.arange(0, round((len(actionP0)) * ts, 4), ts).tolist() + +axs[2, 2].plot(t_action, actionP0, 'b') +axs[2, 
2].plot(t_action, df_DDPG['ActionP1'][0], 'r') +axs[2, 2].plot(t_action, df_DDPG['ActionP2'][0], 'g') +axs[2, 2].grid() +axs[2, 2].set_xlim(interval_list_x[i]) +axs[2, 2].set_ylabel("action_P_012_DDPG") + +""" +axs[1, 2].plot(t_action, df['ActionI0'][0], 'b') +axs[1, 2].plot(t_action, df['ActionI1'][0], 'r') +axs[1, 2].plot(t_action, df['ActionI2'][0], 'g') +axs[1, 2].grid() +axs[1, 2].set_xlim(interval_list_x[i]) +#axs[1, 2].set_ylim(interval_list_x[i]) +axs[1, 2].set_ylabel("action_I_012_DDPG") +""" +axs[0, 2].plot(t_action, [sum(x) for x in zip(df_DDPG['integrator_sum0'][0], actionP0)], 'b') +axs[0, 2].plot(t_action, [sum(x) for x in zip(df_DDPG['integrator_sum1'][0], df_DDPG['ActionP1'][0])], 'r') +axs[0, 2].plot(t_action, [sum(x) for x in zip(df_DDPG['integrator_sum2'][0], df_DDPG['ActionP2'][0])], 'g') +axs[0, 2].grid() +axs[0, 2].set_xlim(interval_list_x[i]) +axs[0, 2].set_ylim([-0.2, 0.8]) +axs[0, 2].set_ylabel("m_dq0_DDPG") + +axs[1, 2].plot(t_action, df_DDPG['integrator_sum0'][0], 'b') +axs[1, 2].plot(t_action, df_DDPG['integrator_sum1'][0], 'r') +axs[1, 2].plot(t_action, df_DDPG['integrator_sum2'][0], 'g') +axs[1, 2].grid() +axs[1, 2].set_xlim(interval_list_x[i]) +axs[1, 2].set_ylim([-0.2, 0.8]) +axs[1, 2].set_ylabel("Intergrator_sum_dq0_DDPG") +""" +# ax = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes +axs[0, 2].plot(t_test, m_a_PI, 'b') +axs[0, 2].plot(t_test, m_b_PI, 'r') +axs[0, 2].plot(t_test, m_c_PI, 'g') +axs[0, 2].grid() +axs[0, 2].set_xlim(interval_list_x[i]) +axs[0, 2].set_ylabel("m_abc_PI") + + + +# ax2 = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes +axs[1, 1].plot(t_test, i_d_PI, 'b', label='i_d') +axs[1, 1].plot(t_test, i_q_PI, 'r', label='i_q') +axs[1, 1].plot(t_test, i_0_PI, 'g', label='i_0') +# plt.plot(t_test, v_sp_abc[0, :]) +axs[1, 1].grid() +axs[1, 1].set_xlim(interval_list_x[i]) +axs[1, 1].set_ylim(interval_list_y[i]) +# axs[1, i].set_xlabel("time") + +axs[1, 1].set_ylabel("i_dq0_PI") + + +axs[2, 1].plot(t_test, i_d_DDPG, 'b') +axs[2, 1].plot(t_test, i_q_DDPG, 'r') +axs[2, 1].plot(t_test, i_0_DDPG, 'g') +# plt.plot(t_test, v_sp_abc[0, :]) +axs[2, 1].grid() +axs[2, 1].set_xlim(interval_list_x[i]) +axs[2, 1].set_ylim(interval_list_y[i]) +axs[2, 1].set_xlabel("time") +axs[2, 1].set_ylabel("i_dq0_DDPG") +# axs[2, i].set_title(f'#{plt_count}') +# plt.show() + + + +""" + +#fig.savefig(f'{folder_name}/overview.pdf') + +fig.subplots_adjust(wspace=0.4, hspace=0.2) +plt.show() + +if interval_plt: + ############## Subplots + # fig = plt.figure(figsize=(10,12)) # a new figure window + fig, axs = plt.subplots(4, len(interval_list_y), figsize=(16, 12)) # , sharex=True) # a new figure window + fig.suptitle(f'DDPG-return(MRE): {return_DDPG} \n ' + f'DDPG-return(MRE) using past observations: {return_DDPG_pastVals} \n' + f' PI-return(MRE): {return_PR} \n ' + f'PI: Kp_i = {kp_c}, Ki_i = {ki_c}, Kp_v = {kp_v}, Ki_v = {ki_v}', fontsize=14) + + plt_count = 1 + + for i in range(len(interval_list_y)): + + # ax = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes + axs[0, i].plot(t_test, R_load) + axs[0, i].grid() + axs[0, i].set_xlim(interval_list_x[i]) + # axs[0, i].set_xlabel("time") + if i == 0: + axs[0, i].set_ylabel("R_load") + # axs[0, i].set_title(f'#{plt_count}') + # plt.show() + plt_count += 1 + + # ax2 = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes + axs[1, i].plot(t_test, v_d_PI, 'b', label='v_d') + axs[1, i].plot(t_test, v_q_PI, 'r', label='v_q') + axs[1, i].plot(t_test, v_0_PI, 'g', label='v_0') + 
# plt.plot(t_test, v_sp_abc[0, :]) + axs[1, i].grid() + axs[1, i].set_xlim(interval_list_x[i]) + axs[1, i].set_ylim(interval_list_y[i]) + # axs[1, i].set_xlabel("time") + if i == 0: + axs[1, i].set_ylabel("v_dq0_PI") + # axs[1, i].set_title(f'#{plt_count}') + # plt.show() + plt_count += 1 + + # ax3 = fig.add_subplot(3, len(interval_list_y), plt_count) # a new axes + axs[2, i].plot(t_test, v_d_DDPG, 'b') + axs[2, i].plot(t_test, v_q_DDPG, 'r') + axs[2, i].plot(t_test, v_0_DDPG, 'g') + # plt.plot(t_test, v_sp_abc[0, :]) + axs[2, i].grid() + axs[2, i].set_xlim(interval_list_x[i]) + axs[2, i].set_ylim(interval_list_y[i]) + axs[2, i].set_xlabel("time") + if i == 0: + axs[2, i].set_ylabel("v_dq0_DDPG i_load_feature") + # axs[2, i].set_title(f'#{plt_count}') + # plt.show() + plt_count += 1 + + axs[3, i].plot(t_test, v_d_DDPG_pastVals, 'b') + axs[3, i].plot(t_test, v_q_DDPG_pastVals, 'r') + axs[3, i].plot(t_test, v_0_DDPG_pastVals, 'g') + axs[3, i].grid() + axs[3, i].set_xlim(interval_list_x[i]) + axs[3, i].set_ylim(interval_list_y[i]) + axs[3, i].set_xlabel("time") + if i == 0: + axs[3, i].set_ylabel("v_dq0_DDPG_pastVals") + # axs[2, i].set_title(f'#{plt_count}') + # plt.show() + plt_count += 1 + + fig.subplots_adjust(wspace=0.2, hspace=0.2) + plt.show() + + fig.savefig(f'{folder_name}/Ausschnitt2.pdf') + +if make_pyplot: + # pyplot PI + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=v_d_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_PI)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + # pyplot ddpg + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=v_d_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_DDPG)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + # pyplot Load + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=R_load)) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() diff --git a/experiments/hp_tune/visualize_tests/old/CompareModelsPlottingLoop.py b/experiments/hp_tune/visualize_tests/old/CompareModelsPlottingLoop.py new file mode 100644 index 00000000..db0a9336 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/old/CompareModelsPlottingLoop.py @@ -0,0 +1,179 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +make_pyplot = True +show_load = True +interval_plt = True + +# interval_list_x = [[0.992, 1], [0.992, 1]] +# interval_list_y = [[150, 230], [-10, 10]] +interval_list_x = [[0, 0.02], [0, 0.02]] +interval_list_y = [[-10, 310], [-10, 40]] +folder_name = 'saves/Comparison_study_22_best_pastVal_HPO_oldtestEnv' + +df = pd.read_pickle(folder_name + '/PI_20000steps') + +env_hist_PI = df['env_hist_PI'] +v_a_PI = env_hist_PI[0]['lc.capacitor1.v'].tolist() +v_b_PI = env_hist_PI[0]['lc.capacitor2.v'].tolist() +v_c_PI = env_hist_PI[0]['lc.capacitor3.v'].tolist() +R_load_PI = 
(env_hist_PI[0]['r_load.resistor1.R'].tolist()) +phase_PI = env_hist_PI[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) +v_d_PI = (v_dq0_PI[0].tolist()) +v_q_PI = (v_dq0_PI[1].tolist()) +v_0_PI = (v_dq0_PI[2].tolist()) + +return_PI = df['Return PI'][0] +kp_c = df['PI_Kp_c'][0] +ki_c = df['PI_Ki_c'][0] +kp_v = df['PI_Kp_v'][0] +ki_v = df['PI_Ki_v'][0] + +model_names = [ + 'model_2_pastVals.zip'] # ['model_0_pastVals.zip','model_2_pastVals.zip', 'model_5_pastVals.zip', 'model_10_pastVals.zip', 'model_16_pastVals.zip', 'model_25_pastVals.zip', ] # , 'model_noPastVals.zip'] +pastVals = ['2'] # ['0', '2', '5', '10', '16', '25'] +reward_list_DDPG = [] + +ts = 1e-4 # if ts stored: take from db + +# t_test_R = np.arange(ts, (len(testcase_100k['v_d_PI'])) * ts, ts).tolist() + +t_test = np.arange(0, round((len(v_0_PI)) * ts, 4), ts).tolist() + +# fig, axs = plt.subplots(len(model_names)+2, len(interval_list_y), figsize=(16, 12)) # , sharex=True) # a new figure window +fig, axs = plt.subplots(len(model_names) + 2, len(interval_list_y), + figsize=(12, 10)) # , sharex=True) # a new figure window + +for i in range(len(interval_list_y)): + plt_count = 2 + ############## Subplots + # fig = plt.figure(figsize=(10,12)) # a new figure window + + for model_name, pV in zip(model_names, pastVals): + + df_DDPG = pd.read_pickle(folder_name + '/' + model_name + '_20000steps') + + if i == 0: + reward_list_DDPG.append(round(df_DDPG['Return DDPG'][0], 4)) + + env_hist_DDPG = df_DDPG['env_hist_DDPG'] + + v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() + v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() + v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() + phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + v_d_DDPG = (v_dq0[0].tolist()) + v_q_DDPG = (v_dq0[1].tolist()) + v_0_DDPG = (v_dq0[2].tolist()) + + axs[0, i].plot(t_test, R_load_PI) + axs[0, i].grid() + axs[0, i].set_xlim(interval_list_x[i]) + # axs[0, i].set_ylim([15, 75]) + # if i == 0: + axs[0, i].set_ylabel("$R_{\mathrm{load}}\,/\,\mathrm{\Omega}$") + # ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + + axs[1, i].plot(t_test, v_d_PI, 'b', label='v_d') + axs[1, i].plot(t_test, v_q_PI, 'r', label='v_q') + axs[1, i].plot(t_test, v_0_PI, 'g', label='v_0') + axs[1, i].grid() + axs[1, i].set_xlim(interval_list_x[i]) + axs[1, i].set_ylim(interval_list_y[i]) + if i == 0: + axs[1, i].set_ylabel("$v_{\mathrm{dq0, PI}}\,/\,\mathrm{V}$") + else: + axs[1, i].set_ylabel("$v_{\mathrm{q0, PI}}\,/\,\mathrm{V}$") + + axs[plt_count, i].plot(t_test, v_d_DDPG, 'b') + axs[plt_count, i].plot(t_test, v_q_DDPG, 'r') + axs[plt_count, i].plot(t_test, v_0_DDPG, 'g') + axs[plt_count, i].grid() + axs[plt_count, i].set_xlim(interval_list_x[i]) + axs[plt_count, i].set_ylim(interval_list_y[i]) + axs[plt_count, i].set_xlabel(r'$t\,/\,\mathrm{s}$') + if i == 0: + # axs[plt_count, i].set_ylabel(pV) + axs[plt_count, i].set_ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") + else: + axs[plt_count, i].set_ylabel("$v_{\mathrm{q0, DDPG}}\,/\,\mathrm{V}$") + plt_count += 1 + + +fig.suptitle(f'Model using pastVals:' +str(pastVals)+' \n ' + f'Model-return(MRE)'+ str(reward_list_DDPG) +' \n' + f' PI-return(MRE): {return_PI} \n ' + f'PI: Kp_i = {kp_c}, Ki_i = {ki_c}, Kp_v = {kp_v}, Ki_v = {ki_v}', fontsize=14) + +fig.subplots_adjust(wspace=0.2, hspace=0.2) +plt.show() + 
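Loading a result pickle, pulling the three capacitor voltages and the phase out of the stored history and converting them to dq0 is repeated almost verbatim for every model in these plotting scripts. A small helper along the following lines (hypothetical, not part of the repository) would keep that boilerplate in one place:

```python
import numpy as np
import pandas as pd
from openmodelica_microgrid_gym.util import abc_to_dq0


def load_v_dq0(pickle_path):
    """Return (v_d, v_q, v_0) lists from a stored PI or DDPG comparison result."""
    df = pd.read_pickle(pickle_path)
    hist = df['env_hist_DDPG'][0] if 'env_hist_DDPG' in df else df['env_hist_PI'][0]
    v_abc = np.array([hist[f'lc.capacitor{k}.v'].tolist() for k in '123'])
    phase = hist['inverter1.phase.0'].tolist()
    v_dq0 = abc_to_dq0(v_abc, phase)
    return v_dq0[0].tolist(), v_dq0[1].tolist(), v_dq0[2].tolist()


# e.g. v_d_DDPG, v_q_DDPG, v_0_DDPG = load_v_dq0(folder_name + '/' + model_name + '_20000steps')
```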
+#fig.savefig(f'{folder_name}/Ausschnitt_2pV_blackstart.pdf') + +if make_pyplot: + # pyplot Load + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=R_load_PI)) # , title='R_load') + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + # pyplot PI + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=v_d_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_PI)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() + + for model_name in model_names: + df_DDPG = pd.read_pickle(folder_name + '/' + model_name + '_20000steps') + + env_hist_DDPG = df_DDPG['env_hist_DDPG'] + + v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() + v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() + v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() + phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase + v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) + v_d_DDPG = (v_dq0[0].tolist()) + v_q_DDPG = (v_dq0[1].tolist()) + v_0_DDPG = (v_dq0[2].tolist()) + # pyplot ddpg + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=v_d_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_DDPG)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_DDPG)) + # plot.add_trace( + # px.Scatter(x=t_test, y=v_sp_abc[1, :])) + plot.add_trace( + px.Scatter(x=t_test, y=v_d_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_q_PI)) + plot.add_trace( + px.Scatter(x=t_test, y=v_0_PI)) + + plot.update_layout(xaxis=dict(rangeselector=dict(buttons=list([ + dict(count=1, step="day", stepmode="backward"), ])), + rangeslider=dict(visible=True), )) + plot.show() diff --git a/experiments/hp_tune/visualize_tests/paper_lpt_single.py b/experiments/hp_tune/visualize_tests/paper_lpt_single.py new file mode 100644 index 00000000..8fd1cb9e --- /dev/null +++ b/experiments/hp_tune/visualize_tests/paper_lpt_single.py @@ -0,0 +1,487 @@ +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +from openmodelica_microgrid_gym.util import abc_to_dq0 + +save_results = True + +# Fuer den 10s Fall +interval_list_x = [7.1465, 7.1505] +interval_list_y = [80, 345] + +folder_name = 'saves/paper_new2' # _deterministic' + +number_of_steps = '_100000steps' + +df = pd.read_pickle(folder_name + '/PI' + number_of_steps) + +env_hist_PI = df['env_hist_PI'] +v_a_PI = env_hist_PI[0]['lc.capacitor1.v'].tolist() +v_b_PI = env_hist_PI[0]['lc.capacitor2.v'].tolist() +v_c_PI = env_hist_PI[0]['lc.capacitor3.v'].tolist() +R_load_PI = (env_hist_PI[0]['r_load.resistor1.R'].tolist()) +phase_PI = env_hist_PI[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_PI = abc_to_dq0(np.array([v_a_PI, v_b_PI, v_c_PI]), phase_PI) +v_d_PI = (v_dq0_PI[0].tolist()) +v_q_PI = (v_dq0_PI[1].tolist()) +v_0_PI = (v_dq0_PI[2].tolist()) + +i_a_PI = env_hist_PI[0]['lc.inductor1.i'].tolist() +i_b_PI = env_hist_PI[0]['lc.inductor2.i'].tolist() +i_c_PI = env_hist_PI[0]['lc.inductor3.i'].tolist() +i_dq0_PI = abc_to_dq0(np.array([i_a_PI, i_b_PI, i_c_PI]), phase_PI) +i_d_PI = (i_dq0_PI[0].tolist()) +i_q_PI = (i_dq0_PI[1].tolist()) +i_0_PI = (i_dq0_PI[2].tolist()) + +reward_PI = df['Reward PI'][0] +return_PI = df['Return PI'][0] +kp_c 
= df['PI_Kp_c'][0] +ki_c = df['PI_Ki_c'][0] +kp_v = df['PI_Kp_v'][0] +ki_v = df['PI_Ki_v'][0] + +model_names = ['model_OMG_DDPG_Actor.zip', + 'model_OMG_DDPG_Integrator_no_pastVals.zip', + 'model_OMG_DDPG_Integrator_no_pastVals_corr.zip', + 'model_OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr.zip'] +ylabels = ['DDPG', 'DDPG-I'] + +return_list_DDPG = [] +reward_list_DDPG = [] + +ts = 1e-4 # if ts stored: take from db + +v_d_ref = [169.7] * len(v_0_PI) + +t_test = np.arange(0, round((len(v_0_PI)) * ts, 4), ts).tolist() +t_reward = np.arange(0, round((len(reward_PI)) * ts, 4), ts).tolist() + +# fig, axs = plt.subplots(len(model_names) + 4, len(interval_list_y), +fig = plt.figure() + +############## Subplots +# fig = plt.figure(figsize=(10,12)) # a new figure window + +df_DDPG = pd.read_pickle(folder_name + '/' + model_names[0] + number_of_steps) + +return_list_DDPG.append(round(df_DDPG['Return DDPG'][0], 7)) +# reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + +env_hist_DDPG = df_DDPG['env_hist_DDPG'] + +v_a = env_hist_DDPG[0]['lc.capacitor1.v'].tolist() +v_b = env_hist_DDPG[0]['lc.capacitor2.v'].tolist() +v_c = env_hist_DDPG[0]['lc.capacitor3.v'].tolist() +i_a = env_hist_DDPG[0]['lc.inductor1.i'].tolist() +i_b = env_hist_DDPG[0]['lc.inductor2.i'].tolist() +i_c = env_hist_DDPG[0]['lc.inductor3.i'].tolist() +phase = env_hist_DDPG[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), phase) +i_dq0 = abc_to_dq0(np.array([i_a, i_b, i_c]), phase) +v_d_DDPG = (v_dq0[0].tolist()) +v_q_DDPG = (v_dq0[1].tolist()) +v_0_DDPG = (v_dq0[2].tolist()) +i_d_DDPG = (i_dq0[0].tolist()) +i_q_DDPG = (i_dq0[1].tolist()) +i_0_DDPG = (i_dq0[2].tolist()) + +DDPG_reward = df_DDPG['Reward DDPG'][0] + +df_DDPG_I = pd.read_pickle(folder_name + '/' + model_names[1] + number_of_steps) + +return_list_DDPG.append(round(df_DDPG_I['Return DDPG'][0], 7)) +# reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + +env_hist_DDPG_I = df_DDPG_I['env_hist_DDPG'] + +v_a_I = env_hist_DDPG_I[0]['lc.capacitor1.v'].tolist() +v_b_I = env_hist_DDPG_I[0]['lc.capacitor2.v'].tolist() +v_c_I = env_hist_DDPG_I[0]['lc.capacitor3.v'].tolist() +i_a_I = env_hist_DDPG_I[0]['lc.inductor1.i'].tolist() +i_b_I = env_hist_DDPG_I[0]['lc.inductor2.i'].tolist() +i_c_I = env_hist_DDPG_I[0]['lc.inductor3.i'].tolist() +phase_I = env_hist_DDPG_I[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_I = abc_to_dq0(np.array([v_a_I, v_b_I, v_c_I]), phase_I) +i_dq0_I = abc_to_dq0(np.array([i_a_I, i_b_I, i_c_I]), phase_I) +v_d_DDPG_I = (v_dq0_I[0].tolist()) +v_q_DDPG_I = (v_dq0_I[1].tolist()) +v_0_DDPG_I = (v_dq0_I[2].tolist()) +i_d_DDPG_I = (i_dq0_I[0].tolist()) +i_q_DDPG_I = (i_dq0_I[1].tolist()) +i_0_DDPG_I = (i_dq0_I[2].tolist()) + +DDPG_reward_I = df_DDPG_I['Reward DDPG'][0] + +df_DDPG_I = pd.read_pickle(folder_name + '/' + model_names[2] + number_of_steps) + +return_list_DDPG.append(round(df_DDPG_I['Return DDPG'][0], 7)) +# reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + +env_hist_DDPG_I = df_DDPG_I['env_hist_DDPG'] + +v_a_I_noPV = env_hist_DDPG_I[0]['lc.capacitor1.v'].tolist() +v_b_I_noPV = env_hist_DDPG_I[0]['lc.capacitor2.v'].tolist() +v_c_I_noPV = env_hist_DDPG_I[0]['lc.capacitor3.v'].tolist() +i_a_I_noPV = env_hist_DDPG_I[0]['lc.inductor1.i'].tolist() +i_b_I_noPV = env_hist_DDPG_I[0]['lc.inductor2.i'].tolist() +i_c_I_noPV = env_hist_DDPG_I[0]['lc.inductor3.i'].tolist() +phase_I_noPV = env_hist_DDPG_I[0]['inverter1.phase.0'].tolist() # 
env_test.env.net.components[0].phase +v_dq0_I_noPV = abc_to_dq0(np.array([v_a_I_noPV, v_b_I_noPV, v_c_I_noPV]), phase_I_noPV) +i_dq0_I_noPV = abc_to_dq0(np.array([i_a_I_noPV, i_b_I_noPV, i_c_I_noPV]), phase_I_noPV) +v_d_DDPG_I_noPV = (v_dq0_I_noPV[0].tolist()) +v_q_DDPG_I_noPV = (v_dq0_I_noPV[1].tolist()) +v_0_DDPG_I_noPV = (v_dq0_I_noPV[2].tolist()) +i_d_DDPG_I_noPV = (i_dq0_I_noPV[0].tolist()) +i_q_DDPG_I_noPV = (i_dq0_I_noPV[1].tolist()) +i_0_DDPG_I_noPV = (i_dq0_I_noPV[2].tolist()) + +DDPG_reward_I_noPV = df_DDPG_I['Reward DDPG'][0] + +df_DDPG_I = pd.read_pickle(folder_name + '/' + model_names[3] + number_of_steps) + +return_list_DDPG.append(round(df_DDPG_I['Return DDPG'][0], 7)) +# reward_list_DDPG.append(df_DDPG['Reward DDPG'][0]) + +env_hist_DDPG_I = df_DDPG_I['env_hist_DDPG'] + +v_a_I_load = env_hist_DDPG_I[0]['lc.capacitor1.v'].tolist() +v_b_I_load = env_hist_DDPG_I[0]['lc.capacitor2.v'].tolist() +v_c_I_load = env_hist_DDPG_I[0]['lc.capacitor3.v'].tolist() +i_a_I_load = env_hist_DDPG_I[0]['lc.inductor1.i'].tolist() +i_b_I_load = env_hist_DDPG_I[0]['lc.inductor2.i'].tolist() +i_c_I_load = env_hist_DDPG_I[0]['lc.inductor3.i'].tolist() +phase_I_load = env_hist_DDPG_I[0]['inverter1.phase.0'].tolist() # env_test.env.net.components[0].phase +v_dq0_I_load = abc_to_dq0(np.array([v_a_I_load, v_b_I_load, v_c_I_load]), phase_I_load) +i_dq0_I_load = abc_to_dq0(np.array([i_a_I_load, i_b_I_load, i_c_I_load]), phase_I_load) +v_d_DDPG_I_load = (v_dq0_I_load[0].tolist()) +v_q_DDPG_I_load = (v_dq0_I_load[1].tolist()) +v_0_DDPG_I_load = (v_dq0_I_load[2].tolist()) +i_d_DDPG_I_load = (i_dq0_I_load[0].tolist()) +i_q_DDPG_I_load = (i_dq0_I_load[1].tolist()) +i_0_DDPG_I_load = (i_dq0_I_load[2].tolist()) + +DDPG_reward_I_load = df_DDPG_I['Reward DDPG'][0] + +""" +if save_results: + # Plot setting + + +fig, axs = plt.subplots(3, 1) +axs[0].plot(t_test, R_load_PI, 'g') +axs[0].grid() +axs[0].tick_params(axis='x', colors='w') +axs[0].set_xlim([0, 10]) +axs[0].set_ylabel('$R_\mathrm{load}\,/\,\mathrm{\Omega}$') +# axs[0].setxlabel(r'$t\,/\,\mathrm{s}$') + +axs[1].plot(t_test, v_d_PI, 'b', label='PI') +axs[1].plot(t_test, v_q_PI, 'r') +axs[1].plot(t_test, v_0_PI, 'g') +axs[1].grid() +axs[1].legend() +axs[1].tick_params(axis='x', colors='w') +axs[1].set_xlim([0, 10]) +axs[1].set_ylabel('$v_{\mathrm{dq0}}\,/\,\mathrm{V}$') +# axs[1].setxlabel(r'$t\,/\,\mathrm{s}$') + +axs[2].plot(t_test, v_d_DDPG_I, 'b', label='$\mathrm{DDPG}_\mathrm{I,pv}$') +axs[2].plot(t_test, v_q_DDPG_I, 'r') +axs[2].plot(t_test, v_0_DDPG_I, 'g') +axs[2].grid() +axs[2].legend() +axs[2].set_xlim([0, 10]) +axs[2].set_ylabel('$v_{\mathrm{dq0}}\,/\,\mathrm{V}$') +axs[2].set_xlabel(r'$t\,/\,\mathrm{s}$') + +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/OMG_testcase.png') + fig.savefig(f'{folder_name}/OMG_testcase.pdf') + fig.savefig(f'{folder_name}/OMG_testcase.pgf') +""" +if save_results: + # Plot setting + params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 13, # fontsize for x and y labels (was 10) + 'axes.titlesize': 13, + 'font.size': 13, # was 10 + 'legend.fontsize': 13, # was 10 + 'xtick.labelsize': 13, + 'ytick.labelsize': 13, + 'text.usetex': True, + 'figure.figsize': [8.5, 2.4], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + matplotlib.rcParams.update(params) + + params = {'backend': 'ps', 
+ 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 12, # fontsize for x and y labels (was 10) + 'axes.titlesize': 12, + 'font.size': 12, # was 10 + 'legend.fontsize': 12, # was 10 + 'xtick.labelsize': 12, + 'ytick.labelsize': 12, + 'text.usetex': True, + 'figure.figsize': [5.3, 3.5], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + matplotlib.rcParams.update(params) + +fig = plt.figure() # figsize =(6, 5)) +plt.plot(t_test, R_load_PI, 'g') +plt.grid() +plt.xlim([0, 10]) +plt.ylabel('$R_\mathrm{load}\,/\,\mathrm{\Omega}$') +plt.xlabel(r'$t\,/\,\mathrm{s}$') +plt.tick_params(direction='in') +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/OMG_R_loadsmall.png') + fig.savefig(f'{folder_name}/OMG_R_loadsmall.pdf') + fig.savefig(f'{folder_name}/OMG_R_loadsmall.pgf') + + # Plot setting + params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 10, # fontsize for x and y labels (was 10) + 'axes.titlesize': 10, + 'font.size': 10, # was 10 + 'legend.fontsize': 10, # was 10 + 'xtick.labelsize': 10, + 'ytick.labelsize': 10, + 'text.usetex': True, + 'figure.figsize': [4.5, 6], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + matplotlib.rcParams.update(params) + +fig, axs = plt.subplots(5, 1) +axs[0].plot(t_test, v_d_PI, 'b', label='PI') +axs[0].plot(t_test, v_d_ref, '--', color='gray') +axs[0].grid() +axs[0].legend() +axs[0].set_xlim(interval_list_x) +axs[0].tick_params(axis='x', colors='w') +axs[0].set_ylim(interval_list_y) +# axs[0].set_ylabel("$v_{\mathrm{d}}\,/\,\mathrm{V}$") + +axs[1].plot(t_test, v_d_DDPG, 'b', label='$\mathrm{DDPG}$') +axs[1].plot(t_test, v_d_ref, '--', color='gray') +axs[1].grid() +axs[1].legend() +axs[1].set_xlim(interval_list_x) +axs[1].tick_params(axis='x', colors='w') +axs[1].set_ylim(interval_list_y) +# axs[1].set_ylabel("$v_{\mathrm{d}}\,/\,\mathrm{V}$") + +axs[4].plot(t_test, v_d_DDPG_I, 'b', label='$\mathrm{DDPG}_\mathrm{I,pv}$') +axs[4].plot(t_test, v_d_ref, '--', color='gray') +axs[4].grid() +axs[4].legend() +axs[4].set_xlim(interval_list_x) +axs[4].set_ylim(interval_list_y) +# axs[4].set_ylabel("$v_{\mathrm{d}}\,/\,\mathrm{V}$") +axs[4].set_xlabel(r'$t\,/\,\mathrm{s}$') + +axs[3].plot(t_test, v_d_DDPG_I_load, 'b', label='$\mathrm{DDPG}_\mathrm{I,i_{load}}$') +axs[3].plot(t_test, v_d_ref, '--', color='gray') +axs[3].grid() +axs[3].legend() +axs[3].set_xlim(interval_list_x) +axs[3].tick_params(axis='x', colors='w') +# axs[3].set_xticks(color='w') +axs[3].set_ylim(interval_list_y) +# axs[3].set_ylabel("$v_{\mathrm{d}}\,/\,\mathrm{V}$") + +axs[2].plot(t_test, v_d_DDPG_I_noPV, 'b', label='$\mathrm{DDPG}_\mathrm{I}$') +axs[2].plot(t_test, v_d_ref, '--', color='gray') +axs[2].grid() +axs[2].legend() +axs[2].set_xlim(interval_list_x) +axs[2].tick_params(axis='x', colors='w') +axs[2].set_ylim(interval_list_y) +axs[2].set_ylabel("$v_{\mathrm{d}}\,/\,\mathrm{V}$") + +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/OMG_v_d_compare.png') + fig.savefig(f'{folder_name}/OMG_v_d_compare.pdf') + fig.savefig(f'{folder_name}/OMG_v_d_compare.pgf') + + # Plot setting + params = {'backend': 'ps', + 'text.latex.preamble': 
[r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 10, # fontsize for x and y labels (was 10) + 'axes.titlesize': 10, + 'font.size': 10, # was 10 + 'legend.fontsize': 10, # was 10 + 'xtick.labelsize': 10, + 'ytick.labelsize': 10, + 'text.usetex': True, + 'figure.figsize': [4.5, 6], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + matplotlib.rcParams.update(params) + +fig, axs = plt.subplots(2, 1) +axs[0].plot(t_test, v_d_DDPG_I, 'b', label='$\mathrm{SEC-DDPG}$') +axs[0].plot(t_test, v_q_DDPG_I, 'r') +axs[0].plot(t_test, v_0_DDPG_I, 'g') +axs[0].plot(t_test, v_d_PI, '--b', label='PI') +axs[0].plot(t_test, v_q_PI, '--r') +axs[0].plot(t_test, v_0_PI, '--g') +axs[0].plot(t_test, v_d_ref, '--', color='gray') +axs[0].grid() +axs[0].legend() +axs[0].set_xlim(interval_list_x) +# axs[0].set_ylim(interval_list_y) +# axs[0].set_xlabel(r'$t\,/\,\mathrm{s}$') +axs[0].tick_params(axis='x', colors='w') +axs[0].set_ylabel("$v_{\mathrm{dq0}}\,/\,\mathrm{V}$") + +axs[1].plot(t_test, i_d_DDPG_I, 'b', label='$i_\mathrm{d}$') +axs[1].plot(t_test, i_q_DDPG_I, 'r', label='$i_\mathrm{q}$') +axs[1].plot(t_test, i_0_DDPG_I, 'g', label='$i_\mathrm{0}$') +axs[1].plot(t_test, i_d_PI, '--b') +axs[1].plot(t_test, i_q_PI, '--r') +axs[1].plot(t_test, i_0_PI, '--g') +axs[1].grid() +axs[1].set_xlim(interval_list_x) +# axs[1].set_ylim(interval_list_y) +axs[1].set_xlabel(r'$t\,/\,\mathrm{s}$') +axs[1].set_ylabel("$i_{\mathrm{dq0}}\,/\,\mathrm{A}$") +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/OMG_DDPGpv_PI_compare.png') + fig.savefig(f'{folder_name}/OMG_DDPGpv_PI_compare.pdf') + fig.savefig(f'{folder_name}/OMG_DDPGpv_PI_compare.pgf') + +plt.plot(t_reward, reward_PI, 'b', label=f' PI: ' + f'{round(sum(reward_PI[int(interval_list_x[0] / ts):int(interval_list_x[1] / ts)]) / ((interval_list_x[1] - interval_list_x[0]) / ts), 4)}') +plt.plot(t_reward, DDPG_reward, 'r', label=f' DDPG: ' + f'{round(sum(DDPG_reward[int(interval_list_x[0] / ts):int(interval_list_x[1] / ts)]) / ((interval_list_x[1] - interval_list_x[0]) / ts), 4)}') +plt.plot(t_reward, DDPG_reward_I, 'g', label=f'DDPG+I,pv: ' + f'{round(sum(DDPG_reward_I[int(interval_list_x[0] / ts):int(interval_list_x[1] / ts)]) / ((interval_list_x[1] - interval_list_x[0]) / ts), 4)}') +plt.plot(t_reward, DDPG_reward_I_noPV, 'g', label=f'DDPG+I: ' + f'{round(sum(DDPG_reward_I_noPV[int(interval_list_x[0] / ts):int(interval_list_x[1] / ts)]) / ((interval_list_x[1] - interval_list_x[0]) / ts), 4)}') +plt.plot(t_reward, DDPG_reward_I_load, 'g', label=f'DDPG+I,iLoad: ' + f'{round(sum(DDPG_reward_I_load[int(interval_list_x[0] / ts):int(interval_list_x[1] / ts)]) / ((interval_list_x[1] - interval_list_x[0]) / ts), 4)}') + +plt.grid() +plt.xlim(interval_list_x) +# axs[1, i].set_ylim(interval_list_y[i]) +plt.legend() +plt.xlabel(r'$t\,/\,\mathrm{s}$') + +plt.ylabel("Reward") +plt.show() + +""" +plt.plot(t_test, v_d_DDPG_I_noPV, 'b', label='$v_\mathrm{d}') +plt.plot(t_test, v_q_DDPG_I_noPV, 'b', label='$v_\mathrm{q}') +plt.plot(t_test, v_0_DDPG_I_noPV, 'b', label='$v_\mathrm{0}') +plt.grid() +plt.xlim(interval_list_x) +plt.ylim(interval_list_y) +plt.xlabel(r'$t\,/\,\mathrm{s}$') +plt.ylabel("$v_{\mathrm{dq0, DDPG}}\,/\,\mathrm{V}$") +plt.show() + + +plt.plot(t_test, i_d_DDPG_I_noPV, 'b', label='$v_\mathrm{d}') +plt.plot(t_test, i_q_DDPG_I_noPV, 'b', label='$v_\mathrm{q}') +plt.plot(t_test, 
i_0_DDPG_I_noPV, 'b', label='$v_\mathrm{0}') +plt.grid() +plt.xlim(interval_list_x) +plt.ylim(interval_list_y) +plt.xlabel(r'$t\,/\,\mathrm{s}$') +plt.ylabel("$i_{\mathrm{dq0, DDPG}}\,/\,\mathrm{A}$") +plt.show() + + + +fig = plt.figure() +plt.plot(t_test, i_d_PI, 'b', label='PI') +plt.plot(t_test, i_d_DDPG, 'r', label='DDPG') +plt.plot(t_test, i_d_DDPG_I, 'g', label='DDPG+I,pv') +plt.plot(t_test, i_d_DDPG_I_load, 'm', label='DDPG+I,iLoad') +plt.plot(t_test, i_d_DDPG_I_noPV, 'c', label='DDPG+I') +# axs[1, i].plot(t_test, i_q_PI, 'r', label='v_q') +# axs[1, i].plot(t_test, i_0_PI, 'g', label='v_0') +plt.grid() +# axs[1, i].legend() +plt.xlim(interval_list_x) +# axs[3, i].set_ylim(interval_list_y[i]) +plt.ylabel("$i_{\mathrm{d}}\,/\,\mathrm{A}$") + +fig = plt.figure() +plt.plot(t_test, v_d_ref, '--', color='gray') +plt.plot(t_test, v_d_PI, 'b', label='PI') +plt.plot(t_test, v_q_PI, 'b', label='PI') +plt.plot(t_test, v_0_PI, 'b', label='PI') +plt.plot(t_test, v_d_DDPG, 'r', label='DDPG') +plt.plot(t_test, v_q_DDPG, 'r', label='DDPG') +plt.plot(t_test, v_0_DDPG, 'r', label='DDPG') +plt.plot(t_test, v_d_DDPG_I, 'g', label='DDPG+I') +plt.plot(t_test, v_q_DDPG_I, 'g', label='DDPG+I') +plt.plot(t_test, v_0_DDPG_I, 'g', label='DDPG+I') +# axs[2, i].plot(t_test, v_q_PI, 'r', label='v_q') +# axs[2, i].plot(t_test, v_0_PI, 'g', label='v_0') +plt.grid() +plt.legend() +plt.xlim(interval_list_x) +plt.ylim(interval_list_y) +plt.ylabel("$v_{\mathrm{d}}\,/\,\mathrm{V}$") +# else: +# axs[1, i].set_ylabel("$v_{\mathrm{q0, PI}}\,/\,\mathrm{V}$") + +fig = plt.figure() +plt.plot(t_test, i_d_PI, 'b', label='PI') +plt.plot(t_test, i_q_PI, 'b') +plt.plot(t_test, i_0_PI, 'b') +plt.plot(t_test, i_d_DDPG, 'r', label='DDPG') +plt.plot(t_test, i_q_DDPG, 'r') +plt.plot(t_test, i_0_DDPG, 'r') +plt.plot(t_test, i_d_DDPG_I, 'g', label='DDPG+I') +plt.plot(t_test, i_q_DDPG_I, 'g') +plt.plot(t_test, i_0_DDPG_I, 'g') +# axs[1, i].plot(t_test, i_q_PI, 'r', label='v_q') +# axs[1, i].plot(t_test, i_0_PI, 'g', label='v_0') +plt.grid() +# axs[1, i].legend() +plt.xlim(interval_list_x) +# axs[3, i].set_ylim(interval_list_y[i]) +plt.ylabel("$i_{\mathrm{d}}\,/\,\mathrm{A}$") + +fig.subplots_adjust(wspace=0.2, hspace=0.2) +plt.show() + +#fig.savefig(f'{folder_name}/Ausschnitt_2pV_q0.pdf') + +""" diff --git a/experiments/hp_tune/visualize_tests/plt_errorbar.py b/experiments/hp_tune/visualize_tests/plt_errorbar.py new file mode 100644 index 00000000..b317cbe4 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/plt_errorbar.py @@ -0,0 +1,158 @@ +import matplotlib +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd + +# Plot setting +params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 8, # fontsize for x and y labels (was 10) + 'axes.titlesize': 8, + 'font.size': 10, # was 10 + 'legend.fontsize': 10, # was 10 + 'xtick.labelsize': 10, + 'ytick.labelsize': 10, + 'text.usetex': True, + 'figure.figsize': [5.8, 3.8], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + +# I_term = pd.read_pickle('GEM_I_term_3mean_over_50_agents.pkl') +# no_I_term = pd.read_pickle('GEM_no_I_term_3mean_over_50_agents.pkl') + +asd = 1 + +save_results = False +folder_name = 'errorbar_plots/' + +# da json zu groß, kopie aus dashboard.... 
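Translating the comment above: the JSON export of the experiment dashboard was too large, so the per-agent average returns in the lists below were copied in by hand, while the `*_500` variants are read from pickles and appended further down. A purely illustrative helper for summarising such a list of per-agent returns (names are placeholders):

```python
import numpy as np


def summarize_returns(returns):
    """Mean, standard deviation and median of per-agent average returns."""
    r = np.asarray(returns, dtype=float)
    return {'n': int(r.size), 'mean': float(r.mean()),
            'std': float(r.std()), 'median': float(np.median(r))}


# summarize_returns(OMG_DDPG_Actor)  # once the list below has been defined
```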
+ +OMG_DDPG_Actor = [-0.226037, -0.128363, -0.139432, -0.121386, -0.137367, + -0.216827, -0.116579, -0.0831927, -0.112777, -0.127669, + -0.185162, -0.128747, -0.113952, -0.122981, -0.114832, + -0.120671, -0.226531, -0.118882, -0.134699, -0.118027, + -0.149192, -0.121207, -0.253065, -0.219944, -0.1244, + -0.0993589, -0.12237, -0.143523, -0.244333, -0.124357, + -0.152193, -0.118973, -0.0955573, -0.114242, -0.111534, + -0.127907, -0.102504, -0.225466, -0.219972, -0.120333, + -0.134156, -0.116749, -0.122513, -0.167896, -0.062778, + -0.239305, -0.110423, -0.103946, -0.160686, -0.127362] + +OMG_DDPG_Actor_500 = pd.read_pickle('OMG_DDPG_Actorreturn_500_agents.pkl')['return'].tolist()[ + :-1] # einen zu viel geladen! + +# typo! das sind die mit 5 pastVals! +OMG_DDPG_Integrator_no_pastVals = [-0.0566483, -0.177257, -0.22384, -0.0566379, -0.0613575, + -0.866927, -0.0591551, -0.0409672, -0.0410715, -0.0405743, + -0.0481607, -1.00176, -0.0398449, -0.0584291, -0.0428567, + -0.754902, -0.0499666, -0.346553, -0.0448563, -0.0424514, + -0.19927, -0.0424081, -0.0613121, -0.0501086, -0.287048, + -0.214733, -0.0421697, -0.0474572, -0.0464294, -0.0467267, + -0.0483718, -0.0584424, -0.354886, -0.0451979, -0.04627, + -0.047793, -0.0471481, -0.0846913, -0.0446951, -0.0500306, + -0.043155, -0.0718899, -0.039992, -0.0453119, -0.0673279, + -0.0408377, -0.047179, -0.0438636, -0.0430013, -0.0595805] + +OMG_DDPG_Integrator_no_pastVals_500 = \ + pd.read_pickle('OMG_DDPG_Integrator_no_pastValsreturn_500_agents.pkl')['return'].tolist() + +OMG_DDPG_Integrator_no_pastVals_corr = [-0.048334, -0.251245, -0.0688722, -0.0565136, -0.202199, + -0.042535, -0.0408258, -0.0480982, -0.0423354, -0.0461098, + -0.543109, -0.0444726, -0.134507, -0.101061, -0.0410615, + -0.0423758, -0.0732737, -0.0531188, -0.0451057, -0.0557529, + -0.0516102, -0.272256, -0.0494411, -0.0453498, -0.049296, + -0.0524428, -0.0417263, -0.0453462, -0.0466777, -0.0772813, + -0.217484, -0.0407658, -0.0403833, -0.0795559, -0.0393357, + -0.0526313, -0.0443727, -0.0455981, -0.049839, -0.046536, + -0.0453199, -0.0421393, -0.0469275, -0.0441136, -0.0426031, + -0.162181, -0.0523912, -0.0403753, -0.0412137, -0.770299] + +OMG_DDPG_Integrator_no_pastVals_corr_500 = \ + pd.read_pickle('OMG_DDPG_Integrator_no_pastVals_corrreturn_500_agents.pkl')['return'].tolist()[ + :-1] # einen zu viel geladen! 
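The trailing `[:-1]` above (and on the `OMG_DDPG_Actor_500` load further up) drops an accidentally over-fetched entry so that every variant contributes the same number of agents; that is what the German comment ("one loaded too many") refers to. A hypothetical guard that makes the intent explicit:

```python
def trim_to(n_agents, *series):
    """Cut several lists of per-agent returns to a common length before plotting."""
    return [list(s)[:n_agents] for s in series]


# e.g.:
# actor_500, sec_500 = trim_to(500, OMG_DDPG_Actor_500, OMG_DDPG_Integrator_no_pastVals_500)
```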
+ +OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr = [-0.0387997, -0.0409335, -0.0685522, -0.164238, -0.0409236, + -0.0410673, -0.039469, -0.0399732, -0.13207, -0.0415697, + -0.122869, -0.0611268, -0.306491, -0.0992046, -0.044661, + -0.0458972, -0.043849, -0.0500543, -0.0531591, -0.0679286, + -0.20993, -0.0497402, -0.0405819, -0.0746702, -0.203728, + -0.0408563, -0.0708935, -0.0409779, -0.0438561, -0.0432274, + -0.0395637, -0.0404426, -0.0377221, -0.0404959, -0.0465647, + -0.0612425, -0.0409127, -0.0416884, -0.198034, -0.0523231, + -0.2017, -0.0414555, -0.0422072, -0.0398287, -0.0400683, + -0.0461625, -0.264055, -0.0453719, -0.0396692, -0.0411879] + +OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr_500 = \ + pd.read_pickle('OMG_DDPG_Integrator_no_pastVals_i_load_feature_corrreturn_500_agents.pkl')['return'].tolist() +# m = np.array(I_term['return_Mean']) +# s = np.array(I_term['return_Std']) +agents = np.arange(0, 550) +agents = np.arange(0, 500) + +OMG_DDPG_Actor = OMG_DDPG_Actor + OMG_DDPG_Actor_500 +OMG_DDPG_Integrator_no_pastVals = OMG_DDPG_Integrator_no_pastVals + OMG_DDPG_Integrator_no_pastVals_500 +OMG_DDPG_Integrator_no_pastVals_corr = OMG_DDPG_Integrator_no_pastVals_corr + OMG_DDPG_Integrator_no_pastVals_corr_500 +OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr = OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr + \ + OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr_500 + +OMG_DDPG_Actor = OMG_DDPG_Actor_500 +OMG_DDPG_Integrator_no_pastVals = OMG_DDPG_Integrator_no_pastVals_500 +OMG_DDPG_Integrator_no_pastVals_corr = OMG_DDPG_Integrator_no_pastVals_corr_500 +OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr = OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr_500 + +plt.plot(agents, OMG_DDPG_Actor) +plt.plot(agents, OMG_DDPG_Integrator_no_pastVals, 'r') +plt.plot(agents, OMG_DDPG_Integrator_no_pastVals_corr, 'g') +plt.plot(agents, OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr, 'm') +# plt.fill_between(agents, m - s, m + s, facecolor='r') +plt.ylabel('Average return ') +plt.xlabel('Agents') +plt.ylim([-0.6, 0.2]) +plt.grid() +plt.title('I_term') +plt.show() + +if save_results: + matplotlib.rcParams.update(params) + +fig, ax = plt.subplots() # figsize =(6, 5)) +# plt.boxplot((OMG_DDPG_Actor, OMG_DDPG_Integrator_no_pastVals_corr, +# OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr, OMG_DDPG_Integrator_no_pastVals)) +ax.boxplot((OMG_DDPG_Integrator_no_pastVals, OMG_DDPG_Actor)) +# ax.plot( 3, 0.0332, marker='o' ) +plt.grid() +plt.ylim([-0.4, 0]) +plt.xticks([1, 2], ['$\mathrm{SEC}$', '$\mathrm{DDPG}$']) +plt.ylabel('$\overline{\sum{r_k}}$') +plt.tick_params(direction='in') + +if save_results: + fig.savefig(f'{folder_name}/OMG_Errorbar_lim.pgf') + fig.savefig(f'{folder_name}/OMG_Errorbar_lim.png') + fig.savefig(f'{folder_name}/OMG_Errorbar_lim.pdf') + +fig = plt.figure() # figsize =(6, 5)) +plt.boxplot((OMG_DDPG_Actor, OMG_DDPG_Integrator_no_pastVals_corr, + OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr, OMG_DDPG_Integrator_no_pastVals)) +plt.grid() +# plt.ylim([-0.75, 0]) +plt.xticks([1, 2, 3, 4], ['$\mathrm{DDPG}$', '$\mathrm{DDPG}_\mathrm{I}$', + '$\mathrm{DDPG}_\mathrm{I,i_{load}}$', '$\mathrm{DDPG}_\mathrm{I,pv}$']) +plt.ylabel('$\overline{\sum{r_k}}$') +plt.show() +if save_results: + fig.savefig(f'{folder_name}/OMG_Errorbar.png') + fig.savefig(f'{folder_name}/OMG_Errorbar.pdf') + fig.savefig(f'{folder_name}/OMG_Errorbar.pgf') + +plt.boxplot((OMG_DDPG_Actor, OMG_DDPG_Integrator_no_pastVals_corr, + 
OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr, OMG_DDPG_Integrator_no_pastVals)) +plt.grid() +plt.ylim([-0.06, 0]) +plt.xticks([1, 2, 3, 4], ['$\mathrm{DDPG}$', '$\mathrm{DDPG}_\mathrm{I}$', + '$\mathrm{DDPG}_\mathrm{I,i_{load}}$', '$\mathrm{DDPG}_\mathrm{I,pv}$']) +plt.show() diff --git a/experiments/hp_tune/visualize_tests/plt_learningCurve.py b/experiments/hp_tune/visualize_tests/plt_learningCurve.py new file mode 100644 index 00000000..85d400f7 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/plt_learningCurve.py @@ -0,0 +1,180 @@ +import matplotlib +import matplotlib.pyplot as plt +import pandas as pd +import numpy as np + +save_results = True +folder_name = 'errorbar_plots/' + +# Plot setting +params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 12, # fontsize for x and y labels (was 10) + 'axes.titlesize': 12, + 'font.size': 12, # was 10 + 'legend.fontsize': 12, # was 10 + 'xtick.labelsize': 12, + 'ytick.labelsize': 12, + 'text.usetex': True, + 'figure.figsize': [5.5, 3.7], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + +""" +I_term = pd.read_pickle('GEM_I_term_4_1250_agents_data_with_rewards.pkl') +no_I_term = pd.read_pickle('GEM_no_I_term_4_1250_agents_data_with_rewards.pkl') + +asd = 1 + +m = np.array(I_term['return_Mean']) +s = np.array(I_term['return_Std']) +agents = np.arange(0,1250) + +# take the best 50 and the worst 50 and and 450 random + +idxs = np.random.randint(low=50, high=1200, size=450) +m_sort = np.sort(m) +m550 = np.concatenate([m_sort[0:50],m_sort[1200:1250], np.take(m_sort, idxs)]) +""" + +# typo! das sind die mit 5 pastVals! 
+OMG_DDPG_Integrator_no_pastVals = [-0.0566483, -0.177257, -0.22384, -0.0566379, -0.0613575, + -0.866927, -0.0591551, -0.0409672, -0.0410715, -0.0405743, + -0.0481607, -1.00176, -0.0398449, -0.0584291, -0.0428567, + -0.754902, -0.0499666, -0.346553, -0.0448563, -0.0424514, + -0.19927, -0.0424081, -0.0613121, -0.0501086, -0.287048, + -0.214733, -0.0421697, -0.0474572, -0.0464294, -0.0467267, + -0.0483718, -0.0584424, -0.354886, -0.0451979, -0.04627, + -0.047793, -0.0471481, -0.0846913, -0.0446951, -0.0500306, + -0.043155, -0.0718899, -0.039992, -0.0453119, -0.0673279, + -0.0408377, -0.047179, -0.0438636, -0.0430013, -0.0595805] + +OMG_DDPG_Integrator_no_pastVals_500 = \ + pd.read_pickle('OMG_DDPG_Integrator_no_pastValsreturn_500_agents.pkl')['return'].tolist() +OMG_SEC_return = OMG_DDPG_Integrator_no_pastVals + OMG_DDPG_Integrator_no_pastVals_500 + +# OMG_DDPG_return_798 = pd.read_pickle('OMG_DDPG_Actorreturn_8XX_agents.pkl')['return'].tolist() +OMG_DDPG_return_798 = pd.read_pickle('OMG_DDPG_Actorreturn_8XX_agents.pkl')['return_Mean'].tolist() + +idxs = np.random.randint(low=50, high=748, size=450) +m_sort = np.sort(OMG_DDPG_return_798) +OMG_DDPG_return = np.concatenate([m_sort[0:50], m_sort[747:798], np.take(m_sort, idxs)]) + +idx_DDPG_sort = np.argsort(OMG_DDPG_return_798) + +# OMG_DDPG_return = OMG_DDPG_return_798 + +if save_results: + matplotlib.rcParams.update(params) + +fig, ax = plt.subplots() # figsize =(6, 5)) +# plt.boxplot((OMG_DDPG_Actor, OMG_DDPG_Integrator_no_pastVals_corr, +# OMG_DDPG_Integrator_no_pastVals_i_load_feature_corr, OMG_DDPG_Integrator_no_pastVals)) +ax.boxplot((OMG_SEC_return, OMG_DDPG_return)) +# ax.plot( 3, 0.0332, marker='o' ) +plt.grid() +plt.ylim([-0.4, 0]) +plt.xticks([1, 2], ['$\mathrm{SEC}$', '$\mathrm{DDPG}$']) +plt.ylabel('$\overline{r}_{k,v}$') +# plt.ylabel('$1/K\,\sum_{k=0}^K{r_{k,v}}$') +plt.tick_params(direction='in') +plt.show() + +if save_results: + fig.savefig(f'{folder_name}/OMG_Errorbar_lim.pgf') + fig.savefig(f'{folder_name}/OMG_Errorbar_lim.png') + fig.savefig(f'{folder_name}/OMG_Errorbar_lim.pdf') + +##########################LearningCurve############### + +params = {'backend': 'ps', + 'text.latex.preamble': [r'\usepackage{gensymb}' + r'\usepackage{amsmath,amssymb,mathtools}' + r'\newcommand{\mlutil}{\ensuremath{\operatorname{ml-util}}}' + r'\newcommand{\mlacc}{\ensuremath{\operatorname{ml-acc}}}'], + 'axes.labelsize': 12, # fontsize for x and y labels (was 10) + 'axes.titlesize': 12, + 'font.size': 12, # was 10 + 'legend.fontsize': 12, # was 10 + 'xtick.labelsize': 12, + 'ytick.labelsize': 12, + 'text.usetex': True, + 'figure.figsize': [5.5, 3.7], # [3.9, 3.1], + 'font.family': 'serif', + 'lines.linewidth': 1 + } + +matplotlib.rcParams.update(params) + +SEC_train_data = pd.read_pickle('OMG_DDPG_Integrator_no_pastVals_8XX_agents_train_data.pkl') +DDPG_train_data = pd.read_pickle('OMG_DDPG_Actor_8XX_agents_train_data.pkl') + +# DDPG data to long -> sort by mean -> take best/worst 50 and 450 random + +SEC_mean_learningCurve_550 = SEC_train_data.mean(axis=1) +SEC_std_learningCurve_550 = SEC_train_data.std(axis=1) + +# sort df by idx (return of test case from above) - not needed, just for doublecheck +df3 = DDPG_train_data.iloc[:, idx_DDPG_sort] + +# get the best/worst idx2 out ouf sort_idx and snip the df to 550 based on that idx2 +idx2 = np.concatenate([idx_DDPG_sort[0:50], idx_DDPG_sort[idxs], idx_DDPG_sort[747:798]]) +df550 = DDPG_train_data.iloc[:, idx2] + +DDPG_mean_learningCurve_550 = df3.mean(axis=1) +DDPG_std_learningCurve_550 = 
df3.std(axis=1) + +low = (SEC_mean_learningCurve_550 - SEC_std_learningCurve_550).to_numpy() +up = (SEC_mean_learningCurve_550 + SEC_std_learningCurve_550).to_numpy() +SEC = SEC_mean_learningCurve_550.to_numpy() +DDPG = DDPG_mean_learningCurve_550.to_numpy() +episode = np.array([list(range(0, 177))]).squeeze() + +fig, ax = plt.subplots() +plt.fill_between(episode, up, low, facecolor='b', alpha=0.25) +plt.fill_between(episode, (DDPG_mean_learningCurve_550 + DDPG_std_learningCurve_550).to_numpy(), + (DDPG_mean_learningCurve_550 - DDPG_std_learningCurve_550).to_numpy(), facecolor='r', alpha=0.25) +plt.plot(episode, SEC, 'b', label='$\mathrm{SEC}$', linewidth=2) +plt.plot(episode, low, '--b', linewidth=0.5) +plt.plot(episode, up, '--b', linewidth=0.5) +plt.plot(episode, DDPG, 'r', label='$\mathrm{DDPG}$', linewidth=2) +plt.plot(episode, (DDPG_mean_learningCurve_550 + DDPG_std_learningCurve_550).to_numpy(), '--r', linewidth=0.5) +plt.plot(episode, (DDPG_mean_learningCurve_550 - DDPG_std_learningCurve_550).to_numpy(), '--r', linewidth=0.5) +plt.grid() +plt.xlim([0, 176]) +plt.tick_params(direction='in') +plt.legend() +# plt.set_xlim([0, 10]) +plt.ylabel('$\overline{r}_{k,v}$') +plt.xlabel(r'$\mathrm{Episode}$') +plt.show() + +if save_results: + matplotlib.rcParams.update(params) + + fig.savefig(f'{folder_name}/OMG_learning_curve.pgf') + fig.savefig(f'{folder_name}/OMG_learning_curve.png') + fig.savefig(f'{folder_name}/OMG_learning_curve.pdf') + +plt.plot(SEC_mean_learningCurve_550, 'b', label='$\mathrm{SEC}$') +plt.plot(DDPG_mean_learningCurve_550, '-.b', label='$\mathrm{DDPG}$') +plt.fill_between(SEC_mean_learningCurve_550 - SEC_std_learningCurve_550, + SEC_mean_learningCurve_550 + SEC_std_learningCurve_550, facecolor='r') +plt.fill_between(DDPG_mean_learningCurve_550 - DDPG_std_learningCurve_550, + DDPG_mean_learningCurve_550 + DDPG_std_learningCurve_550, facecolor='r') +plt.grid() +plt.legend() +plt.xlim([0, 177]) +# plt.set_xlim([0, 10]) +plt.ylabel('$\overline{\sum{r}}$') +plt.xlabel(r'$\mathrm{Episode}$') +plt.show() +asd = 1 + +# not needed, but maybe interesting for futuer to reorder df: +# df2 = DDPG_train_data.iloc[:,idx_DDPG_sort] +# idx2 = np.concatenate([np.array([list(range(0,50))]).squeeze(), np.array([list(range(748,798))]).squeeze(), idxs]) diff --git a/experiments/hp_tune/visualize_tests/trial_analysis.py b/experiments/hp_tune/visualize_tests/trial_analysis.py new file mode 100644 index 00000000..5e223611 --- /dev/null +++ b/experiments/hp_tune/visualize_tests/trial_analysis.py @@ -0,0 +1,432 @@ +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import plotly.graph_objects as px +import sshtunnel +from bson import ObjectId +from plotly import tools +from pymongo import MongoClient + +from openmodelica_microgrid_gym.util import dq0_to_abc, abc_to_dq0 + +# db_name = 'PC2_DDGP_Vctrl_single_inv_18_penalties' +db_name = 'PC2_TD3_Vctrl_single_inv_3' +# db_name = 'PC2_TD3_Vctrl_single_inv_2' +# db_name = 'DDPG_Retrain_Best_study18_6462' +# trial = '834' +show_episode_number = 10 +make_pyplot = False + +with sshtunnel.open_tunnel('lea38', remote_bind_address=('127.0.0.1', 12001)) as tun: + with MongoClient(f'mongodb://localhost:{tun.local_bind_port}/') as client: + db = client[db_name] + + trial = db.Trial_number_5 + # trial = db.Trial_number_6462 + + trial_config = trial.find_one({"Name": "Config"}) + trial_test = trial.find_one({"Name": "Test"}) + train_data = trial.find_one({"Name": "After_Training"}) + train_episode_data = trial.find_one({"Episode_number": 
show_episode_number}) + + print(f'Starttime = {trial_config["Start time"]}') + print(f'Starttime = {trial_test["End time"]}') + print(' ') + print(f'Node = {trial_config["Node"]}') + print(' ') + + print('Config-Params:') + print(*trial_config.items(), sep='\n') + + ts = 1e-4 # if ts stored: take from db + t_test = np.arange(0, len(trial_test['lc_capacitor1_v']) * ts, ts).tolist() + v_a_test = trial_test['lc_capacitor1_v'] + v_b_test = trial_test['lc_capacitor2_v'] + v_c_test = trial_test['lc_capacitor3_v'] + i_a_test = trial_test['lc_inductor1_i'] + i_b_test = trial_test['lc_inductor2_i'] + i_c_test = trial_test['lc_inductor3_i'] + R_load = trial_test['r_load_resistor1_R'] + + v_sp_d_test = trial_test['inverter1_v_ref_0'] + v_sp_q_test = trial_test['inverter1_v_ref_1'] + v_sp_0_test = trial_test['inverter1_v_ref_2'] + + phase_test = trial_test['Phase'] + + v_sp_abc = dq0_to_abc(np.array([v_sp_d_test, v_sp_q_test, v_sp_0_test]), np.array(phase_test[:-1])) + + v_mess_dq0 = abc_to_dq0(np.array([v_a_test, v_b_test, v_c_test]), np.array(phase_test[:-1])) + + plt.plot(t_test, R_load) + plt.grid() + # plt.xlim([0, 0.1]) + plt.xlabel("time") + plt.ylabel("R_load") + plt.title('Test') + plt.show() + + plt.plot(t_test, v_a_test) + plt.plot(t_test, v_b_test) + plt.plot(t_test, v_c_test) + # plt.plot(t_test, v_sp_abc[0, :]) + plt.grid() + # plt.xlim([0, 0.1]) + plt.xlabel("time") + plt.ylabel("v_abc") + plt.title('Test') + plt.show() + + plt.plot(t_test, v_mess_dq0[0, :]) + plt.plot(t_test, v_mess_dq0[1, :]) + plt.plot(t_test, v_mess_dq0[2, :]) + plt.plot(t_test, v_sp_d_test) + #plt.ylim([-30, 300]) + plt.xlim([0, 0.1]) + plt.grid() + plt.xlabel("time") + plt.ylabel("v_dq0") + plt.title('Test') + plt.show() + + + plt.plot(t_test, i_a_test) + plt.plot(t_test, i_b_test) + plt.plot(t_test, i_c_test) + plt.grid() + plt.xlabel("time") + plt.ylabel("v_abc") + plt.title('Test') + plt.show() + + if 1: + actionP0_test = trial_test['ActionP0'] + actionP1_test = trial_test['ActionP1'] + actionP2_test = trial_test['ActionP2'] + actionI0_test = trial_test['ActionI0'] + actionI1_test = trial_test['ActionI1'] + actionI2_test = trial_test['ActionI2'] + + plt.plot(t_test[1:], actionP0_test) + plt.plot(t_test[1:], actionP1_test) + plt.plot(t_test[1:], actionP2_test) + # plt.xlim([0, 0.1]) + plt.grid() + plt.xlabel("time") + plt.ylabel("action_P") + plt.title('Test') + plt.show() + + plt.plot(t_test[1:], actionI0_test) + plt.plot(t_test[1:], actionI1_test) + plt.plot(t_test[1:], actionI2_test) + # plt.xlim([0, 0.1]) + plt.grid() + plt.xlabel("time") + plt.ylabel("action_I") + plt.title('Test') + plt.show() + + integrator_sum0 = trial_test['integrator_sum0'] # np.cumsum( + # np.array(actionI0_test) * trial_config['integrator_weight']) # trial_test['integrator_sum0']# + integrator_sum1 = trial_test[ + 'integrator_sum1'] # np.cumsum(np.array(actionI1_test) * trial_config['integrator_weight']) + integrator_sum2 = trial_test[ + 'integrator_sum2'] # np.cumsum(np.array(actionI2_test) * trial_config['integrator_weight']) + + plt.plot(t_test[1:], integrator_sum0) + plt.plot(t_test[1:], integrator_sum1) + plt.plot(t_test[1:], integrator_sum2) + # plt.xlim([0, 0.1]) + plt.grid() + plt.xlabel("time") + plt.ylabel("Integratorzustand") + plt.title('Test') + plt.show() + + if make_pyplot: + plot = px.Figure() + plot.add_trace( + px.Scatter(y=actionI0_test)) + plot.add_trace( + px.Scatter(y=actionI1_test)) + plot.add_trace( + px.Scatter(y=actionI2_test)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + 
dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() + + plot = px.Figure() + plot.add_trace( + px.Scatter(y=actionP0_test)) + plot.add_trace( + px.Scatter(y=actionP1_test)) + plot.add_trace( + px.Scatter(y=actionP2_test)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() + + if make_pyplot: + # pyplot v_abc + plot = px.Figure() + plot.add_trace( + px.Scatter(x=t_test, y=v_a_test)) + # px.Scatter(x=x, y=v_mess_dq0[0][:])) + + plot.add_trace( + px.Scatter(x=t_test, y=v_b_test)) + # px.Scatter(x=x, y=v_mess_dq0[1][:])) + plot.add_trace( + px.Scatter(x=t_test, y=v_c_test)) + # px.Scatter(x=x, y=v_mess_dq0[2][:])) + + plot.add_trace( + px.Scatter(x=t_test, y=v_sp_abc[1, :])) + # px.Scatter(x=x, y=df2['v_1_SP'])) + + plot.add_trace( + px.Scatter(x=t_test, y=v_sp_abc[2, :])) + # px.Scatter(x=x, y=df2['v_2_SP'])) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() + + ############################################################## + # After Training + + train_reward_per_episode = train_data['Mean_eps_reward'] + number_learning_steps = trial_config['Number_learning_Steps'] + episode_len = 2000 # trial_config['training_episode_length'] + learning_rate = trial_config['learning_rate'] + lr_decay_start = trial_config['lr_decay_start'] + lr_decay_duration = trial_config['lr_decay_duration'] + final_lr = trial_config['final_lr'] * learning_rate + + ax = plt.plot(train_reward_per_episode) + plt.grid() + plt.xlabel("Episodes") + # plt.yscale('log') + plt.ylabel("Mean episode Reward") + # plt.ylim([-0.06, -0.025]) + # plt.title("1.000.000") + plt.show() + + if make_pyplot: + plot = px.Figure() + plot.add_trace( + px.Scatter(y=train_reward_per_episode)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() + + t = np.arange(number_learning_steps) + + progress_remaining = 1.0 - (t / number_learning_steps) + + t_start = int(lr_decay_start * number_learning_steps) + t_end = int(np.minimum(lr_decay_start * number_learning_steps + lr_decay_duration * number_learning_steps, + number_learning_steps)) + + lr_curve = np.maximum( + np.minimum(learning_rate, learning_rate + (t_start * (learning_rate - final_lr)) / (t_end - t_start) \ + - (learning_rate - final_lr) / (t_end - t_start) * ((1.0 - progress_remaining) \ + * number_learning_steps)), final_lr) + + # no step-vise MA needed but episode-vise! 
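+        # Illustrative equivalent of the loop below (assumes number_learning_steps is an
+        # integer multiple of episode_len):
+        # lr_ma = lr_curve[:(len(lr_curve) // episode_len) * episode_len].reshape(-1, episode_len).mean(axis=1)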
+ # lr_ma = np.convolve(lr_curve, np.ones(episode_len), 'valid') / episode_len + num_episodes = int(number_learning_steps / episode_len) + lr_ma = np.zeros(num_episodes) + count = 0 + + for i in range(num_episodes): + lr_ma[i] = np.mean(lr_curve[count:count + episode_len]) + count += episode_len + + # plt.plot(lr_curve) + # plt.show() + + fig = plt.figure() # a new figure window + ax = fig.add_subplot(2, 1, 1) # a new axes + ax = plt.plot(train_reward_per_episode) + plt.grid() + # plt.xlabel("Episodes") + # plt.yscale('log') + plt.ylabel("Mean episode Reward") + + ax2 = fig.add_subplot(2, 1, 2) # a new axes + ax2 = plt.plot(lr_ma) + plt.grid() + plt.xlabel("Episodes") + plt.ylabel("Mean episode LR") + plt.show() + + plt.show() + + if train_episode_data is not None: + # only available if loglevel == 'train' + ############################################################## + # Plot example Training Episode + R_load = train_episode_data['R_load_training'] + i_a = train_episode_data['i_a_training'] + i_b = train_episode_data['i_b_training'] + i_c = train_episode_data['i_c_training'] + v_a = train_episode_data['v_a_training'] + v_b = train_episode_data['v_b_training'] + v_c = train_episode_data['v_c_training'] + reward = train_episode_data['Rewards'] + phase = train_episode_data['Phase'] + + plt.plot(R_load) + plt.grid() + plt.xlabel("steps") + plt.ylabel("R_load") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(i_a) + plt.plot(i_b) + plt.plot(i_c) + plt.grid() + plt.xlabel("steps") + plt.ylabel("i_abc") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(v_a) + plt.plot(v_b) + plt.plot(v_c) + plt.grid() + plt.xlabel("steps") + plt.ylabel("v_abc") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + plt.plot(reward) + plt.grid() + plt.xlabel("steps") + plt.ylabel("Reward") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + df = pd.DataFrame() + df['R_load'] = R_load + + hist = df['R_load'].hist(bins=50) + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + """ + plot = px.Figure() + plot.add_trace( + px.Scatter(y=R_load) + """ + # df2['v_0_SP'] = pd.DataFrame(test_data['inverter1_v_ref_0']) + # df2['v_1_SP'] = pd.DataFrame(test_data['inverter1_v_ref_1']) + # df2['v_2_SP'] = pd.DataFrame(test_data['inverter1_v_ref_2']) + + # df2['phase'] = pd.DataFrame(test_data['Phase']) + + # v_sp_abc = dq0_to_abc(np.array([df2['v_0_SP'], df2['v_1_SP'], df2['v_2_SP']]), np.array(df2['phase'])) + + v_mess_dq0 = abc_to_dq0(np.array([v_a, v_b, v_c]), np.array(phase)) + + # x = df2['t'] + v_d = v_mess_dq0[0][:] # df2['v_a'] + v_q = v_mess_dq0[1][:] # df2['v_b'] + v_0 = v_mess_dq0[2][:] # df2['v_c'] + + plt.plot(v_d) + plt.plot(v_q) + plt.plot(v_0) + plt.grid() + plt.xlabel("steps") + plt.ylabel("v_dq0") + plt.title(f"Trainingepisode {show_episode_number}") + plt.show() + + # v_a_SP = df2['v_0_SP']#v_sp_abc[0,:] + # v_b_SP = df2['v_1_SP']#v_sp_abc[1,:] + # v_c_SP = df2['v_2_SP']#v_sp_abc[2,:] + if make_pyplot: + plot = px.Figure() + plot.add_trace( + px.Scatter(y=v_a)) + + plot.add_trace( + px.Scatter(y=v_b)) + + plot.add_trace( + px.Scatter(y=v_c)) + + plot.update_layout( + xaxis=dict( + rangeselector=dict( + buttons=list([ + dict(count=1, + step="day", + stepmode="backward"), + ]) + ), + rangeslider=dict( + visible=True + ), + ) + ) + + plot.show() diff --git a/experiments/issue51_new/env/rewards.py b/experiments/issue51_new/env/rewards.py new file mode 100644 index 00000000..f557fce8 --- /dev/null +++ 
b/experiments/issue51_new/env/rewards.py @@ -0,0 +1,113 @@ +import numpy as np +from openmodelica_microgrid_gym.util import nested_map +from typing import List + + +class Reward: + def __init__(self, nom, lim, v_DC, gamma, det_run=False, nom_region=1.1, use_gamma_normalization=1): + self._idx = None + self.nom = nom + self.lim = lim + self.v_DC = v_DC + self.use_gamma_normalization = use_gamma_normalization + if self.use_gamma_normalization == 1: + self.gamma = gamma + else: + self.gamma = 0 + self.nom_region = nom_region + self.det_run = det_run + + def set_idx(self, obs): + if self._idx is None: + self._idx = nested_map( + lambda n: obs.index(n), + [[f'lc.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012'], + [f'lc.capacitor{k}.v' for k in '123'], [f'inverter1.v_ref.{k}' for k in '012']]) + + def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + Defines the reward function for the environment. Uses the observations and set-points to evaluate the quality of + the used parameters. + Takes current and voltage measurements and set-points to calculate the mean-root control error and uses a + logarithmic barrier function in case of violating the current limit. Barrier function is adjustable using + parameter mu. + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + iabc_master = data[idx[0]] # 3 phase currents at LC inductors + vabc_master = data[idx[2]] # 3 phase currents at LC inductors + + # set points (sp) + isp_abc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + vsp_abc_master = data[idx[3]] # convert dq set-points into three-phase abc coordinates + + SP = vsp_abc_master * self.lim + mess = vabc_master * self.lim + + if all(np.abs(mess) <= self.nom*1.1): + #if all(np.abs(mess) <= self.lim*self.nom_region): + """ + 1st area - inside wanted (nom) operation range + -v_nom -> + v_nom + rew = 1; if mess = SP + rew = 1/3; if error = SP-mess = 2*v_nom (worst case without braking out from nom area) + """ + rew = np.sum((1 - np.abs(SP - mess) / (2 * self.nom)) * 2 * (1 - self.gamma) / 3 + (1 - self.gamma) / 3) / 3 + + elif any(np.abs(mess) > self.lim): + """ + 3rd area - outside valid area - above lim - possible if enough v_DC - DANGEROUS + +-v_lim -> +-v_DC + + V1: + @ SP = +v_nom AND mess = -v_DC: + rew = -1; if error = v_DC + v_nom -> Worst case, +v_nom wanted BUT -v_DC measured + @ SP = -v_nom AND mess = -v_lim + rew ~ -1/3 - f[(lim-nom)/(nom+v_DC)] + rew -> -1 - 2/3*(1 - |lim - nom| / (nom+v_DC)) + The latter fraction is quite small but leads to depending on the system less then 2/3 is + substracted and we have a gap to the 2nd area! :) + + V2: None is returned to stop the episode (hint: in the env env.abort_reward is given back as reward(?) + + V3: rew = -1 + """ + + # V1: + # rew = np.sum( + # (1 - np.abs(SP - mess) / (self.nom + self.v_DC)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma)) / 3 + + # V2: + # if return -> rew = None and in env abort_reward is given to agent + if self.det_run: + return -(1 - self.gamma) + else: + return + + # V3: + # rew = (1 - gamma) + + # elif any(np.abs(vabc_master) > v_DC): + # rew = (1-gamma) + + else: + """ + 2nd area + +-v_nom -> +- v_lim + + @ SP = v_nom AND mess = v_nom (-µV), da if mess > v_nom (hier noch Sicherheitsabstand?) 
+ rew = 1/3 + @ SP = v_nom AND mess = -v_lim + rew = -1/3 + + """ + rew = np.sum( + (1 - np.abs(SP - mess) / (self.nom + self.lim)) * 2 * (1 - self.gamma) / 3 - (1 - self.gamma) / 3) / 3 + + return rew # * (1-0.9) + # return -np.clip(error.squeeze(), 0, 1e5) diff --git a/experiments/issue51_new/omniboard_docker_compose.yaml b/experiments/issue51_new/omniboard_docker_compose.yaml new file mode 100644 index 00000000..1e84922d --- /dev/null +++ b/experiments/issue51_new/omniboard_docker_compose.yaml @@ -0,0 +1,28 @@ +version: '3' +services: + + mongo: + image: mongo + ports: + - 127.0.0.1:27017:27017 + environment: + MONGO_INITDB_ROOT_USERNAME: sample + MONGO_INITDB_ROOT_PASSWORD: password + MONGO_INITDB_DATABASE: db + expose: + - 27017 + networks: + - omniboard + + omniboard: + image: vivekratnavel/omniboard:latest + command: [ "--mu", "mongodb://sample:password@mongo:27017/db?authSource=admin" ] + ports: + - 127.0.0.1:9000:9000 + networks: + - omniboard + depends_on: + - mongo + +networks: + omniboard: \ No newline at end of file diff --git a/experiments/issue51_new/policy.py b/experiments/issue51_new/policy.py new file mode 100644 index 00000000..489e3c9a --- /dev/null +++ b/experiments/issue51_new/policy.py @@ -0,0 +1,122 @@ +from collections import Callable +from typing import Optional, Type, Union, List, Dict, Any + +import gym +import torch as th +from stable_baselines.common.schedules import get_schedule_fn +from stable_baselines.td3.policies import TD3Policy +from stable_baselines3 import DDPG +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor, FlattenExtractor, get_actor_critic_arch +from stable_baselines3.common.utils import update_learning_rate +from torch import nn + + +class MultiLRPolicy(TD3Policy): + def __init__( + self, + observation_space: gym.spaces.Space, + action_space: gym.spaces.Space, + lr_schedule: Callable, + net_arch: Optional[Union[List[int], Dict[str, List[int]]]] = None, + activation_fn: Type[nn.Module] = nn.ReLU, + features_extractor_class: Type[BaseFeaturesExtractor] = FlattenExtractor, + features_extractor_kwargs: Optional[Dict[str, Any]] = None, + normalize_images: bool = True, + optimizer_class: Type[th.optim.Optimizer] = th.optim.Adam, + optimizer_kwargs: Optional[Dict[str, Any]] = None, + n_critics: int = 2, + share_features_extractor: bool = True, + ): + super().__init__( + observation_space, + action_space, + features_extractor_class, + features_extractor_kwargs, + optimizer_class=optimizer_class, + optimizer_kwargs=optimizer_kwargs, + squash_output=True, + ) + + # Default network architecture, from the original paper + if net_arch is None: + if features_extractor_class == FlattenExtractor: + net_arch = [400, 300] + else: + net_arch = [] + + actor_arch, critic_arch = get_actor_critic_arch(net_arch) + + self.net_arch = net_arch + self.activation_fn = activation_fn + self.net_args = { + "observation_space": self.observation_space, + "action_space": self.action_space, + "net_arch": actor_arch, + "activation_fn": self.activation_fn, + "normalize_images": normalize_images, + } + self.actor_kwargs = self.net_args.copy() + self.critic_kwargs = self.net_args.copy() + self.critic_kwargs.update( + { + "n_critics": n_critics, + "net_arch": critic_arch, + "share_features_extractor": share_features_extractor, + } + ) + + self.actor, self.actor_target = None, None + self.critic, self.critic_target = None, None + self.share_features_extractor = share_features_extractor + + self._build(lr_schedule) + + def _build(self, lr_schedule: Callable) -> 
None: + # Create actor and target + # the features extractor should not be shared + self.actor = self.make_actor(features_extractor=None) + self.actor_target = self.make_actor(features_extractor=None) + # Initialize the target to have the same weights as the actor + self.actor_target.load_state_dict(self.actor.state_dict()) + + self.actor.optimizer = self.optimizer_class(self.actor.parameters(), lr=lr_schedule[0](1), + **self.optimizer_kwargs) + + if self.share_features_extractor: + self.critic = self.make_critic(features_extractor=self.actor.features_extractor) + # Critic target should not share the features extactor with critic + # but it can share it with the actor target as actor and critic are sharing + # the same features_extractor too + # NOTE: as a result the effective poliak (soft-copy) coefficient for the features extractor + # will be 2 * tau instead of tau (updated one time with the actor, a second time with the critic) + self.critic_target = self.make_critic(features_extractor=self.actor_target.features_extractor) + else: + # Create new features extractor for each network + self.critic = self.make_critic(features_extractor=None) + self.critic_target = self.make_critic(features_extractor=None) + + self.critic_target.load_state_dict(self.critic.state_dict()) + self.critic.optimizer = self.optimizer_class(self.critic.parameters(), lr=lr_schedule[1](1), + **self.optimizer_kwargs) + + +class MultiLRAlgorithm(DDPG): + def _setup_lr_schedule(self) -> None: + """Transform to callable if needed.""" + self.lr_schedule = [get_schedule_fn(lr) for lr in self.learning_rate] + + def _update_learning_rate(self, optimizers: Union[List[th.optim.Optimizer], th.optim.Optimizer]) -> None: + """ + Update the optimizers learning rate using the current learning rate schedule + and the current progress remaining (from 1 to 0). + + :param optimizers: + An optimizer or a list of optimizers. + """ + # Log the current learning rate + logger.record("train/learning_rate", self.lr_schedule[0](self._current_progress_remaining)) + + if not isinstance(optimizers, list): + optimizers = [optimizers] + for i, optimizer in enumerate(optimizers): + update_learning_rate(optimizer, self.lr_schedule[i](self._current_progress_remaining)) diff --git a/experiments/issue51_new/stable_baselines.py b/experiments/issue51_new/stable_baselines.py new file mode 100644 index 00000000..84395674 --- /dev/null +++ b/experiments/issue51_new/stable_baselines.py @@ -0,0 +1,112 @@ +from datetime import datetime +from os import makedirs +from typing import List + +import gym +import numpy as np +from stable_baselines3 import PPO +from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback, EveryNTimesteps +from stable_baselines3.common.monitor import Monitor + +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import nested_map + +np.random.seed(0) + +timestamp = datetime.now().strftime(f'%Y.%b.%d %X ') +makedirs(timestamp) + +# Simulation definitions +net = Network.load('../../net/net_single-inv-curr.yaml') +max_episode_steps = 300 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. 
SafeOpt iterations) +iLimit = 30 # inverter current limit / A +iNominal = 20 # nominal inverter current / A +mu = 2 # factor for barrier function (see below) + + +class Reward: + def __init__(self): + self._idx = None + + def set_idx(self, obs): + if self._idx is None: + self._idx = nested_map( + lambda n: obs.index(n), + [[f'lc1.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012']]) + + def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + Defines the reward function for the environment. Uses the observations and setpoints to evaluate the quality of the + used parameters. + Takes current measurement and setpoints so calculate the mean-root-error control error and uses a logarithmic + barrier function in case of violating the current limit. Barrier function is adjustable using parameter mu. + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + Iabc_master = data[idx[0]] # 3 phase currents at LC inductors + ISPabc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + + # control error = mean-root-error (MRE) of reference minus measurement + # (due to normalization the control error is often around zero -> compared to MSE metric, the MRE provides + # better, i.e. more significant, gradients) + # plus barrier penalty for violating the current constraint + error = np.sum((np.abs((ISPabc_master - Iabc_master)) / iLimit) ** 0.5, axis=0) \ + # + -np.sum(mu * np.log(1 - np.maximum(np.abs(Iabc_master) - iNominal, 0) / (iLimit - iNominal)), axis=0) + error /= max_episode_steps + + return -np.clip(error.squeeze(), 0, 1e5) + + +def xylables(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + fig.savefig(f'{timestamp}/Inductor_currents.pdf') + + +env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=Reward().rew_fun, + viz_cols=[ + PlotTmpl([[f'lc1.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + net=net, + model_path='../../omg_grid/grid.network_singleInverter.fmu') + +with open(f'{timestamp}/env.txt', 'w') as f: + print(str(env), file=f) +env = Monitor(env) + + +class RecordEnvCallback(BaseCallback): + def _on_step(self) -> bool: + obs = env.reset() + for _ in range(max_episode_steps): + env.render() + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = env.step(action) + if done: + break + env.close() + env.reset() + return True + + +model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=f'{timestamp}/') +checkpoint_on_event = CheckpointCallback(save_freq=100000, save_path=f'{timestamp}/checkpoints/') +record_env = RecordEnvCallback() +plot_callback = EveryNTimesteps(n_steps=2000, callback=record_env) +model.learn(total_timesteps=5000000, callback=[checkpoint_on_event, plot_callback]) diff --git a/experiments/issue51_new/stable_baselinesDDPG.py b/experiments/issue51_new/stable_baselinesDDPG.py new file mode 100644 index 00000000..b25199c2 --- /dev/null +++ b/experiments/issue51_new/stable_baselinesDDPG.py @@ -0,0 +1,266 @@ +import logging +from datetime import datetime +from os import makedirs +from tempfile import 
NamedTemporaryFile +from typing import List + +import torch as th +import torch.nn as nn +from labwatch.hyperparameters import UniformFloat +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor + +import gym +import numpy as np +from stable_baselines3 import DDPG +from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise +from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback, EveryNTimesteps +from stable_baselines3.common.monitor import Monitor + +import matplotlib.pyplot as plt +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import nested_map + +from sacred import Experiment +from sacred.observers import FileStorageObserver, MongoObserver + +from labwatch.assistant import LabAssistant +from labwatch.optimizers.random_search import RandomSearch + +np.random.seed(0) + +folder_name = 'DDPG/' +experiment_name = 'DDPG_CC_Reward_MRE_randsearch' +timestamp = datetime.now().strftime(f'%Y.%b.%d_%X') + +makedirs(folder_name + experiment_name + timestamp) + +train_steps = 20000 + +# Simulation definitions +net = Network.load('../../net/net_single-inv-curr.yaml') +max_episode_steps = 300 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +iLimit = 30 # inverter current limit / A +iNominal = 20 # nominal inverter current / A +mu = 2 # factor for barrier function (see below) + + +class Reward: + def __init__(self): + self._idx = None + + def set_idx(self, obs): + if self._idx is None: + self._idx = nested_map( + lambda n: obs.index(n), + [[f'lc1.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012']]) + + def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + Defines the reward function for the environment. Uses the observations and setpoints to evaluate the quality of the + used parameters. + Takes current measurement and setpoints so calculate the mean-root-error control error and uses a logarithmic + barrier function in case of violating the current limit. Barrier function is adjustable using parameter mu. + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + Iabc_master = data[idx[0]] # 3 phase currents at LC inductors + ISPabc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + + # control error = mean-root-error (MRE) of reference minus measurement + # (due to normalization the control error is often around zero -> compared to MSE metric, the MRE provides + # better, i.e. 
more significant, gradients) + # plus barrier penalty for violating the current constraint + error = np.sum((np.abs((ISPabc_master - Iabc_master)) / iLimit) ** 0.5, axis=0) \ + + -np.sum(mu * np.log(1 - np.maximum(np.abs(Iabc_master) - iNominal, 0) / (iLimit - iNominal)), axis=0) + error /= max_episode_steps + + return -np.clip(error.squeeze(), 0, 1e5) + + +def xylables(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + fig.savefig(f'{folder_name + experiment_name + timestamp}/Inductor_currents{datetime.now()}.pdf') + plt.show() + + +env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=Reward().rew_fun, + viz_cols=[ + PlotTmpl([[f'lc1.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + net=net, + model_path='../../omg_grid/grid.network_singleInverter.fmu', + is_normalized=True, + log_level=logging.WARNING) + +with open(f'{folder_name + experiment_name + timestamp}/env.txt', 'w') as f: + print(str(env), file=f) +env = Monitor(env) + +n_actions = env.action_space.shape[-1] +action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions)) + + +class CustomMPL(BaseFeaturesExtractor): + + def __init__(self, observation_space: gym.spaces.Box, features_dim: int = 256): + super(CustomMPL, self).__init__(observation_space, features_dim) + # We assume CxHxW images (channels first) + # Re-ordering will be done by pre-preprocessing or wrapper + n_input_channels = observation_space.shape[0] + self.cnn = nn.Sequential( + nn.Linear(n_input_channels, 32), + nn.ReLU(), + nn.Linear(32, 64), + nn.ReLU(), + ) + + # Compute shape by doing one forward pass + with th.no_grad(): + n_flatten = self.cnn( + th.as_tensor(observation_space.sample()[None]).float() + ).shape[1] + + self.linear = nn.Sequential(nn.Linear(n_flatten, features_dim), nn.ReLU()) + + def forward(self, observations: th.Tensor) -> th.Tensor: + return self.linear(self.cnn(observations)) + + +policy_kwargs = dict( + features_extractor_class=CustomMPL, + features_extractor_kwargs=dict(features_dim=128, net_arch=[32, 32]), +) +ex = Experiment(experiment_name + timestamp) +# ex.observers.append(FileStorageObserver('runs')) +ex.observers.append(MongoObserver(url=f'mongodb://sample:password@localhost:27017/?authMechanism=SCRAM-SHA-1', + db_name='db', failure_dir='fail')) + +a = LabAssistant(ex, "labwatch_demo_keras", optimizer=RandomSearch) + + +@a.search_space +def search_space(): + learning_rate = UniformFloat(lower=10e-6, + upper=10e-1, + default=10e-2, + log_scale=True) + + +# MongoObserver.create(url=f'mongodb://sample:password@localhost:27017/?authMechanism=SCRAM-SHA-1', +# db_name='db')) +@ex.config +def cfg(): + # DDPG learning parameters + gamma = 0.9 # discount factor + batch_size = 128 + memory_interval = 1 + # alpha_actor = 5e-4#5e-6 + learning_rate = 5e-3 # 5e-4 + noise_var = 0.2 + noise_theta = 5 # stiffness of OU + alpha_lRelu = 0.1 + weigth_regularizer = 0.01 + + memory_lim = 5000 # = buffersize? 
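+    # memory_lim is indeed the replay-buffer capacity: main() below passes it to the agent
+    # as buffer_size=memory_lim (number of transitions kept for experience replay).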
+ warm_up_steps_actor = 2048 + warm_up_steps_critic = 1024 + target_model_update = 1000 + + # NN architecture + actor_hidden_size = 100 # Using LeakyReLU + # output linear + critic_hidden_size_1 = 75 # Using LeakyReLU + critic_hidden_size_2 = 75 # Using LeakyReLU + critic_hidden_size_3 = 75 # Using LeakyReLU + # output linear + + n_actions = env.action_space.shape[-1] + + # description = experiment_name + # start_time = timestamp + # corresponding_data_in = folder_name + experiment_name + timestamp + + max_learning_steps = train_steps + + +@ex.automain +def main(gamma, batch_size, memory_interval, learning_rate, noise_var, noise_theta, alpha_lRelu, + weigth_regularizer, + memory_lim, warm_up_steps_actor, warm_up_steps_critic, target_model_update, actor_hidden_size, + critic_hidden_size_1, critic_hidden_size_2, critic_hidden_size_3, n_actions, + max_learning_steps): # description, start_time, + # corresponding_data_in): + class RecordEnvCallback(BaseCallback): + def _on_step(self) -> bool: + obs = env.reset() + for _ in range(max_episode_steps): + env.render() + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = env.step(action) + if done: + break + env.close() + env.reset() + return True + + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size], qf=[critic_hidden_size_1, + critic_hidden_size_2, + critic_hidden_size_3])) + # policy_kwargs = dict( activation_fn=th.nn.LeakyReLU(negative_slope=alpha_lRelu), net_arch=dict(pi=[actor_hidden_size], qf=[critic_hidden_size_1, + # critic_hidden_size_2, + # critic_hidden_size_3])) + model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name + experiment_name + timestamp}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=memory_lim, learning_starts=warm_up_steps_critic, + batch_size=batch_size, tau=0.005, gamma=gamma, action_noise=action_noise, + train_freq=- 1, gradient_steps=- 1, n_episodes_rollout=1, optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + checkpoint_on_event = CheckpointCallback(save_freq=1000, + save_path=f'{folder_name + experiment_name + timestamp}/checkpoints/') + record_env = RecordEnvCallback() + plot_callback = EveryNTimesteps(n_steps=1000, callback=record_env) + model.learn(total_timesteps=max_learning_steps, callback=[checkpoint_on_event, plot_callback]) + + model.save(f'{folder_name + experiment_name + timestamp}/model.zip') + ex.add_artifact(f'{folder_name + experiment_name + timestamp}/model.zip') + + # model.save(experiment_name) + # ex.add_artifact(f'{experiment_name}.zip') + + # with NamedTemporaryFile() as t: + + # model.save(t.name) + # ex.add_artifact(t.name, f'{experiment_name}.zip') + + # del model # remove to demonstrate saving and loading + + # model = DDPG.load("ddpg_CC") + + # obs = env.reset() + # while True: + # action, _states = model.predict(obs) + # obs, rewards, dones, info = env.step(action) + # env.render() + + return 0 diff --git a/experiments/issue51_new/stable_baselinesDDPG_double_lr.py b/experiments/issue51_new/stable_baselinesDDPG_double_lr.py new file mode 100644 index 00000000..63efcfaa --- /dev/null +++ b/experiments/issue51_new/stable_baselinesDDPG_double_lr.py @@ -0,0 +1,286 @@ +from datetime import datetime +from functools import partial +from itertools import 
accumulate +from os import makedirs + +import gym +import matplotlib.pyplot as plt +import numpy as np +import optuna +import torch as th +from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback, EveryNTimesteps +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.noise import OrnsteinUhlenbeckActionNoise +from stochastic.processes import VasicekProcess + +from experiments.issue51_new.env.rewards import Reward +from experiments.issue51_new.policy import MultiLRPolicy, MultiLRAlgorithm +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import RandProcess + +np.random.seed(0) + +folder_name = 'DDPG_VC_hyperoptTEST_Neu/' +# experiment_name = 'DDPG_VC_Reward_MRE_reward_NOT_NORMED' +experiment_name = 'DDPG_VC_' +timestamp = datetime.now().strftime(f'_%Y.%b.%d_%X') + +makedirs(folder_name) + +# Simulation definitions +net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +max_episode_steps = 1000 # number of simulation steps per episode +# num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +# iLimit = 30 # inverter current limit / A +# iNominal = 20 # nominal inverter current / A +mu_c = 2 # factor for barrier function (see below) +mu_v = 2 # factor for barrier function (see below) +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +# plant +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F +R = 28 # nomVoltPeak / 7.5 # / Ohm +lower_bound_load = 11 # to allow maximal load that draws i_limit (toDo: let exceed?) 
+upper_bound_load = 45 # to apply symmetrical load bounds + +loadstep_timestep = max_episode_steps / 2 + +gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=1000, vol=10, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + +def load_step(t, gain): + """ + Changes the load parameters + :param t: + :param gain: device parameter + :return: Sample from SP + """ + # Defines a load step after 0.01 s + # if loadstep_timestep*net.ts < t <= loadstep_timestep*net.ts + net.ts: + # gen.proc.mean = gain * 0.55 + # gen.reserve = gain * 0.55 + # elif t <= net.ts: + # gen.proc.mean = gain + + return gen.sample(t) + + +def experiment_fit_DDPG(learning_rate, gamma, n_trail): + makedirs(folder_name + experiment_name + n_trail) + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + fig.show() + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + fig.savefig(f'{folder_name + experiment_name + n_trail}/Capacitor_voltages{datetime.now()}.pdf') + fig.show() + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + fig.savefig(f'{folder_name + experiment_name + n_trail}/Load.pdf') + fig.show() + + rew = Reward(v_nom, v_lim, v_DC, gamma) + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + # PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + # callback=xylables_R, + # color=[['b', 'r', 'g']], + # style=[[None]] + # ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(load_step, gain=R), + 'r_load.resistor2.R': partial(load_step, gain=R), + 'r_load.resistor3.R': partial(load_step, gain=R), + # 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_lim, + # high=v_lim) if t == 0 else None, + # 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_lim, + # high=v_lim) if t == 0 else None, + # 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_lim, + # high=v_lim) if t == 0 else None, + # 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_lim, + # high=i_lim) if t == 0 else None, + # 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_lim, + # high=i_lim) if t == 0 else None, + # 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_lim, + # high=i_lim) if t == 0 else None, + }, + net=net, + model_path='../../omg_grid/grid.paper_loadstep.fmu', + on_episode_reset_callback=partial(gen.reset, 
initial=R), + is_normalized=True + ) + + with open(f'{folder_name + experiment_name + n_trail}/env.txt', 'w') as f: + print(str(env), file=f) + env = Monitor(env) + + # DDPG learning parameters + # gamma = 0.9 # discount factor + batch_size = 128 + memory_interval = 1 + # alpha_actor = 5e-6 + # learning_rate = 5e-3 + + # learning_rate = trail.suggest_loguniform("lr", 1e-5, 1) + + noise_var = 0.2 + noise_theta = 5 # stiffness of OU + alpha_lRelu = 0.1 + weigth_regularizer = 0.5 + + memory_lim = 5000 # = buffersize? + warm_up_steps_actor = 2048 + warm_up_steps_critic = 1024 + target_model_update = 1000 + + # NN architecture + actor_hidden_size = 100 # Using LeakyReLU + # output linear + critic_hidden_size_1 = 75 # Using LeakyReLU + critic_hidden_size_2 = 75 # Using LeakyReLU + critic_hidden_size_3 = 75 # Using LeakyReLU + + # output linear + + class RecordEnvCallback(BaseCallback): + def _on_step(self) -> bool: + rewards = [] + obs = env.reset() + for _ in range(max_episode_steps): + env.render() + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = env.step(action) + rewards.append(reward) + if done: + break + + acc_Reward = list(accumulate(rewards)) + + plt.plot(rewards) + plt.xlabel(r'$t\,/\,\mathrm{s}$') + plt.ylabel('$Reward$') + plt.grid(which='both') + plt.savefig(f'{folder_name + experiment_name + n_trail}/reward{datetime.now()}.pdf') + plt.show() + + # plt.plot(acc_Reward) + # plt.xlabel(r'$t\,/\,\mathrm{s}$') + # plt.ylabel('$Reward_sum$') + # plt.grid(which='both') + # plt.savefig(f'{folder_name + experiment_name + timestamp}/reward_sum_{datetime.now()}.pdf') + # plt.show() + + env.close() + env.reset() + return True + + n_actions = env.action_space.shape[-1] + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size], qf=[critic_hidden_size_1, + critic_hidden_size_2, + critic_hidden_size_3])) + + # model = DDPG('', env, verbose=1, tensorboard_log=f'{folder_name + experiment_name + n_trail}/', + # policy_kwargs=policy_kwargs, + # learning_rate=learning_rate, buffer_size=memory_lim, learning_starts=warm_up_steps_critic, + # batch_size=batch_size, tau=0.005, gamma=gamma, action_noise=action_noise, + # train_freq=- 1, gradient_steps=- 1, n_episodes_rollout=1, optimize_memory_usage=False, + # create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + # + model = MultiLRAlgorithm(MultiLRPolicy, env, verbose=1, + tensorboard_log=f'{folder_name + experiment_name + n_trail}/', + policy_kwargs=policy_kwargs, + learning_rate=[0, learning_rate], buffer_size=memory_lim, + learning_starts=warm_up_steps_critic, + batch_size=batch_size, tau=0.005, gamma=gamma, action_noise=action_noise, + train_freq=- 1, gradient_steps=- 1, n_episodes_rollout=1, optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + checkpoint_on_event = CheckpointCallback(save_freq=10000, + save_path=f'{folder_name + experiment_name + n_trail}/checkpoints/') + record_env = RecordEnvCallback() + plot_callback = EveryNTimesteps(n_steps=10000, callback=record_env) + model.learn(total_timesteps=200000, callback=[checkpoint_on_event, plot_callback]) + + model.save(f'{folder_name + experiment_name + n_trail}/model.zip') + + return_sum = 0.0 + obs = env.reset() + while True: + + action, _states = model.predict(obs) + obs, rewards, done, info 
= env.step(action) + env.render() + return_sum += rewards + if done: + break + + return return_sum + + +experiment_fit_DDPG(.001, .9, str(0)) +exit() + + +def objective(trail): + learning_rate = trail.suggest_loguniform("lr", 1e-5, 1) + gamma = trail.suggest_loguniform("gamma", 0.8, 1) + + return experiment_fit_DDPG(learning_rate, gamma, str(trail.number)) + + +study = optuna.create_study(direction='maximize', storage=f'sqlite:///{folder_name}optuna_data.sqlite3') + +study.optimize(objective, n_trials=50) +print(study.best_params, study.best_value) + +# pd.Series(index=[trail.params['lr'] for trail in study.trials], data=[trail.value for trail in study.trials]).scatter() diff --git a/experiments/issue51_new/stable_baselinesDDPG_optuna.py b/experiments/issue51_new/stable_baselinesDDPG_optuna.py new file mode 100644 index 00000000..55de2174 --- /dev/null +++ b/experiments/issue51_new/stable_baselinesDDPG_optuna.py @@ -0,0 +1,198 @@ +import logging +from datetime import datetime +from os import makedirs +from typing import List + +import pandas as pd +import gym +import matplotlib.pyplot as plt +import numpy as np +import optuna +import torch as th +import torch.nn as nn +from stable_baselines3 import DDPG +from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback, EveryNTimesteps +from stable_baselines3.common.monitor import Monitor +from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor + +from openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import nested_map + +np.random.seed(0) + +folder_name = 'DDPG/' +experiment_name = 'DDPG_CC_Reward_MRE_randsearch' +timestamp = datetime.now().strftime(f'%Y.%b.%d_%X') + +makedirs(folder_name + experiment_name + timestamp) + +train_steps = 5000 + +# Simulation definitions +net = Network.load('../../net/net_single-inv-curr.yaml') +max_episode_steps = 300 # number of simulation steps per episode +num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +iLimit = 30 # inverter current limit / A +iNominal = 20 # nominal inverter current / A +mu = 2 # factor for barrier function (see below) + + +class Reward: + def __init__(self): + self._idx = None + + def set_idx(self, obs): + if self._idx is None: + self._idx = nested_map( + lambda n: obs.index(n), + [[f'lc1.inductor{k}.i' for k in '123'], [f'inverter1.i_ref.{k}' for k in '012']]) + + def rew_fun(self, cols: List[str], data: np.ndarray, risk) -> float: + """ + Defines the reward function for the environment. Uses the observations and setpoints to evaluate the quality of the + used parameters. + Takes current measurement and setpoints so calculate the mean-root-error control error and uses a logarithmic + barrier function in case of violating the current limit. Barrier function is adjustable using parameter mu. + + :param cols: list of variable names of the data + :param data: observation data from the environment (ControlVariables, e.g. 
currents and voltages) + :return: Error as negative reward + """ + self.set_idx(cols) + idx = self._idx + + Iabc_master = data[idx[0]] # 3 phase currents at LC inductors + ISPabc_master = data[idx[1]] # convert dq set-points into three-phase abc coordinates + + # control error = mean-root-error (MRE) of reference minus measurement + # (due to normalization the control error is often around zero -> compared to MSE metric, the MRE provides + # better, i.e. more significant, gradients) + # plus barrier penalty for violating the current constraint + error = np.sum((np.abs((ISPabc_master - Iabc_master)) / iLimit) ** 0.5, axis=0) \ + + -np.sum(mu * np.log(1 - np.maximum(np.abs(Iabc_master) - iNominal, 0) / (iLimit - iNominal)), axis=0) + error /= max_episode_steps + + return -np.clip(error.squeeze(), 0, 1e5) + + +def xylables(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + fig.savefig(f'{folder_name + experiment_name + timestamp}/Inductor_currents{datetime.now()}.pdf') + plt.show() + + +env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=Reward().rew_fun, + viz_cols=[ + PlotTmpl([[f'lc1.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + net=net, + model_path='../../omg_grid/grid.network_singleInverter.fmu', + is_normalized=True, + log_level=logging.WARNING) + +with open(f'{folder_name + experiment_name + timestamp}/env.txt', 'w') as f: + print(str(env), file=f) +env = Monitor(env) + +n_actions = env.action_space.shape[-1] +action_noise = NormalActionNoise(mean=np.zeros(n_actions), sigma=0.1 * np.ones(n_actions)) + + + +def objective(trail): + # DDPG learning parameters + gamma = 0.9 # discount factor + batch_size = 128 + memory_interval = 1 + + noise_var = 0.2 + noise_theta = 5 # stiffness of OU + alpha_lRelu = 0.1 + weigth_regularizer = 0.01 + + memory_lim = 5000 # = buffersize? 
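+    # Only learning_rate is sampled by Optuna in this objective (trail.suggest_loguniform
+    # below); the remaining constants are fixed, and some of them (memory_interval,
+    # alpha_lRelu, weigth_regularizer) are defined here but never forwarded to the DDPG
+    # constructor.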
+ warm_up_steps_actor = 2048 + warm_up_steps_critic = 1024 + target_model_update = 1000 + + # NN architecture + actor_hidden_size = 100 # Using LeakyReLU + # output linear + critic_hidden_size_1 = 75 # Using LeakyReLU + critic_hidden_size_2 = 75 # Using LeakyReLU + critic_hidden_size_3 = 75 # Using LeakyReLU + # output linear + + n_actions = env.action_space.shape[-1] + + + max_learning_steps = train_steps + + learning_rate = trail.suggest_loguniform("lr", 1e-5, 1) + + class RecordEnvCallback(BaseCallback): + def _on_step(self) -> bool: + obs = env.reset() + for _ in range(max_episode_steps): + env.render() + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = env.step(action) + if done: + break + env.close() + env.reset() + return True + + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size], qf=[critic_hidden_size_1, + critic_hidden_size_2, + critic_hidden_size_3])) + + model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name + experiment_name + timestamp}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=memory_lim, learning_starts=warm_up_steps_critic, + batch_size=batch_size, tau=0.005, gamma=gamma, action_noise=action_noise, + train_freq=- 1, gradient_steps=- 1, n_episodes_rollout=1, optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + checkpoint_on_event = CheckpointCallback(save_freq=1000, + save_path=f'{folder_name + experiment_name + timestamp}/checkpoints/') + record_env = RecordEnvCallback() + plot_callback = EveryNTimesteps(n_steps=1000, callback=record_env) + model.learn(total_timesteps=max_learning_steps, callback=[checkpoint_on_event, plot_callback]) + + model.save(f'{folder_name + experiment_name + timestamp}/model.zip') + + return_sum = 0.0 + obs = env.reset() + while True: + + action, _states = model.predict(obs) + obs, rewards, done, info = env.step(action) + env.render() + return_sum += rewards + if done: + break + + return return_sum + + +study = optuna.create_study(direction='maximize', storage='sqlite:///db.sqlite3') +# change to MAXIMIZE +study.optimize(objective, n_trials=2) + +pd.Series(index=[trail.params['lr'] for trail in study.trials], data=[trail.value for trail in study.trials]).scatter() diff --git a/experiments/issue51_new/stable_baselinesDDPG_voltage_control.py b/experiments/issue51_new/stable_baselinesDDPG_voltage_control.py new file mode 100644 index 00000000..cb394ed3 --- /dev/null +++ b/experiments/issue51_new/stable_baselinesDDPG_voltage_control.py @@ -0,0 +1,351 @@ +from datetime import datetime +from functools import partial +from itertools import accumulate +from os import makedirs + +import time + +import torch as th +import torch.nn as nn +from stable_baselines3.common.torch_layers import BaseFeaturesExtractor + +import gym +import numpy as np +import pandas as pd +import optuna + +import matplotlib.pyplot as plt + +from stable_baselines3 import DDPG +from stable_baselines3.common.noise import NormalActionNoise, OrnsteinUhlenbeckActionNoise +from stable_baselines3.common.callbacks import BaseCallback, CheckpointCallback, EveryNTimesteps +from stable_baselines3.common.monitor import Monitor +from stochastic.processes import VasicekProcess + +from experiments.issue51_new.env.rewards import Reward +from 
openmodelica_microgrid_gym.env import PlotTmpl +from openmodelica_microgrid_gym.net import Network +from openmodelica_microgrid_gym.util import nested_map, RandProcess + +np.random.seed(0) + +folder_name = 'DDPG_VC_randLoad_exploringStarts/' +# experiment_name = 'DDPG_VC_Reward_MRE_reward_NOT_NORMED' +experiment_name = 'DDPG_VC_bestParamsTest' +timestamp = datetime.now().strftime(f'_%Y.%b.%d_%X') + + + +makedirs(folder_name, exist_ok=True) + +# Simulation definitions +net = Network.load('../../net/net_single-inv-Paper_Loadstep.yaml') +max_episode_steps = 1000 # number of simulation steps per episode +# num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +# iLimit = 30 # inverter current limit / A +# iNominal = 20 # nominal inverter current / A +mu_c = 2 # factor for barrier function (see below) +mu_v = 2 # factor for barrier function (see below) +i_lim = net['inverter1'].i_lim # inverter current limit / A +i_nom = net['inverter1'].i_nom # nominal inverter current / A +v_nom = net.v_nom +v_lim = net['inverter1'].v_lim +v_DC = net['inverter1'].v_DC +# plant +L_filter = 2.3e-3 # / H +R_filter = 400e-3 # / Ohm +C_filter = 10e-6 # / F +R = 28 # nomVoltPeak / 7.5 # / Ohm +lower_bound_load = 11 # to allow maximal load that draws i_limit (toDo: let exceed?) +upper_bound_load = 45 # to apply symmetrical load bounds + +loadstep_timestep = max_episode_steps / 2 + +gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=1000, vol=10, mean=R), initial=R, + bounds=(lower_bound_load, upper_bound_load)) + + +class RandomLoad: + def __init__(self, max_episode_steps, ts, loadstep_time=None): + self.max_episode_steps = max_episode_steps + self.ts = ts + if loadstep_time is None: + self.loadstep_time = np.random.randint(0, self.max_episode_steps) + else: + self.loadstep_time = loadstep_time + + def reset(self, loadstep_time=None): + if loadstep_time is None: + self.loadstep_time = np.random.randint(0, self.max_episode_steps) + else: + self.loadstep_time = loadstep_time + + def load_step(self, t, gain): + """ + Changes the load parameters + :param t: + :param gain: device parameter + :return: Sample from SP + """ + # Defines a load step after 0.01 s + if self.loadstep_time * self.ts < t <= self.loadstep_time * self.ts + self.ts: + gen.proc.mean = gain * 0.55 + gen.reserve = gain * 0.55 + elif t <= self.ts: + gen.proc.mean = gain + + return gen.sample(t) + + +class CallbackList(list): + def fire(self, *args, **kwargs): + for listener in self: + listener(*args, **kwargs) + + +# def experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, n_trail): +def experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, batch_size, + actor_hidden_size, critic_hidden_size, n_trail): + makedirs(folder_name + experiment_name + n_trail, exist_ok=True) + + def xylables_i(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$i_{\mathrm{abc}}\,/\,\mathrm{A}$') + ax.grid(which='both') + fig.savefig(f'{folder_name + experiment_name + n_trail}/Inductor_currents.pdf') + plt.close() + + def xylables_v(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + ax.set_ylabel('$v_{\mathrm{abc}}\,/\,\mathrm{V}$') + ax.grid(which='both') + #ax.set_xlim([0, 0.005]) + ts = time.gmtime() + fig.savefig(f'{folder_name + experiment_name + n_trail}/Capacitor_voltages{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + def xylables_R(fig): + ax = fig.gca() + ax.set_xlabel(r'$t\,/\,\mathrm{s}$') + 
ax.set_ylabel('$R_{\mathrm{abc}}\,/\,\mathrm{\Omega}$') + ax.grid(which='both') + ax.set_ylim([lower_bound_load - 2, upper_bound_load + 2]) + fig.savefig(f'{folder_name + experiment_name + n_trail}/Load.pdf') + plt.close() + + rew = Reward(v_nom, v_lim, v_DC, gamma, use_gamma_normalization=use_gamma_in_rew) + rand_load = RandomLoad(max_episode_steps, net.ts) + + cb = CallbackList() + cb.append(partial(gen.reset, initial=R)) + cb.append(rand_load.reset) + + env = gym.make('openmodelica_microgrid_gym:ModelicaEnv_test-v1', + reward_fun=rew.rew_fun, + viz_cols=[ + PlotTmpl([[f'lc.capacitor{i}.v' for i in '123'], [f'inverter1.v_ref.{k}' for k in '012']], + callback=xylables_v, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'lc.inductor{i}.i' for i in '123'], [f'inverter1.i_ref.{k}' for k in '012']], + callback=xylables_i, + color=[['b', 'r', 'g'], ['b', 'r', 'g']], + style=[[None], ['--']] + ), + PlotTmpl([[f'r_load.resistor{i}.R' for i in '123']], + callback=xylables_R, + color=[['b', 'r', 'g']], + style=[[None]] + ) + ], + viz_mode='episode', + max_episode_steps=max_episode_steps, + model_params={'lc.resistor1.R': R_filter, + 'lc.resistor2.R': R_filter, + 'lc.resistor3.R': R_filter, + 'lc.resistor4.R': 0.0000001, + 'lc.resistor5.R': 0.0000001, + 'lc.resistor6.R': 0.0000001, + 'lc.inductor1.L': L_filter, + 'lc.inductor2.L': L_filter, + 'lc.inductor3.L': L_filter, + 'lc.capacitor1.C': C_filter, + 'lc.capacitor2.C': C_filter, + 'lc.capacitor3.C': C_filter, + 'r_load.resistor1.R': partial(rand_load.load_step, gain=R), + 'r_load.resistor2.R': partial(rand_load.load_step, gain=R), + 'r_load.resistor3.R': partial(rand_load.load_step, gain=R), + 'lc.capacitor1.v': lambda t: np.random.uniform(low=-v_lim, + high=v_lim) if t == 0 else None, + 'lc.capacitor2.v': lambda t: np.random.uniform(low=-v_lim, + high=v_lim) if t == 0 else None, + 'lc.capacitor3.v': lambda t: np.random.uniform(low=-v_lim, + high=v_lim) if t == 0 else None, + 'lc.inductor1.i': lambda t: np.random.uniform(low=-i_lim, + high=i_lim) if t == 0 else None, + 'lc.inductor2.i': lambda t: np.random.uniform(low=-i_lim, + high=i_lim) if t == 0 else None, + 'lc.inductor3.i': lambda t: np.random.uniform(low=-i_lim, + high=i_lim) if t == 0 else None, + }, + net=net, + model_path='../../omg_grid/grid.paper_loadstep.fmu', + # on_episode_reset_callback=partial(gen.reset, initial=R), + # on_episode_reset_callback=[partial(gen.reset, initial=R), partial(rand_load.reset)], + # on_episode_reset_callback=rand_load.reset, + on_episode_reset_callback=cb.fire, + is_normalized=True + ) + + with open(f'{folder_name + experiment_name + n_trail}/env.txt', 'w') as f: + print(str(env), file=f) + env = Monitor(env) + + # DDPG learning parameters + # gamma = 0.9 # discount factor + batch_size = batch_size + memory_interval = 1 + # alpha_actor = 5e-6 + # learning_rate = 5e-3 + + # learning_rate = trail.suggest_loguniform("lr", 1e-5, 1) + + noise_var = 0.2 + noise_theta = 5 # stiffness of OU + #alpha_lRelu = alpha_lRelu + weigth_regularizer = 0.5 + + memory_lim = 5000 # = buffersize? 
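+    # Of the warm-up settings defined next, only warm_up_steps_critic is consumed below
+    # (as SB3 learning_starts); warm_up_steps_actor and target_model_update are not passed
+    # to the DDPG constructor.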
+ warm_up_steps_actor = 2048 + warm_up_steps_critic = 1024 + target_model_update = 1000 + + # NN architecture + actor_hidden_size = actor_hidden_size # Using LeakyReLU + # output linear + critic_hidden_size_1 = critic_hidden_size # Using LeakyReLU + critic_hidden_size_2 = critic_hidden_size # Using LeakyReLU + critic_hidden_size_3 = critic_hidden_size # Using LeakyReLU + + # output linear + + class RecordEnvCallback(BaseCallback): + def _on_step(self) -> bool: + rewards = [] + obs = env.reset() + for _ in range(max_episode_steps): + env.render() + action, _states = model.predict(obs, deterministic=True) + obs, reward, done, info = env.step(action) + rewards.append(reward) + if done: + break + + acc_Reward = list(accumulate(rewards)) + + plt.plot(rewards) + plt.xlabel(r'$t\,/\,\mathrm{s}$') + plt.ylabel('$Reward$') + plt.grid(which='both') + ts = time.gmtime() + plt.savefig(f'{folder_name + experiment_name + n_trail}/reward{time.strftime("%Y_%m_%d__%H_%M_%S", ts)}.pdf') + plt.close() + + # plt.plot(acc_Reward) + # plt.xlabel(r'$t\,/\,\mathrm{s}$') + # plt.ylabel('$Reward_sum$') + # plt.grid(which='both') + # plt.savefig(f'{folder_name + experiment_name + timestamp}/reward_sum_{datetime.now()}.pdf') + # plt.show() + + env.close() + env.reset() + return True + + n_actions = env.action_space.shape[-1] + action_noise = OrnsteinUhlenbeckActionNoise(mean=np.zeros(n_actions), theta=noise_theta * np.ones(n_actions), + sigma=noise_var * np.ones(n_actions), dt=net.ts) + + policy_kwargs = dict(activation_fn=th.nn.LeakyReLU, net_arch=dict(pi=[actor_hidden_size], qf=[critic_hidden_size_1, + critic_hidden_size_2, + critic_hidden_size_3])) + + model = DDPG('MlpPolicy', env, verbose=1, tensorboard_log=f'{folder_name + experiment_name + n_trail}/', + policy_kwargs=policy_kwargs, + learning_rate=learning_rate, buffer_size=memory_lim, learning_starts=warm_up_steps_critic, + batch_size=batch_size, tau=0.005, gamma=gamma, action_noise=action_noise, + train_freq=- 1, gradient_steps=- 1, n_episodes_rollout=1, optimize_memory_usage=False, + create_eval_env=False, seed=None, device='auto', _init_setup_model=True) + + model.actor.mu._modules['0'].weight.data = model.actor.mu._modules['0'].weight.data * weight_scale + model.actor.mu._modules['2'].weight.data = model.actor.mu._modules['2'].weight.data * weight_scale + model.actor_target.mu._modules['0'].weight.data = model.actor_target.mu._modules['0'].weight.data * weight_scale + model.actor_target.mu._modules['2'].weight.data = model.actor_target.mu._modules['2'].weight.data * weight_scale + # model.actor.mu._modules['0'].bias.data = model.actor.mu._modules['0'].bias.data * weight_bias_scale + # model.actor.mu._modules['2'].bias.data = model.actor.mu._modules['2'].bias.data * weight_bias_scale + + checkpoint_on_event = CheckpointCallback(save_freq=10000, + save_path=f'{folder_name + experiment_name + n_trail}/checkpoints/') + record_env = RecordEnvCallback() + plot_callback = EveryNTimesteps(n_steps=10000, callback=record_env) + model.learn(total_timesteps=200000, callback=[checkpoint_on_event, plot_callback]) + + model.save(f'{folder_name + experiment_name + n_trail}/model.zip') + + return_sum = 0.0 + obs = env.reset() + rew.gamma = 0 + while True: + + action, _states = model.predict(obs) + obs, rewards, done, info = env.step(action) + env.render() + return_sum += rewards + if done: + break + + return return_sum + + +def objective(trail): + learning_rate = 0.0004 # trail.suggest_loguniform("lr", 1e-5, 5e-3) # 0.0002# + gamma = 0.7 # 
trail.suggest_loguniform("gamma", 0.5, 0.99) + weight_scale = 0.02 # trail.suggest_loguniform("weight_scale", 5e-4, 1) # 0.005 + batch_size = 128 # trail.suggest_int("batch_size", 32, 1024) # 128 + # alpha_lRelu = trail.suggest_loguniform("alpha_lRelu", 0.0001, 0.5) #0.1 + actor_hidden_size = 100 # trail.suggest_int("actor_hidden_size", 10, 500) # 100 # Using LeakyReLU + # output linear + critic_hidden_size = 100 # trail.suggest_int("critic_hidden_size", 10, 500) # # Using LeakyReLU + + # memory_interval = 1 + # noise_var = 0.2 + # noise_theta = 5 # stiffness of OU + # weigth_regularizer = 0.5 + + memory_lim = 5000 # = buffersize? + warm_up_steps_actor = 2048 + warm_up_steps_critic = 1024 + target_model_update = 1000 + + # + + # n_trail = str(0)#str(trail.number) + # gamma = 0.75 + use_gamma_in_rew = 1 + + return experiment_fit_DDPG(learning_rate, gamma, use_gamma_in_rew, weight_scale, batch_size, + actor_hidden_size, critic_hidden_size, str(trail.number)) + + +# study = optuna.load_study(study_name="V-crtl_learn_use_hyperopt_params", storage="sqlite:///Hyperotp_visualization/test.sqlite3") + +study = optuna.create_study(study_name="V-crtl_stochLoad_single_Loadstep_exploring_starts", + direction='maximize', storage=f'sqlite:///{folder_name}optuna_data.sqlite3') + +study.optimize(objective, n_trials=1) +print(study.best_params, study.best_value) + +# pd.Series(index=[trail.params['lr'] for trail in study.trials], data=[trail.value for trail in study.trials]).scatter() diff --git a/experiments/model_validation/execution/monte_carlo_runner.py b/experiments/model_validation/execution/monte_carlo_runner.py index a242c428..7c2b298b 100644 --- a/experiments/model_validation/execution/monte_carlo_runner.py +++ b/experiments/model_validation/execution/monte_carlo_runner.py @@ -66,13 +66,15 @@ def run(self, n_episodes: int = 10, n_mc: int = 5, visualise: bool = False, prep for m in tqdm(range(n_mc), desc='monte_carlo_run', unit='epoch', leave=False): prepare_mc_experiment() # reset stoch components - r_vec = np.zeros(self.env.max_episode_steps) + r_vec = np.zeros(self.env.max_episode_steps + 1) obs = self.env.reset() - for p in tqdm(range(self.env.max_episode_steps), desc='steps', unit='step', leave=False): + for p in tqdm(range(self.env.max_episode_steps + 1), desc='steps', unit='step', leave=False): self.agent.observe(r, False) act = self.agent.act(obs) + if p == 1999: + asd = 1 obs, r, done, info = self.env.step(act) r_vec[p] = r self.env.render() @@ -97,7 +99,7 @@ def run(self, n_episodes: int = 10, n_mc: int = 5, visualise: bool = False, prep dev_return = 0 print('NO DEV RETURN!!!!') - dev_fac = 5 # 3 + dev_fac = 0.5 # 3 print(self.agent.episode_return) print(dev_return) diff --git a/experiments/model_validation/single_inverter_voltage_current_control_safe_opt_includingTB.py b/experiments/model_validation/single_inverter_voltage_current_control_safe_opt_includingTB.py index a39491e1..5ba0b8bf 100644 --- a/experiments/model_validation/single_inverter_voltage_current_control_safe_opt_includingTB.py +++ b/experiments/model_validation/single_inverter_voltage_current_control_safe_opt_includingTB.py @@ -50,14 +50,14 @@ matplotlib.rcParams.update(params) include_simulate = True -show_plots = True +show_plots = False balanced_load = False do_measurement = False -save_results = False +save_results = True # Files saves results and resulting plots to the folder saves_VI_control_safeopt in the current directory current_directory = os.getcwd() -save_folder = os.path.join(current_directory, 
r'VSim_rebase2_MC3') +save_folder = os.path.join(current_directory, r'V_ctrl_delay_included') os.makedirs(save_folder, exist_ok=True) np.random.seed(1) @@ -67,7 +67,7 @@ delta_t = 1e-4 # simulation time step size / s undersample = 1 max_episode_steps = 2000 # number of simulation steps per episode -num_episodes = 1 # number of simulation episodes (i.e. SafeOpt iterations) +num_episodes = 40 # number of simulation episodes (i.e. SafeOpt iterations) n_MC = 1 # number of Monte-Carlo samples for simulation - samples device parameters (e.g. L,R, noise) from v_DC = 600 # DC-link voltage / V; will be set as model parameter in the FMU nomFreq = 60 # nominal grid frequency / Hz @@ -188,7 +188,7 @@ def cal_J_min(phase_shift, amp_dev): # unsafe, if the new measured performance drops below 20 % of the initial performance of the initial safe (!) # parameter set safe_threshold = 0 - j_min = cal_J_min(phase_shift, amp_dev) # cal min allowed performance + j_min = 15000 # cal_J_min(phase_shift, amp_dev) # cal min allowed performance # The algorithm will not try to expand any points that are below this threshold. This makes the algorithm stop # expanding points eventually. @@ -206,6 +206,7 @@ def cal_J_min(phase_shift, amp_dev): # Definition of the controllers # Choose Kp and Ki for the current and voltage controller as mutable parameters mutable_params = dict(voltageP=MutableFloat(0.0175), voltageI=MutableFloat(12)) # 300Hz + #mutable_params = dict(voltageP=MutableFloat(0.022), voltageI=MutableFloat(213)) # 300Hz voltage_dqp_iparams = PI_params(kP=mutable_params['voltageP'], kI=mutable_params['voltageI'], limits=(-iLimit, iLimit)) @@ -224,11 +225,9 @@ def cal_J_min(phase_shift, amp_dev): # Define a voltage forming inverter using the PIPI and droop parameters from above - # Controller with observer - # ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, delta_t, droop_param, qdroop_param, - # observer=[Lueneberger(*params) for params in - # repeat((A, B, C, L, delta_t * undersample, v_DC / 2), 3)], undersampling=undersample, - # name='master') + # Controller with observer ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, delta_t, + # droop_param, qdroop_param, observer=[Lueneberger(*params) for params in repeat((A, B, C, L, delta_t * + # undersample, v_DC / 2), 3)], undersampling=undersample, name='master') # Controller without observer ctrl = MultiPhaseDQ0PIPIController(voltage_dqp_iparams, current_dqp_iparams, droop_param, qdroop_param, @@ -356,7 +355,7 @@ def reset_loads(): runner = MonteCarloRunner(agent, env) runner.run(num_episodes, n_mc=n_MC, visualise=True, prepare_mc_experiment=reset_loads, - return_gradient_extend=True) + return_gradient_extend=False) df_len = pd.DataFrame({'lengthscale': lengthscale, 'bounds': bounds, diff --git a/net/net_p10.yaml b/net/net_p10.yaml new file mode 100644 index 00000000..56d096f3 --- /dev/null +++ b/net/net_p10.yaml @@ -0,0 +1,51 @@ +v_nom: 325.27 +freq_nom: 50 +ts: 1e-4 +#max_episode_steps: 1000 + +components: + inv1: + id: inverter1 + i_nom: 300 + i_lim: 400 + #v_nom: 190 + v_lim: 650 + v_DC: 800 + pdroop: + gain: 0 + tau: 0.005 + qdroop: + gain: 0 + tau: 0.005 + v_noise: + fun: + normal: # np.random.* + loc: 0 + scale: 0.4 + clip: + a_min: 0.3 + a_max: 0.5 + i_noise: + fun: + normal: # np.random.* + loc: 0 + scale: 0.0018 + clip: # np.clip + a_min: 0.0005 + a_max: 0.32 + cls: MasterInverter_dq0 + in: + u: [ i1p1, i1p2, i1p3 ] # names of the inputs + out: + v: [ lc.capacitor1.v, lc.capacitor2.v, lc.capacitor3.v 
] + i: [ lc.inductor1.i, lc.inductor2.i, lc.inductor3.i ] + + # iref: [0,0,0] + # vref: [1,0,0] + + load: + id: r_load + cls: Load + out: + i: [ .resistor1.i, .resistor2.i, .resistor3.i ] + R: [ .resistor1.R, .resistor2.R, .resistor3.R ] diff --git a/net/net_single-inv-Paper_Loadstep.yaml b/net/net_single-inv-Paper_Loadstep.yaml index 856506d0..fd534b89 100644 --- a/net/net_single-inv-Paper_Loadstep.yaml +++ b/net/net_single-inv-Paper_Loadstep.yaml @@ -5,8 +5,10 @@ ts: 1e-4 components: inv1: id: inverter1 - #i_nom: 20 - #i_lim: 30 + i_nom: 12 + i_lim: 16 + #v_nom: 190 + v_lim: 285 v_DC: 600 v_noise: fun: diff --git a/net/net_single-inv-curr.yaml b/net/net_single-inv-curr.yaml index aedfed2c..a3664577 100644 --- a/net/net_single-inv-curr.yaml +++ b/net/net_single-inv-curr.yaml @@ -1,6 +1,6 @@ v_nom: 230*sqrt(2) -#freq_nom: 50 -ts: .5e-4 +freq_nom: 50 +ts: 0.5e-4 components: inv1: diff --git a/net/net_vctrl_single_inv.yaml b/net/net_vctrl_single_inv.yaml new file mode 100644 index 00000000..7e165dfe --- /dev/null +++ b/net/net_vctrl_single_inv.yaml @@ -0,0 +1,51 @@ +v_nom: 169.7 +freq_nom: 60 +ts: 1e-4 +#max_episode_steps: 1000 + +components: + inv1: + id: inverter1 + i_nom: 12 + i_lim: 16 + #v_nom: 190 + v_lim: 285 + v_DC: 600 + pdroop: + gain: 0 + tau: 0.005 + qdroop: + gain: 0 + tau: 0.005 + v_noise: + fun: + normal: # np.random.* + loc: 0 + scale: 0.4 + clip: + a_min: 0.3 + a_max: 0.5 + i_noise: + fun: + normal: # np.random.* + loc: 0 + scale: 0.0018 + clip: # np.clip + a_min: 0.0005 + a_max: 0.32 + cls: MasterInverter + in: + u: [ i1p1, i1p2, i1p3 ] # names of the inputs + out: + v: [ lc.capacitor1.v, lc.capacitor2.v, lc.capacitor3.v ] + i: [ lc.inductor1.i, lc.inductor2.i, lc.inductor3.i ] + + # iref: [0,0,0] + # vref: [1,0,0] + + load: + id: r_load + cls: Load + out: + #i: [ .resistor1.i, .resistor2.i, .resistor3.i ] + R: [ .resistor1.R, .resistor2.R, .resistor3.R ] diff --git a/net/net_vctrl_single_inv_dq0.yaml b/net/net_vctrl_single_inv_dq0.yaml new file mode 100644 index 00000000..6c78a400 --- /dev/null +++ b/net/net_vctrl_single_inv_dq0.yaml @@ -0,0 +1,51 @@ +v_nom: 169.7 +freq_nom: 60 +ts: 1e-4 +#max_episode_steps: 1000 + +components: + inv1: + id: inverter1 + i_nom: 12 + i_lim: 16 + #v_nom: 190 + v_lim: 285 + v_DC: 600 + pdroop: + gain: 0 + tau: 0.005 + qdroop: + gain: 0 + tau: 0.005 + v_noise: + fun: + normal: # np.random.* + loc: 0 + scale: 0.4 + clip: + a_min: 0.3 + a_max: 0.5 + i_noise: + fun: + normal: # np.random.* + loc: 0 + scale: 0.0018 + clip: # np.clip + a_min: 0.0005 + a_max: 0.32 + cls: MasterInverter_dq0 + in: + u: [ i1p1, i1p2, i1p3 ] # names of the inputs + out: + v: [ lc.capacitor1.v, lc.capacitor2.v, lc.capacitor3.v ] + i: [ lc.inductor1.i, lc.inductor2.i, lc.inductor3.i ] + + # iref: [0,0,0] + # vref: [1,0,0] + + load: + id: r_load + cls: Load + out: + i: [ .resistor1.i, .resistor2.i, .resistor3.i ] + R: [ .resistor1.R, .resistor2.R, .resistor3.R ] diff --git a/omg_grid/create_fmu.mos b/omg_grid/create_fmu.mos index 317ab001..5be93f79 100644 --- a/omg_grid/create_fmu.mos +++ b/omg_grid/create_fmu.mos @@ -3,4 +3,4 @@ setCommandLineOptions("-d=newInst"); getErrorString(); setCommandLineOptions("-d=initialization"); getErrorString(); setCommandLineOptions("--simCodeTarget=Cpp"); getErrorString(); setCommandLineOptions("-d=-disableDirectionalDerivatives"); getErrorString(); -OpenModelica.Scripting.translateModelFMU(grid.network, version="2.0", fmuType = "me"); getErrorString(); +OpenModelica.Scripting.translateModelFMU(grid.P10_R_load, version="2.0", fmuType = 
"me"); getErrorString(); diff --git a/omg_grid/grid.mo b/omg_grid/grid.mo index ad04737e..a7a589ce 100644 --- a/omg_grid/grid.mo +++ b/omg_grid/grid.mo @@ -2152,6 +2152,42 @@ package grid annotation( Diagram); end testbench_SC_load; + + model P10_R_load + grid.inverters.inverter inverter1(v_DC = 60) annotation( + Placement(visible = true, transformation(origin = {-70, 30}, extent = {{-10, 10}, {10, -10}}, rotation = 0))); + Modelica.Blocks.Interfaces.RealInput i1p1 annotation( + Placement(visible = true, transformation(origin = {-104, 18}, extent = {{-8, -8}, {8, 8}}, rotation = 0), iconTransformation(origin = {-104, 18}, extent = {{-8, -8}, {8, 8}}, rotation = 0))); + Modelica.Blocks.Interfaces.RealInput i1p2 annotation( + Placement(visible = true, transformation(origin = {-104, 30}, extent = {{-8, -8}, {8, 8}}, rotation = 0), iconTransformation(origin = {-104, 30}, extent = {{-8, -8}, {8, 8}}, rotation = 0))); + Modelica.Blocks.Interfaces.RealInput i1p3 annotation( + Placement(visible = true, transformation(origin = {-104, 42}, extent = {{-8, -8}, {8, 8}}, rotation = 0), iconTransformation(origin = {-104, 42}, extent = {{-8, -8}, {8, 8}}, rotation = 0))); + grid.filter.lc lc(C1 = 0.0000136, C2 = 0.0000136, C3 = 0.0000136, L1 = 0.0023, L2 = 0.0023, L3 = 0.0023, R1 = 0.4, R2 = 0.4, R3 = 0.4, R4 = 0.0000000001, R5 = 0.0000000001, R6 = 0.0000000001) annotation( + Placement(visible = true, transformation(origin = {-32, 30}, extent = {{-10, -10}, {10, 10}}, rotation = 0))); + grid.loads.r r_load(R1 = 7.15, R2 = 7.15, R3 = 7.15) annotation( + Placement(visible = true, transformation(origin = {10, 30}, extent = {{-10, -10}, {10, 10}}, rotation = 0))); + equation + connect(i1p1, inverter1.u1) annotation( + Line(points = {{-104, 18}, {-86, 18}, {-86, 36}, {-80, 36}}, color = {0, 0, 127})); + connect(i1p2, inverter1.u2) annotation( + Line(points = {{-104, 30}, {-80, 30}}, color = {0, 0, 127})); + connect(i1p3, inverter1.u3) annotation( + Line(points = {{-104, 42}, {-86, 42}, {-86, 24}, {-80, 24}}, color = {0, 0, 127})); + connect(lc.pin3, inverter1.pin3) annotation( + Line(points = {{-42, 36}, {-51, 36}, {-51, 24}, {-60, 24}}, color = {0, 0, 255})); + connect(lc.pin2, inverter1.pin2) annotation( + Line(points = {{-42, 30}, {-60, 30}}, color = {0, 0, 255})); + connect(lc.pin1, inverter1.pin1) annotation( + Line(points = {{-42, 24}, {-51, 24}, {-51, 36}, {-60, 36}}, color = {0, 0, 255})); + connect(r_load.pin3, lc.pin6) annotation( + Line(points = {{0, 36}, {-22, 36}, {-22, 36}, {-22, 36}}, color = {0, 0, 255})); + connect(r_load.pin2, lc.pin5) annotation( + Line(points = {{0, 30}, {-22, 30}, {-22, 30}, {-22, 30}}, color = {0, 0, 255})); + connect(r_load.pin1, lc.pin4) annotation( + Line(points = {{0, 24}, {-22, 24}, {-22, 24}, {-22, 24}}, color = {0, 0, 255})); + annotation( + Diagram); + end P10_R_load; annotation( uses(Modelica(version = "3.2.3"))); end grid; \ No newline at end of file diff --git a/omg_grid/grid.paper_loadstep.fmu b/omg_grid/grid.paper_loadstep.fmu new file mode 100644 index 00000000..86bf39c9 Binary files /dev/null and b/omg_grid/grid.paper_loadstep.fmu differ diff --git a/omg_grid/grid.paper_loadstepWIN.fmu b/omg_grid/grid.paper_loadstepWIN.fmu new file mode 100644 index 00000000..fe4bb924 Binary files /dev/null and b/omg_grid/grid.paper_loadstepWIN.fmu differ diff --git a/openmodelica_microgrid_gym/env/modelica.py b/openmodelica_microgrid_gym/env/modelica.py index bed19710..7ce72af7 100644 --- a/openmodelica_microgrid_gym/env/modelica.py +++ 
b/openmodelica_microgrid_gym/env/modelica.py @@ -341,7 +341,7 @@ def step(self, action: Sequence) -> Tuple[np.ndarray, float, bool, Mapping]: if params: self.model.set_params(**params) risk = self.net.risk() - + risk = 0 # Simulate and observe result state outputs = self._create_state() diff --git a/openmodelica_microgrid_gym/util/randproc.py b/openmodelica_microgrid_gym/util/randproc.py index 33436e85..71cec70e 100644 --- a/openmodelica_microgrid_gym/util/randproc.py +++ b/openmodelica_microgrid_gym/util/randproc.py @@ -27,8 +27,18 @@ def __init__(self, process_cls: Type[BaseProcess], proc_kwargs=None, bounds=None self._last_t = 0 self._reserve = None - def reset(self, initial): - self._last = initial + def reset(self, initial=None): + """ + Resets the process, if initial is None, it is set randomly in the range of bounds + """ + if initial is None: + self._last = np.random.uniform(low=self.bounds[0], high=self.bounds[1]) + self.proc.mean = self._last + # self.reserve = self._last + else: + self._last = initial + self.proc.mean = self._last + #self.reserve = self._last self._last_t = 0 self._reserve = None
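A minimal usage sketch of the changed reset API, assuming the same RandProcess/VasicekProcess wiring as in stable_baselinesDDPG_voltage_control.py above (R = 28 Ohm, load bounds 11-45 Ohm as defined there):

    from stochastic.processes import VasicekProcess
    from openmodelica_microgrid_gym.util import RandProcess

    R = 28  # nominal load resistance / Ohm, as in the script above
    gen = RandProcess(VasicekProcess, proc_kwargs=dict(speed=1000, vol=10, mean=R),
                      initial=R, bounds=(11, 45))

    gen.reset(initial=R)    # previous behaviour: restart the process from an explicit value
    gen.reset()             # new behaviour: restart value drawn uniformly from bounds
    print(gen.sample(0.0))  # sample the load resistance at t = 0 s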